mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-13 05:58:35 +03:00
auto remove stale sheets and email users
This commit is contained in:
@@ -21,6 +21,7 @@ fastapi-utils = "*"
|
|||||||
prometheus-fastapi-instrumentator = "*"
|
prometheus-fastapi-instrumentator = "*"
|
||||||
auto-archiver = "*"
|
auto-archiver = "*"
|
||||||
pydantic-settings = "*"
|
pydantic-settings = "*"
|
||||||
|
fastapi-mail = "*"
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
watchdog = "*"
|
watchdog = "*"
|
||||||
|
|||||||
49
src/Pipfile.lock
generated
49
src/Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "601b24d82095b8b58f1376755d9a1c5c3bbf144aa622e1eef9fc034835f097fe"
|
"sha256": "14a7ead66a74419eebfa72478bbb7e3efe378df9f41e401738faa2871f5c4344"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {
|
"requires": {
|
||||||
@@ -122,6 +122,14 @@
|
|||||||
"markers": "python_version >= '3.9'",
|
"markers": "python_version >= '3.9'",
|
||||||
"version": "==1.3.2"
|
"version": "==1.3.2"
|
||||||
},
|
},
|
||||||
|
"aiosmtplib": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:08fd840f9dbc23258025dca229e8a8f04d2ccf3ecb1319585615bfc7933f7f47",
|
||||||
|
"sha256:8783059603a34834c7c90ca51103c3aa129d5922003b5ce98dbaa6d4440f10fc"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.8'",
|
||||||
|
"version": "==3.0.2"
|
||||||
|
},
|
||||||
"aiosqlite": {
|
"aiosqlite": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:36a1deaca0cac40ebe32aac9977a6e2bbc7f5189f23f4a54d5908986729e5bd6",
|
"sha256:36a1deaca0cac40ebe32aac9977a6e2bbc7f5189f23f4a54d5908986729e5bd6",
|
||||||
@@ -674,6 +682,22 @@
|
|||||||
"markers": "python_version >= '3.7'",
|
"markers": "python_version >= '3.7'",
|
||||||
"version": "==1.2.0"
|
"version": "==1.2.0"
|
||||||
},
|
},
|
||||||
|
"dnspython": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86",
|
||||||
|
"sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.9'",
|
||||||
|
"version": "==2.7.0"
|
||||||
|
},
|
||||||
|
"email-validator": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631",
|
||||||
|
"sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.8'",
|
||||||
|
"version": "==2.2.0"
|
||||||
|
},
|
||||||
"exceptiongroup": {
|
"exceptiongroup": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b",
|
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b",
|
||||||
@@ -691,6 +715,15 @@
|
|||||||
"markers": "python_version >= '3.8'",
|
"markers": "python_version >= '3.8'",
|
||||||
"version": "==0.115.6"
|
"version": "==0.115.6"
|
||||||
},
|
},
|
||||||
|
"fastapi-mail": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:04bde1005c624f42dfc0a9c1e313fcc544499fdd6b3531e606c500d80ac2ffcb",
|
||||||
|
"sha256:3525cf342ff91f6bcb3298570d1783498082e586957f668ee4164a0aab6ec743"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"markers": "python_full_version >= '3.8.1' and python_version < '4.0'",
|
||||||
|
"version": "==1.4.2"
|
||||||
|
},
|
||||||
"fastapi-utils": {
|
"fastapi-utils": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:6c4d507a76bab9a016cee0c4fa3a4638c636b2b2689e39c62254b1b2e4e81825",
|
"sha256:6c4d507a76bab9a016cee0c4fa3a4638c636b2b2689e39c62254b1b2e4e81825",
|
||||||
@@ -1867,11 +1900,11 @@
|
|||||||
},
|
},
|
||||||
"pydantic": {
|
"pydantic": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff",
|
"sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584",
|
||||||
"sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"
|
"sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '3.8'",
|
"markers": "python_version >= '3.8'",
|
||||||
"version": "==2.10.5"
|
"version": "==2.10.6"
|
||||||
},
|
},
|
||||||
"pydantic-core": {
|
"pydantic-core": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -2409,11 +2442,11 @@
|
|||||||
},
|
},
|
||||||
"starlette": {
|
"starlette": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835",
|
"sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f",
|
||||||
"sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"
|
"sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '3.8'",
|
"markers": "python_version >= '3.9'",
|
||||||
"version": "==0.41.3"
|
"version": "==0.45.3"
|
||||||
},
|
},
|
||||||
"telethon": {
|
"telethon": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from db.database import get_db, get_db_async, make_engine
|
|||||||
from shared.settings import get_settings
|
from shared.settings import get_settings
|
||||||
from utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
from utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
||||||
from worker.main import create_sheet_task
|
from worker.main import create_sheet_task
|
||||||
|
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
@@ -35,6 +36,11 @@ async def lifespan(app: FastAPI):
|
|||||||
else:
|
else:
|
||||||
logger.warning("[CRON] Sheet archive cronjobs are disabled.")
|
logger.warning("[CRON] Sheet archive cronjobs are disabled.")
|
||||||
|
|
||||||
|
if get_settings().CRON_DELETE_STALE_SHEETS:
|
||||||
|
asyncio.create_task(delete_stale_sheets())
|
||||||
|
else:
|
||||||
|
logger.warning("[CRON] Delete stale sheets cronjob is disabled.")
|
||||||
|
|
||||||
yield # separates startup from shutdown instructions
|
yield # separates startup from shutdown instructions
|
||||||
|
|
||||||
# SHUTDOWN
|
# SHUTDOWN
|
||||||
@@ -66,3 +72,39 @@ async def archive_sheets_cronjob(frequency: str, interval: int, current_time_uni
|
|||||||
task = create_sheet_task.apply_async(args=[schemas.SubmitSheet(sheet_id=s.id, author_id=s.author_id, group=s.group_id).model_dump_json()])
|
task = create_sheet_task.apply_async(args=[schemas.SubmitSheet(sheet_id=s.id, author_id=s.author_id, group=s.group_id).model_dump_json()])
|
||||||
triggered_jobs.append({"sheet_id": s.id, "task_id": task.id})
|
triggered_jobs.append({"sheet_id": s.id, "task_id": task.id})
|
||||||
logger.info(f"[CRON {frequency.upper()}:{current_time_unit}] Triggered {len(triggered_jobs)} sheet tasks: {triggered_jobs}")
|
logger.info(f"[CRON {frequency.upper()}:{current_time_unit}] Triggered {len(triggered_jobs)} sheet tasks: {triggered_jobs}")
|
||||||
|
|
||||||
|
|
||||||
|
@repeat_every(seconds=86400, wait_first=150, on_exception=logger.error)
|
||||||
|
async def delete_stale_sheets():
|
||||||
|
STALE_DAYS = get_settings().DELETE_STALE_SHEETS_DAYS
|
||||||
|
logger.info(f"[CRON] Deleting stale sheets older than {STALE_DAYS} days.")
|
||||||
|
async with get_db_async() as db:
|
||||||
|
user_sheets = await crud.delete_stale_sheets(db, STALE_DAYS)
|
||||||
|
|
||||||
|
if not user_sheets: return
|
||||||
|
|
||||||
|
fastmail = FastMail(get_settings().MAIL_CONFIG)
|
||||||
|
# notify users
|
||||||
|
for email in user_sheets:
|
||||||
|
list_of_sheets = "\n".join([f'<li><a href="https://docs.google.com/spreadsheets/d/{s.id}">{s.name}</a></li>' for s in user_sheets[email]])
|
||||||
|
message = MessageSchema(
|
||||||
|
subject="Auto Archiver: Stale Sheets Removed",
|
||||||
|
recipients=[email],
|
||||||
|
body=f"""
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<p>Hi {email},</p>
|
||||||
|
<p>Your stale sheets have been removed from our system as no new URL was archived in the past {STALE_DAYS} days:</p>
|
||||||
|
<ul>
|
||||||
|
{list_of_sheets}
|
||||||
|
</ul>
|
||||||
|
<p>You can always re-add them at https://auto-archiver.bellingcat.com/.</p>
|
||||||
|
<p>Best,<br>The Auto Archiver team</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
""",
|
||||||
|
subtype=MessageType.html
|
||||||
|
)
|
||||||
|
await fastmail.send_message(message)
|
||||||
|
logger.info(f"[CRON] Email sent to {email} about stale sheets deletion.")
|
||||||
|
|
||||||
|
|||||||
@@ -261,6 +261,18 @@ async def get_sheets_by_id_hash(db: AsyncSession, frequency: str, modulo: str, i
|
|||||||
filtered.append(sheet)
|
filtered.append(sheet)
|
||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
|
async def delete_stale_sheets(db: AsyncSession, inactivity_days: int) -> dict:
|
||||||
|
time_threshold = datetime.now() - timedelta(days=inactivity_days)
|
||||||
|
result = await db.execute(
|
||||||
|
select(models.Sheet).filter(models.Sheet.last_url_archived_at < time_threshold)
|
||||||
|
)
|
||||||
|
deleted = defaultdict(list)
|
||||||
|
for sheet in result.scalars():
|
||||||
|
await db.delete(sheet)
|
||||||
|
deleted[sheet.author_id].append(sheet)
|
||||||
|
await db.commit()
|
||||||
|
return dict(deleted)
|
||||||
|
|
||||||
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
|
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
|
||||||
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
|
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
|
||||||
if db_sheet:
|
if db_sheet:
|
||||||
|
|||||||
@@ -36,16 +36,16 @@ def get_db_dependency():
|
|||||||
with get_db() as db:
|
with get_db() as db:
|
||||||
yield db
|
yield db
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ASYNC connections
|
# ASYNC connections
|
||||||
|
|
||||||
|
|
||||||
async def make_async_engine(database_url: str) -> AsyncEngine:
|
async def make_async_engine(database_url: str) -> AsyncEngine:
|
||||||
engine = create_async_engine(database_url, connect_args={"check_same_thread": False})
|
engine = create_async_engine(database_url, connect_args={"check_same_thread": False})
|
||||||
return engine
|
return engine
|
||||||
|
|
||||||
|
|
||||||
async def make_async_session_local(engine: AsyncEngine) -> AsyncSession:
|
async def make_async_session_local(engine: AsyncEngine) -> AsyncSession:
|
||||||
return async_sessionmaker(engine, expire_on_commit=False)
|
return async_sessionmaker(engine, expire_on_commit=False, autoflush=False, autocommit=False)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from fastapi_mail import ConnectionConfig
|
||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
from pydantic import ConfigDict
|
from pydantic import ConfigDict
|
||||||
from typing import Annotated, Set
|
from typing import Annotated, Set
|
||||||
@@ -16,6 +17,8 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# cronjobs
|
# cronjobs
|
||||||
CRON_ARCHIVE_SHEETS: bool = False
|
CRON_ARCHIVE_SHEETS: bool = False
|
||||||
|
CRON_DELETE_STALE_SHEETS: bool = True
|
||||||
|
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||||
|
|
||||||
# database
|
# database
|
||||||
DATABASE_PATH: str
|
DATABASE_PATH: str
|
||||||
@@ -39,6 +42,29 @@ class Settings(BaseSettings):
|
|||||||
#TODO: deprecate blocklist?
|
#TODO: deprecate blocklist?
|
||||||
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
||||||
|
|
||||||
|
# email configuration, if needed
|
||||||
|
MAIL_FROM: str = "noreply@bellingcat.com"
|
||||||
|
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"
|
||||||
|
MAIL_USERNAME: str = ""
|
||||||
|
MAIL_PASSWORD: str = ""
|
||||||
|
MAIL_SERVER: str = ""
|
||||||
|
MAIL_PORT: int = 587
|
||||||
|
MAIL_STARTTLS: bool = False
|
||||||
|
MAIL_SSL_TLS: bool = True
|
||||||
|
@property
|
||||||
|
def MAIL_CONFIG(self) -> str:
|
||||||
|
return ConnectionConfig(
|
||||||
|
MAIL_FROM=self.MAIL_FROM,
|
||||||
|
MAIL_FROM_NAME=self.MAIL_FROM_NAME,
|
||||||
|
MAIL_USERNAME=self.MAIL_USERNAME,
|
||||||
|
MAIL_PASSWORD=self.MAIL_PASSWORD,
|
||||||
|
MAIL_SERVER=self.MAIL_SERVER,
|
||||||
|
MAIL_PORT=self.MAIL_PORT,
|
||||||
|
MAIL_STARTTLS=self.MAIL_STARTTLS,
|
||||||
|
MAIL_SSL_TLS=self.MAIL_SSL_TLS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_settings():
|
def get_settings():
|
||||||
return Settings()
|
return Settings()
|
||||||
Reference in New Issue
Block a user