mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-08 03:28:35 +03:00
auto remove stale sheets and email users
This commit is contained in:
@@ -21,6 +21,7 @@ fastapi-utils = "*"
|
||||
prometheus-fastapi-instrumentator = "*"
|
||||
auto-archiver = "*"
|
||||
pydantic-settings = "*"
|
||||
fastapi-mail = "*"
|
||||
|
||||
[dev-packages]
|
||||
watchdog = "*"
|
||||
|
||||
49
src/Pipfile.lock
generated
49
src/Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "601b24d82095b8b58f1376755d9a1c5c3bbf144aa622e1eef9fc034835f097fe"
|
||||
"sha256": "14a7ead66a74419eebfa72478bbb7e3efe378df9f41e401738faa2871f5c4344"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -122,6 +122,14 @@
|
||||
"markers": "python_version >= '3.9'",
|
||||
"version": "==1.3.2"
|
||||
},
|
||||
"aiosmtplib": {
|
||||
"hashes": [
|
||||
"sha256:08fd840f9dbc23258025dca229e8a8f04d2ccf3ecb1319585615bfc7933f7f47",
|
||||
"sha256:8783059603a34834c7c90ca51103c3aa129d5922003b5ce98dbaa6d4440f10fc"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==3.0.2"
|
||||
},
|
||||
"aiosqlite": {
|
||||
"hashes": [
|
||||
"sha256:36a1deaca0cac40ebe32aac9977a6e2bbc7f5189f23f4a54d5908986729e5bd6",
|
||||
@@ -674,6 +682,22 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"dnspython": {
|
||||
"hashes": [
|
||||
"sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86",
|
||||
"sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"
|
||||
],
|
||||
"markers": "python_version >= '3.9'",
|
||||
"version": "==2.7.0"
|
||||
},
|
||||
"email-validator": {
|
||||
"hashes": [
|
||||
"sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631",
|
||||
"sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"exceptiongroup": {
|
||||
"hashes": [
|
||||
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b",
|
||||
@@ -691,6 +715,15 @@
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==0.115.6"
|
||||
},
|
||||
"fastapi-mail": {
|
||||
"hashes": [
|
||||
"sha256:04bde1005c624f42dfc0a9c1e313fcc544499fdd6b3531e606c500d80ac2ffcb",
|
||||
"sha256:3525cf342ff91f6bcb3298570d1783498082e586957f668ee4164a0aab6ec743"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_full_version >= '3.8.1' and python_version < '4.0'",
|
||||
"version": "==1.4.2"
|
||||
},
|
||||
"fastapi-utils": {
|
||||
"hashes": [
|
||||
"sha256:6c4d507a76bab9a016cee0c4fa3a4638c636b2b2689e39c62254b1b2e4e81825",
|
||||
@@ -1867,11 +1900,11 @@
|
||||
},
|
||||
"pydantic": {
|
||||
"hashes": [
|
||||
"sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff",
|
||||
"sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"
|
||||
"sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584",
|
||||
"sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==2.10.5"
|
||||
"version": "==2.10.6"
|
||||
},
|
||||
"pydantic-core": {
|
||||
"hashes": [
|
||||
@@ -2409,11 +2442,11 @@
|
||||
},
|
||||
"starlette": {
|
||||
"hashes": [
|
||||
"sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835",
|
||||
"sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"
|
||||
"sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f",
|
||||
"sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==0.41.3"
|
||||
"markers": "python_version >= '3.9'",
|
||||
"version": "==0.45.3"
|
||||
},
|
||||
"telethon": {
|
||||
"hashes": [
|
||||
|
||||
@@ -12,6 +12,7 @@ from db.database import get_db, get_db_async, make_engine
|
||||
from shared.settings import get_settings
|
||||
from utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
||||
from worker.main import create_sheet_task
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -35,6 +36,11 @@ async def lifespan(app: FastAPI):
|
||||
else:
|
||||
logger.warning("[CRON] Sheet archive cronjobs are disabled.")
|
||||
|
||||
if get_settings().CRON_DELETE_STALE_SHEETS:
|
||||
asyncio.create_task(delete_stale_sheets())
|
||||
else:
|
||||
logger.warning("[CRON] Delete stale sheets cronjob is disabled.")
|
||||
|
||||
yield # separates startup from shutdown instructions
|
||||
|
||||
# SHUTDOWN
|
||||
@@ -66,3 +72,39 @@ async def archive_sheets_cronjob(frequency: str, interval: int, current_time_uni
|
||||
task = create_sheet_task.apply_async(args=[schemas.SubmitSheet(sheet_id=s.id, author_id=s.author_id, group=s.group_id).model_dump_json()])
|
||||
triggered_jobs.append({"sheet_id": s.id, "task_id": task.id})
|
||||
logger.info(f"[CRON {frequency.upper()}:{current_time_unit}] Triggered {len(triggered_jobs)} sheet tasks: {triggered_jobs}")
|
||||
|
||||
|
||||
@repeat_every(seconds=86400, wait_first=150, on_exception=logger.error)
|
||||
async def delete_stale_sheets():
|
||||
STALE_DAYS = get_settings().DELETE_STALE_SHEETS_DAYS
|
||||
logger.info(f"[CRON] Deleting stale sheets older than {STALE_DAYS} days.")
|
||||
async with get_db_async() as db:
|
||||
user_sheets = await crud.delete_stale_sheets(db, STALE_DAYS)
|
||||
|
||||
if not user_sheets: return
|
||||
|
||||
fastmail = FastMail(get_settings().MAIL_CONFIG)
|
||||
# notify users
|
||||
for email in user_sheets:
|
||||
list_of_sheets = "\n".join([f'<li><a href="https://docs.google.com/spreadsheets/d/{s.id}">{s.name}</a></li>' for s in user_sheets[email]])
|
||||
message = MessageSchema(
|
||||
subject="Auto Archiver: Stale Sheets Removed",
|
||||
recipients=[email],
|
||||
body=f"""
|
||||
<html>
|
||||
<body>
|
||||
<p>Hi {email},</p>
|
||||
<p>Your stale sheets have been removed from our system as no new URL was archived in the past {STALE_DAYS} days:</p>
|
||||
<ul>
|
||||
{list_of_sheets}
|
||||
</ul>
|
||||
<p>You can always re-add them at https://auto-archiver.bellingcat.com/.</p>
|
||||
<p>Best,<br>The Auto Archiver team</p>
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
subtype=MessageType.html
|
||||
)
|
||||
await fastmail.send_message(message)
|
||||
logger.info(f"[CRON] Email sent to {email} about stale sheets deletion.")
|
||||
|
||||
|
||||
@@ -261,6 +261,18 @@ async def get_sheets_by_id_hash(db: AsyncSession, frequency: str, modulo: str, i
|
||||
filtered.append(sheet)
|
||||
return filtered
|
||||
|
||||
async def delete_stale_sheets(db: AsyncSession, inactivity_days: int) -> dict:
|
||||
time_threshold = datetime.now() - timedelta(days=inactivity_days)
|
||||
result = await db.execute(
|
||||
select(models.Sheet).filter(models.Sheet.last_url_archived_at < time_threshold)
|
||||
)
|
||||
deleted = defaultdict(list)
|
||||
for sheet in result.scalars():
|
||||
await db.delete(sheet)
|
||||
deleted[sheet.author_id].append(sheet)
|
||||
await db.commit()
|
||||
return dict(deleted)
|
||||
|
||||
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
|
||||
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
|
||||
if db_sheet:
|
||||
|
||||
@@ -36,16 +36,16 @@ def get_db_dependency():
|
||||
with get_db() as db:
|
||||
yield db
|
||||
|
||||
|
||||
|
||||
# ASYNC connections
|
||||
|
||||
|
||||
async def make_async_engine(database_url: str) -> AsyncEngine:
|
||||
engine = create_async_engine(database_url, connect_args={"check_same_thread": False})
|
||||
return engine
|
||||
|
||||
|
||||
async def make_async_session_local(engine: AsyncEngine) -> AsyncSession:
|
||||
return async_sessionmaker(engine, expire_on_commit=False)
|
||||
return async_sessionmaker(engine, expire_on_commit=False, autoflush=False, autocommit=False)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from functools import lru_cache
|
||||
from fastapi_mail import ConnectionConfig
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import ConfigDict
|
||||
from typing import Annotated, Set
|
||||
@@ -16,6 +17,8 @@ class Settings(BaseSettings):
|
||||
|
||||
# cronjobs
|
||||
CRON_ARCHIVE_SHEETS: bool = False
|
||||
CRON_DELETE_STALE_SHEETS: bool = True
|
||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||
|
||||
# database
|
||||
DATABASE_PATH: str
|
||||
@@ -39,6 +42,29 @@ class Settings(BaseSettings):
|
||||
#TODO: deprecate blocklist?
|
||||
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
||||
|
||||
# email configuration, if needed
|
||||
MAIL_FROM: str = "noreply@bellingcat.com"
|
||||
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"
|
||||
MAIL_USERNAME: str = ""
|
||||
MAIL_PASSWORD: str = ""
|
||||
MAIL_SERVER: str = ""
|
||||
MAIL_PORT: int = 587
|
||||
MAIL_STARTTLS: bool = False
|
||||
MAIL_SSL_TLS: bool = True
|
||||
@property
|
||||
def MAIL_CONFIG(self) -> str:
|
||||
return ConnectionConfig(
|
||||
MAIL_FROM=self.MAIL_FROM,
|
||||
MAIL_FROM_NAME=self.MAIL_FROM_NAME,
|
||||
MAIL_USERNAME=self.MAIL_USERNAME,
|
||||
MAIL_PASSWORD=self.MAIL_PASSWORD,
|
||||
MAIL_SERVER=self.MAIL_SERVER,
|
||||
MAIL_PORT=self.MAIL_PORT,
|
||||
MAIL_STARTTLS=self.MAIL_STARTTLS,
|
||||
MAIL_SSL_TLS=self.MAIL_SSL_TLS,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings():
|
||||
return Settings()
|
||||
Reference in New Issue
Block a user