mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-11 04:58:33 +03:00
Format and lint shared directory (#64)
This commit is contained in:
@@ -18,9 +18,9 @@ def make_engine(database_url: str):
|
||||
engine = create_engine(
|
||||
database_url,
|
||||
connect_args={"check_same_thread": False},
|
||||
pool_size=15, # Increase pool size
|
||||
max_overflow=20, # Allow more temporary connections
|
||||
pool_recycle=1800 # Recycle connections every 30 minutes
|
||||
pool_size=15, # Increase pool size
|
||||
max_overflow=20, # Allow more temporary connections
|
||||
pool_recycle=1800, # Recycle connections every 30 minutes
|
||||
)
|
||||
|
||||
@event.listens_for(engine, "connect")
|
||||
@@ -40,8 +40,10 @@ def make_session_local(engine: Engine):
|
||||
@contextmanager
|
||||
def get_db():
|
||||
session = make_session_local(make_engine(get_settings().DATABASE_PATH))()
|
||||
try: yield session
|
||||
finally: session.close()
|
||||
try:
|
||||
yield session
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
|
||||
def get_db_dependency():
|
||||
@@ -59,22 +61,32 @@ def wal_checkpoint():
|
||||
|
||||
# ASYNC connections
|
||||
async def make_async_engine(database_url: str) -> AsyncEngine:
|
||||
engine = create_async_engine(database_url, connect_args={"check_same_thread": False})
|
||||
engine = create_async_engine(
|
||||
database_url, connect_args={"check_same_thread": False}
|
||||
)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(lambda sync_conn: sync_conn.execute(text("PRAGMA journal_mode=WAL;")))
|
||||
await conn.run_sync(
|
||||
lambda sync_conn: sync_conn.execute(
|
||||
text("PRAGMA journal_mode=WAL;")
|
||||
)
|
||||
)
|
||||
|
||||
return engine
|
||||
|
||||
|
||||
async def make_async_session_local(engine: AsyncEngine) -> AsyncSession:
|
||||
return async_sessionmaker(engine, expire_on_commit=False, autoflush=False, autocommit=False)
|
||||
return async_sessionmaker(
|
||||
engine, expire_on_commit=False, autoflush=False, autocommit=False
|
||||
)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_db_async():
|
||||
engine = await make_async_engine(get_settings().ASYNC_DATABASE_PATH)
|
||||
engine = await make_async_engine(get_settings().async_database_path)
|
||||
async_session = await make_async_session_local(engine)
|
||||
async with async_session() as session:
|
||||
try: yield session
|
||||
finally: await engine.dispose()
|
||||
try:
|
||||
yield session
|
||||
finally:
|
||||
await engine.dispose()
|
||||
|
||||
@@ -42,7 +42,9 @@ class Archive(Base):
|
||||
id = Column(String, primary_key=True, index=True)
|
||||
url = Column(String, index=True)
|
||||
result = Column(JSON, default=None)
|
||||
public = Column(Boolean, default=True) # if public=false, access by group and author
|
||||
public = Column(
|
||||
Boolean, default=True
|
||||
) # if public=false, access by group and author
|
||||
deleted = Column(Boolean, default=False)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
@@ -52,7 +54,11 @@ class Archive(Base):
|
||||
author_id = Column(String, ForeignKey("users.email"))
|
||||
sheet_id = Column(String, ForeignKey("sheets.id"), default=None)
|
||||
|
||||
tags = relationship("Tag", back_populates="archives", secondary=association_table_archive_tags)
|
||||
tags = relationship(
|
||||
"Tag",
|
||||
back_populates="archives",
|
||||
secondary=association_table_archive_tags,
|
||||
)
|
||||
group = relationship("Group", back_populates="archives")
|
||||
author = relationship("User", back_populates="archives")
|
||||
urls = relationship("ArchiveUrl", back_populates="archive")
|
||||
@@ -75,7 +81,11 @@ class Tag(Base):
|
||||
id = Column(String, primary_key=True, index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
archives = relationship("Archive", back_populates="tags", secondary=association_table_archive_tags)
|
||||
archives = relationship(
|
||||
"Archive",
|
||||
back_populates="tags",
|
||||
secondary=association_table_archive_tags,
|
||||
)
|
||||
|
||||
|
||||
class User(Base):
|
||||
@@ -85,7 +95,9 @@ class User(Base):
|
||||
|
||||
archives = relationship("Archive", back_populates="author")
|
||||
sheets = relationship("Sheet", back_populates="author")
|
||||
groups = relationship("Group", back_populates="users", secondary=association_table_user_groups)
|
||||
groups = relationship(
|
||||
"Group", back_populates="users", secondary=association_table_user_groups
|
||||
)
|
||||
|
||||
|
||||
class Group(Base):
|
||||
@@ -101,7 +113,9 @@ class Group(Base):
|
||||
|
||||
archives = relationship("Archive", back_populates="group")
|
||||
sheets = relationship("Sheet", back_populates="group")
|
||||
users = relationship("User", back_populates="groups", secondary=association_table_user_groups)
|
||||
users = relationship(
|
||||
"User", back_populates="groups", secondary=association_table_user_groups
|
||||
)
|
||||
|
||||
|
||||
class Sheet(Base):
|
||||
@@ -110,11 +124,27 @@ class Sheet(Base):
|
||||
id = Column(String, primary_key=True, index=True, doc="Google Sheet ID")
|
||||
name = Column(String, default=None)
|
||||
author_id = Column(String, ForeignKey("users.email"))
|
||||
group_id = Column(String, ForeignKey("groups.id"), doc="Group ID, user must be in a group to create a sheet.")
|
||||
frequency = Column(String, default="daily", doc="Frequency of archiving: hourly, daily, weekly.")
|
||||
group_id = Column(
|
||||
String,
|
||||
ForeignKey("groups.id"),
|
||||
doc="Group ID, user must be in a group to create a sheet.",
|
||||
)
|
||||
frequency = Column(
|
||||
String,
|
||||
default="daily",
|
||||
doc="Frequency of archiving: hourly, daily, weekly.",
|
||||
)
|
||||
# TODO: stats is not being used, consider removing
|
||||
stats = Column(JSON, default={}, doc="Sheet statistics like total links, total rows, ...")
|
||||
last_url_archived_at = Column(DateTime(timezone=True), server_default=func.now(), doc="Last time a new link was archived.")
|
||||
stats = Column(
|
||||
JSON,
|
||||
default={},
|
||||
doc="Sheet statistics like total links, total rows, ...",
|
||||
)
|
||||
last_url_archived_at = Column(
|
||||
DateTime(timezone=True),
|
||||
server_default=func.now(),
|
||||
doc="Last time a new link was archived.",
|
||||
)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
|
||||
|
||||
@@ -9,7 +9,9 @@ from app.shared.db import models
|
||||
# TODO: isolate database operations away from worker and into WEB
|
||||
# ONLY WORKER
|
||||
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
|
||||
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
|
||||
db_sheet = (
|
||||
db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
|
||||
)
|
||||
if db_sheet:
|
||||
db_sheet.last_url_archived_at = datetime.now()
|
||||
db.commit()
|
||||
@@ -19,12 +21,17 @@ def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
|
||||
|
||||
# ONLY WORKER and INTEROP
|
||||
|
||||
|
||||
def get_group(db: Session, group_name: str) -> models.Group:
|
||||
return db.query(models.Group).filter(models.Group.id == group_name).first()
|
||||
|
||||
|
||||
def create_or_get_user(db: Session, author_id: str) -> models.User:
|
||||
if type(author_id) == str: author_id = author_id.lower()
|
||||
db_user = db.query(models.User).filter(models.User.email == author_id).first()
|
||||
if isinstance(author_id, str):
|
||||
author_id = author_id.lower()
|
||||
db_user = (
|
||||
db.query(models.User).filter(models.User.email == author_id).first()
|
||||
)
|
||||
if not db_user:
|
||||
db_user = models.User(email=author_id)
|
||||
db.add(db_user)
|
||||
@@ -43,8 +50,22 @@ def create_tag(db: Session, tag: str) -> models.Tag:
|
||||
return db_tag
|
||||
|
||||
|
||||
def create_archive(db: Session, archive: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]) -> models.Archive:
|
||||
db_archive = models.Archive(id=archive.id, url=archive.url, result=archive.result, public=archive.public, author_id=archive.author_id, group_id=archive.group_id, sheet_id=archive.sheet_id, store_until=archive.store_until)
|
||||
def create_archive(
|
||||
db: Session,
|
||||
archive: schemas.ArchiveCreate,
|
||||
tags: list[models.Tag],
|
||||
urls: list[models.ArchiveUrl],
|
||||
) -> models.Archive:
|
||||
db_archive = models.Archive(
|
||||
id=archive.id,
|
||||
url=archive.url,
|
||||
result=archive.result,
|
||||
public=archive.public,
|
||||
author_id=archive.author_id,
|
||||
group_id=archive.group_id,
|
||||
sheet_id=archive.sheet_id,
|
||||
store_until=archive.store_until,
|
||||
)
|
||||
db_archive.tags = tags
|
||||
db_archive.urls = urls
|
||||
db.add(db_archive)
|
||||
@@ -53,10 +74,14 @@ def create_archive(db: Session, archive: schemas.ArchiveCreate, tags: list[model
|
||||
return db_archive
|
||||
|
||||
|
||||
def store_archived_url(db: Session, archive: schemas.ArchiveCreate) -> models.Archive:
|
||||
def store_archived_url(
|
||||
db: Session, archive: schemas.ArchiveCreate
|
||||
) -> models.Archive:
|
||||
# create and load user, tags, if needed
|
||||
create_or_get_user(db, archive.author_id)
|
||||
db_tags = [create_tag(db, tag) for tag in (archive.tags or [])]
|
||||
# insert everything
|
||||
db_archive = create_archive(db, archive=archive, tags=db_tags, urls=archive.urls)
|
||||
db_archive = create_archive(
|
||||
db, archive=archive, tags=db_tags, urls=archive.urls
|
||||
)
|
||||
return db_archive
|
||||
|
||||
Reference in New Issue
Block a user