mirror of
https://github.com/bellingcat/whisperbox-transcribe.git
synced 2026-06-12 13:38:34 +03:00
refactor: restructure project layout
This commit is contained in:
0
app/shared/__init__.py
Normal file
0
app/shared/__init__.py
Normal file
16
app/shared/config.py
Normal file
16
app/shared/config.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import os
|
||||
|
||||
from pydantic import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration loaded through pydantic's ``BaseSettings``.

    Every field is declared without a default value, so all four must be
    supplied when the class is instantiated.
    """

    API_SECRET: str
    DATABASE_URI: str
    ENVIRONMENT: str
    REDIS_URI: str
|
||||
|
||||
|
||||
# When ENVIRONMENT is exactly "test", load the settings with ".env.test" as the
# env file; otherwise build them from the default sources.
if os.environ.get("ENVIRONMENT") == "test":
    settings = Settings(_env_file=".env.test")  # type: ignore
else:
    settings = Settings()
|
||||
0
app/shared/db/__init__.py
Normal file
0
app/shared/db/__init__.py
Normal file
1
app/shared/db/alembic/README
Normal file
1
app/shared/db/alembic/README
Normal file
@@ -0,0 +1 @@
|
||||
Generic single-database configuration.
|
||||
80
app/shared/db/alembic/env.py
Normal file
80
app/shared/db/alembic/env.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
from app.shared.config import settings
|
||||
from app.shared.db.models import Base
|
||||
|
||||
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Override the .ini URL with the application's database URI. The value is
# passed through ConfigParser interpolation, so a literal "%" (for example in
# a URL-encoded password) must be doubled to be accepted and read back intact.
config.set_main_option("sqlalchemy.url", settings.DATABASE_URI.replace("%", "%%"))

# Metadata of the application's models, used for 'autogenerate' support.
target_metadata = Base.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Only the database URL is handed to the context -- no Engine is created,
    so no DBAPI needs to be available. Calls to context.execute() emit the
    given string to the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the "sqlalchemy."-prefixed options of the main ini
    section and associates a live connection with the migration context.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
# Pick the runner that matches the mode Alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
||||
24
app/shared/db/alembic/script.py.mako
Normal file
24
app/shared/db/alembic/script.py.mako
Normal file
@@ -0,0 +1,24 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


## The function bodies below fall back to "pass" when no operations are rendered.
def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,71 @@
|
||||
"""add_job_and_artifact_tables
|
||||
|
||||
Revision ID: c43a1ddae8b7
|
||||
Revises:
|
||||
Create Date: 2023-01-05 12:00:58.824773
|
||||
|
||||
"""
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
revision = "c43a1ddae8b7"
# down_revision is None: this script names no predecessor revision.
down_revision = None
branch_labels = None
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``jobs`` and ``artifacts`` tables and an index on each ``id``."""

    def standard_columns() -> list:
        # Build fresh Column objects per call; a Column belongs to one table only.
        return [
            sa.Column(
                "created_at",
                sa.DateTime(),
                server_default=sa.text("now()"),
                nullable=False,
            ),
            sa.Column("updated_at", sa.DateTime(), nullable=True),
            sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        ]

    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "jobs",
        sa.Column("url", sa.String(length=2048), nullable=True),
        sa.Column(
            "status",
            sa.Enum("Create", "Error", "Success", name="jobstatus"),
            nullable=False,
        ),
        sa.Column("type", sa.Enum("Transcript", name="jobtype"), nullable=False),
        *standard_columns(),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_jobs_id"), "jobs", ["id"], unique=False)

    op.create_table(
        "artifacts",
        sa.Column("job_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("data", sa.JSON(none_as_null=True), nullable=True),
        sa.Column(
            "type",
            sa.Enum("RawTranscript", name="artifacttype"),
            nullable=False,
        ),
        *standard_columns(),
        sa.ForeignKeyConstraint(["job_id"], ["jobs.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_artifacts_id"), "artifacts", ["id"], unique=False)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``artifacts`` and ``jobs`` tables, their indexes and enum types."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f("ix_artifacts_id"), table_name="artifacts")
    op.drop_table("artifacts")
    op.drop_index(op.f("ix_jobs_id"), table_name="jobs")
    op.drop_table("jobs")
    # ### end Alembic commands ###

    # drop_table does not remove the PostgreSQL enum types that upgrade()
    # created for the status/type columns; left behind, they make a later
    # upgrade fail because the types already exist.
    for enum_name in ("artifacttype", "jobtype", "jobstatus"):
        op.execute(f"DROP TYPE IF EXISTS {enum_name}")
|
||||
21
app/shared/db/base.py
Normal file
21
app/shared/db/base.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from typing import Generator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from app.shared.config import settings
|
||||
|
||||
# Engine created from the database URI in the application settings.
engine = create_engine(settings.DATABASE_URI)

# Session factory bound to the engine; autocommit and autoflush are disabled.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
def get_session() -> Generator[Session, None, None]:
    """Yield a database session and finalize it when the consumer is done.

    The session is committed once the consumer resumes the generator without
    error, rolled back if an exception is raised at the ``yield`` point (or
    by the commit itself), and closed in every case.

    Yields:
        A new ``Session`` created from ``SessionLocal``.

    Raises:
        Exception: whatever failed in the consumer or during commit is
            re-raised after the rollback instead of being swallowed.
    """
    db: Session = SessionLocal()
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        # Propagate the failure; hiding it would make a failed unit of work
        # look successful to the caller.
        raise
    finally:
        db.close()
|
||||
41
app/shared/db/dtos.py
Normal file
41
app/shared/db/dtos.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import enum
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import AnyHttpUrl, BaseModel, Json
|
||||
|
||||
|
||||
class ArtifactType(enum.Enum):
    """Kinds of artifact; the only member is ``RawTranscript``."""

    RawTranscript = "RawTranscript"
|
||||
|
||||
|
||||
class JobType(enum.Enum):
    """Kinds of job; the only member is ``Transcript``."""

    Transcript = "Transcript"
|
||||
|
||||
|
||||
class JobStatus(enum.Enum):
    """Status values a job can carry; each member's value equals its name."""

    Create = "Create"
    Error = "Error"
    Success = "Success"
|
||||
|
||||
|
||||
class WithDbFields(BaseModel):
    """Base schema carrying the ``id``, ``created_at`` and ``updated_at`` fields."""

    id: UUID
    created_at: datetime
    updated_at: Optional[datetime]

    class Config:
        # Allow the schema to be populated from object attributes (ORM rows).
        orm_mode = True
|
||||
|
||||
|
||||
class Job(WithDbFields):
    """Schema for a job: the database fields plus ``status``, ``type``, ``url``."""

    status: JobStatus
    type: JobType
    # NOTE(review): required here, while the jobs.url column is declared
    # nullable in the migration -- confirm rows without a url cannot occur.
    url: AnyHttpUrl
|
||||
|
||||
|
||||
class Artifact(WithDbFields):
    """Schema for an artifact: the database fields plus its data, job and type."""

    # NOTE(review): pydantic's Json type parses a JSON-encoded string, while
    # the ORM column is a JSON type that presumably returns already-decoded
    # objects -- confirm that loading from an ORM row validates.
    data: Optional[Json]
    job_id: UUID
    type: ArtifactType
|
||||
50
app/shared/db/models.py
Normal file
50
app/shared/db/models.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import JSON, Column, DateTime, Enum, ForeignKey, String, func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Mapped, declarative_mixin, declared_attr
|
||||
|
||||
from .dtos import ArtifactType, JobStatus, JobType
|
||||
|
||||
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
|
||||
|
||||
|
||||
@declarative_mixin
class WithStandardFields:
    """Mixin that adds standard fields (id, created_at, updated_at)."""

    @declared_attr
    def created_at(cls) -> Mapped[DateTime]:
        # Non-nullable; the database fills it in through the now() server default.
        return Column(DateTime, server_default=func.now(), nullable=False)

    @declared_attr
    def updated_at(cls) -> Mapped[Optional[DateTime]]:
        # No default, so it stays NULL on insert; now() is applied on update.
        return Column(DateTime, onupdate=func.now())

    @declared_attr
    def id(cls) -> Mapped[UUID]:
        # UUID primary key; uuid.uuid4 supplies the value when none is given.
        return Column(
            UUID(as_uuid=True), primary_key=True, index=True, default=uuid.uuid4
        )
|
||||
|
||||
|
||||
class Job(Base, WithStandardFields):
    """ORM model mapped to the ``jobs`` table."""

    __tablename__ = "jobs"

    # TODO: job config
    url = Column(String(length=2048))
    status = Column(Enum(JobStatus), nullable=False)
    type = Column(Enum(JobType), nullable=False)
|
||||
|
||||
|
||||
class Artifact(Base, WithStandardFields):
    """ORM model mapped to the ``artifacts`` table; each row references a job."""

    __tablename__ = "artifacts"

    # Required reference to jobs.id, declared with ON DELETE CASCADE.
    job_id = Column(
        UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), nullable=False
    )

    # none_as_null=True: a Python None is stored as SQL NULL, not JSON null.
    data = Column(JSON(none_as_null=True))
    type = Column(Enum(ArtifactType), nullable=False)
|
||||
Reference in New Issue
Block a user