feat: upgrade tooling, fix lints

This commit is contained in:
Felix Spöttel
2023-01-25 16:56:59 +01:00
parent b4e57451e8
commit 8669a18110
17 changed files with 159 additions and 68 deletions

View File

@@ -1,4 +1,4 @@
DATABASE_URI="postgresql://postgres:postgres@localhost:5432/whisper_api_test"
DATABASE_URI="postgresql://postgres:postgres@localhost:5432/whisperbox_test"
ENVIRONMENT="development"
API_SECRET="foo"
REDIS_URI="redis://localhost:6379/0"
BROKER_URI="redis://localhost:6379/0"

View File

@@ -1,4 +1,4 @@
[flake8]
max-line-length = 88
max-line-length = 90
extend-ignore = E203
exclude = .git,__pycache__,__init__.py,.mypy_cache,.pytest_cache,app/alembic/versions

2
.gitignore vendored
View File

@@ -161,3 +161,5 @@ cython_debug/
# VS Code
.vscode
.DS_Store

View File

@@ -1,3 +1,5 @@
clean:
docker-compose -f docker/dev.docker-compose.yml down --volumes --remove-orphans
dev:
docker-compose -f docker/dev.docker-compose.yml build --progress tty
docker-compose -f docker/dev.docker-compose.yml up --remove-orphans
@@ -7,8 +9,8 @@ fmt:
isort app
lint:
mypy app
flake8 app
mypy app
test:
pytest

View File

@@ -7,7 +7,9 @@ class Settings(BaseSettings):
API_SECRET: str
DATABASE_URI: str
ENVIRONMENT: str
REDIS_URI: str
# derived settings
BROKER_URI: str
if "pytest" in sys.modules:

View File

@@ -0,0 +1,27 @@
"""add_job_meta_field
Revision ID: 684a5e546314
Revises: bb249ed79907
Create Date: 2023-01-18 13:38:07.692830
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "684a5e546314"
down_revision = "bb249ed79907"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add a nullable JSON ``meta`` column to the ``jobs`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    meta_column = sa.Column("meta", sa.JSON(none_as_null=True), nullable=True)
    op.add_column("jobs", meta_column)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop the ``meta`` column from the ``jobs`` table again."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name="jobs", column_name="meta")
    # ### end Alembic commands ###

View File

@@ -1,16 +1,16 @@
"""add_job_tables
Revision ID: bb249ed79907
Revises:
Revises:
Create Date: 2023-01-17 14:30:30.920466
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = 'bb249ed79907'
revision = "bb249ed79907"
down_revision = None
branch_labels = None
depends_on = None
@@ -18,34 +18,46 @@ depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('jobs',
sa.Column('url', sa.String(length=2048), nullable=True),
sa.Column('status', sa.Enum('create', 'error', 'success', name='jobstatus'), nullable=False),
sa.Column('type', sa.Enum('transcript', name='jobtype'), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.PrimaryKeyConstraint('id')
op.create_table(
"jobs",
sa.Column("url", sa.String(length=2048), nullable=True),
sa.Column(
"status",
sa.Enum("create", "error", "processing", "success", name="jobstatus"),
nullable=False,
),
sa.Column("type", sa.Enum("transcript", name="jobtype"), nullable=False),
sa.Column(
"created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False
),
sa.Column("updated_at", sa.DateTime(), nullable=True),
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(op.f('ix_jobs_id'), 'jobs', ['id'], unique=False)
op.create_table('artifacts',
sa.Column('job_id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('data', sa.JSON(none_as_null=True), nullable=True),
sa.Column('type', sa.Enum('raw_transcript', name='artifacttype'), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.ForeignKeyConstraint(['job_id'], ['jobs.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id')
op.create_index(op.f("ix_jobs_id"), "jobs", ["id"], unique=False)
op.create_table(
"artifacts",
sa.Column("job_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("data", sa.JSON(none_as_null=True), nullable=True),
sa.Column(
"type", sa.Enum("raw_transcript", name="artifacttype"), nullable=False
),
sa.Column(
"created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False
),
sa.Column("updated_at", sa.DateTime(), nullable=True),
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.ForeignKeyConstraint(["job_id"], ["jobs.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(op.f('ix_artifacts_id'), 'artifacts', ['id'], unique=False)
op.create_index(op.f("ix_artifacts_id"), "artifacts", ["id"], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_artifacts_id'), table_name='artifacts')
op.drop_table('artifacts')
op.drop_index(op.f('ix_jobs_id'), table_name='jobs')
op.drop_table('jobs')
op.drop_index(op.f("ix_artifacts_id"), table_name="artifacts")
op.drop_table("artifacts")
op.drop_index(op.f("ix_jobs_id"), table_name="jobs")
op.drop_table("jobs")
# ### end Alembic commands ###

View File

@@ -3,7 +3,16 @@ from datetime import datetime
from typing import Any, Optional
from uuid import UUID
from pydantic import AnyHttpUrl, BaseModel, Json
from pydantic import AnyHttpUrl, BaseModel
# Base schema carrying the identifier and timestamp fields shared by the DTOs
# that inherit from it. Documented with comments rather than a docstring so the
# generated schema description is left untouched.
class WithDbFields(BaseModel):
    id: UUID
    created_at: datetime
    # Optional: may be None.
    updated_at: Optional[datetime]

    class Config:
        # Allow building the schema from ORM objects via attribute access.
        orm_mode = True
class ArtifactType(str, enum.Enum):
@@ -16,23 +25,21 @@ class JobType(str, enum.Enum):
# States a job can be in. Mixing in ``str`` makes every member a string equal
# to its value.
class JobStatus(str, enum.Enum):
    create = "create"
    processing = "processing"
    error = "error"
    success = "success"
class WithDbFields(BaseModel):
id: UUID
created_at: datetime
updated_at: Optional[datetime]
class Config:
orm_mode = True
# Optional metadata attached to a job (see ``Job.meta``).
class JobMeta(BaseModel):
    # NOTE(review): presumably a language code for the transcript — confirm format.
    language: Optional[str]
    # NOTE(review): presumably the id of the background task handling the job — confirm.
    task_id: Optional[UUID]
# Schema describing a job; extends WithDbFields with the job-specific fields.
class Job(WithDbFields):
    status: JobStatus
    type: JobType
    # NOTE(review): required here; verify the persisted job always has a URL,
    # otherwise validation of the response will fail.
    url: AnyHttpUrl
    meta: Optional[JobMeta]
class Artifact(WithDbFields):

View File

@@ -3,8 +3,8 @@ from typing import Optional
from sqlalchemy import JSON, Column, DateTime, Enum, ForeignKey, String, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Mapped, declarative_mixin, declared_attr
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.orm import Mapped, declarative_mixin # type: ignore
from .dtos import ArtifactType, JobStatus, JobType
@@ -33,9 +33,9 @@ class WithStandardFields:
class Job(Base, WithStandardFields):
__tablename__ = "jobs"
# TODO: job config
url = Column(String(length=2048))
status = Column(Enum(JobStatus), nullable=False)
meta = Column(JSON(none_as_null=True))
type = Column(Enum(JobType), nullable=False)

View File

@@ -3,10 +3,10 @@ from typing import Dict, Generator
import pytest
from sqlalchemy.orm import Session
from sqlalchemy_utils import create_database, database_exists, drop_database
from app.shared.config import settings
from app.shared.db.base import SessionLocal, engine, get_session
import app.shared.db.models as models
from app.shared.config import settings
from app.shared.db.base import SessionLocal, engine, get_session
from app.web.main import app

View File

@@ -1,12 +1,13 @@
from typing import Dict
from fastapi.testclient import TestClient
import pytest
from fastapi.testclient import TestClient
from sqlalchemy.orm import Session
from app.shared.db.dtos import JobType, JobStatus
from app.web.main import app
import app.shared.db.models as models
import app.shared.db.dtos as dtos
import app.shared.db.models as models
from app.shared.db.dtos import JobStatus, JobType
from app.web.main import app
client = TestClient(app)
@@ -73,7 +74,7 @@ def test_get_job_pass(auth_headers: Dict[str, str], mock_job: models.Job) -> Non
def test_get_job_not_found(auth_headers: Dict[str, str], mock_job: models.Job) -> None:
res = client.get(
f"/api/v1/jobs/c8ecf5ea-77cf-48a2-9ecd-199ef35e0ccb",
"/api/v1/jobs/c8ecf5ea-77cf-48a2-9ecd-199ef35e0ccb",
headers=auth_headers,
)
assert res.status_code == 404

View File

@@ -2,7 +2,6 @@ from typing import Dict
from fastapi.testclient import TestClient
from app.shared.config import settings
from app.web.main import app
client = TestClient(app)
@@ -19,7 +18,7 @@ def test_authorization_header_malformed() -> None:
def test_incorrect_api_key() -> None:
res = client.get("/api/v1", headers={"Authorization": "Bearer incorrect" })
res = client.get("/api/v1", headers={"Authorization": "Bearer incorrect"})
assert res.status_code == 401

View File

@@ -9,6 +9,7 @@ import app.shared.db.dtos as dtos
import app.shared.db.models as models
from app.shared.db.base import get_session
from app.web.security import authenticate_api_key
from app.worker.main import transcribe
app = FastAPI()
@@ -30,9 +31,11 @@ def create_job(
payload: TranscriptPayload, session: Session = Depends(get_session)
) -> models.Job:
job = models.Job(url=payload.url, status=dtos.JobStatus.create, type=payload.type)
session.add(job)
session.flush()
transcribe.delay(job.id)
return job
@@ -51,7 +54,7 @@ def get_transcripts(
@api_router.get("/jobs/{id}", response_model=dtos.Job)
def get_transcript(
id: UUID = Path(), session: Session = Depends(get_session)
) -> Optional[dtos.Job]:
) -> Optional[models.Job]:
job = session.query(models.Job).filter(models.Job.id == id).one_or_none()
if not job:
raise HTTPException(status_code=404)
@@ -61,7 +64,7 @@ def get_transcript(
@api_router.get("/jobs/{id}/artifacts", response_model=List[dtos.Artifact])
def get_artifacts_for_job(
id: UUID = Path(), session: Session = Depends(get_session)
) -> List[dtos.Artifact]:
) -> List[models.Artifact]:
artifacts = (
session.query(models.Artifact).filter(models.Artifact.job_id == id)
).all()

View File

@@ -1,7 +1,37 @@
from celery import Celery
from time import sleep
from uuid import UUID
from celery import Celery
from sqlalchemy.orm import Session
import app.shared.db.dtos as dtos
import app.shared.db.models as models
from app.shared.config import settings
from app.shared.db.base import SessionLocal
celery = Celery(__name__)
celery.conf.broker_url = settings.REDIS_URI
celery.conf.broker_url = settings.BROKER_URI
def update_job_status(db: Session, job_id: UUID, status: dtos.JobStatus) -> None:
    """Set the status of a single job and commit the change.

    Args:
        db: Session used for the lookup and the commit.
        job_id: Primary key of the job to update.
        status: New status to store on the job.

    Raises:
        Whatever ``Query.one()`` raises when the id does not match exactly one
        job row.
    """
    db.begin()
    matching_jobs = db.query(models.Job).filter(models.Job.id == job_id)
    record = matching_jobs.one()
    record.status = status
    db.commit()
@celery.task()
def transcribe(job_id: UUID) -> int:
    """Run a transcription job and record its progress in the job's status.

    The job is marked ``processing`` before the work starts and ``success``
    afterwards. If anything fails, the failure is logged and the job is marked
    ``error``; the exception is not re-raised.

    Args:
        job_id: Primary key of the job to process.
            NOTE(review): depending on the Celery serializer this may arrive as
            a string rather than a ``UUID`` — confirm.

    Returns:
        Always ``0``.
    """
    import logging

    # Create the session before entering ``try``: if this call fails there is
    # nothing to roll back or close, and the handlers below would otherwise
    # reference an unbound ``db``.
    db: Session = SessionLocal()
    try:
        update_job_status(db, job_id, dtos.JobStatus.processing)
        # Presumably a stand-in for the actual transcription work.
        sleep(60)
        # update_job_status commits on its own, so no extra commit is needed.
        update_job_status(db, job_id, dtos.JobStatus.success)
    except Exception:
        logging.getLogger(__name__).exception("Transcription job %s failed", job_id)
        # Discard any transaction left open by the failed step so the error
        # status is written in a fresh transaction.
        db.rollback()
        update_job_status(db, job_id, dtos.JobStatus.error)
    finally:
        db.close()
    return 0

View File

@@ -2,18 +2,18 @@ version: "3.8"
x-app-variables: &app-variables
API_SECRET: a_very_secret_token
DATABASE_URI: postgresql://postgres:postgres@postgres/whisper_api
DATABASE_URI: postgresql://postgres:postgres@postgres/whisperbox
ENVIRONMENT: development
REDIS_URI: redis://redis:6379/0
BROKER_URI: redis://redis:6379/0
services:
postgres:
container_name: whisper_api_postgres
container_name: whisperbox_postgres
image: postgres:15-alpine
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: whisper_api
POSTGRES_DB: whisperbox
ports:
- "5432:5432"
networks:
@@ -27,8 +27,9 @@ services:
retries: 5
redis:
container_name: whisper_api_redis
container_name: whisperbox_redis
image: redis:7-alpine
command: ["redis-server", "--save", "60 1"]
ports:
- 6379:6379
networks:
@@ -37,7 +38,7 @@ services:
- redis-data:/data
app:
container_name: whisper_api_app
container_name: whisperbox_app
build:
context: ../
dockerfile: docker/app.dev.Dockerfile
@@ -58,7 +59,7 @@ services:
build:
context: ../
dockerfile: docker/worker.dev.Dockerfile
container_name: whisper_api_worker
container_name: whisperbox_worker
volumes:
- ../:/code
environment: *app-variables
@@ -69,7 +70,7 @@ services:
- app
flower:
container_name: whisper_api_flower
container_name: whisperbox_flower
image: mher/flower
command: celery --broker redis://redis:6379/0 flower --port=5555
ports:

View File

@@ -1,4 +1,4 @@
[mypy]
plugins = pydantic.mypy, sqlmypy, sqlalchemy.ext.mypy.plugin
plugins = pydantic.mypy, sqlmypy
ignore_missing_imports = True
disallow_untyped_defs = True

View File

@@ -1,5 +1,5 @@
[project]
name = "whisper-api"
name = "whisperbox"
description = ""
version = "0.0.1"
@@ -7,7 +7,7 @@ dependencies=[
"celery[redis] ==5.2.7",
"psycopg2 ==2.9.5",
"sqlalchemy[mypy] == 1.4.45",
"python-dotenv ==0.21.0",
"pydantic ==1.10.4"
]
[project.optional-dependencies]
@@ -21,7 +21,7 @@ worker=[
"whisper-openai ==1.0.0"
]
dev = [
lint = [
# code formatting
"black",
"isort",
@@ -34,9 +34,14 @@ test = [
"httpx",
"sqlalchemy-stubs",
"sqlalchemy-utils",
"python-dotenv",
"pytest"
]
worker_dev = [
"watchdog[watchmedo]"
]
[tool.isort]
profile = "black"