refactor: remove shared schemas

2026-06-13 05:58:35 +03:00 · 2023-06-29 12:38:56 +02:00
parent 908bd48170
commit b3f8d5c82a
8 changed files with 204 additions and 201 deletions
--- a/app/shared/db/models.py
+++ b/app/shared/db/models.py
@@ -1,12 +1,14 @@
 import enum
 import uuid

+from pydantic import BaseModel, Field
 from sqlalchemy import JSON, VARCHAR, Column, DateTime, Enum, ForeignKey, String, func
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.orm import Mapped, declarative_base, declarative_mixin, declared_attr

 Base = declarative_base()

+
 # Enums


@@ -32,7 +34,55 @@ class ArtifactType(str, enum.Enum):
    language_detection = "language_detection"


-# SQLAlchemy models
+# JSON field types
+
+
+class JobConfig(BaseModel):
+    """(JSON) Configuration for a job."""
+
+    language: str | None = Field(
+        description=(
+            "Spoken language in the media file. "
+            "While optional, this can improve output."
+        )
+    )
+
+
+class JobMeta(BaseModel):
+    """(JSON) Metadata relating to a job's execution."""
+
+    error: str | None = Field(
+        description="Will contain a descriptive error message if processing failed."
+    )
+
+    task_id: uuid.UUID | None = Field(
+        description="Internal celery id of this job submission."
+    )
+
+
+class RawTranscript(BaseModel):
+    """(JSON) A single transcript passage returned by whisper."""
+
+    id: int
+    seek: int
+    start: float
+    end: float
+    text: str
+    tokens: list[int]
+    temperature: float
+    avg_logprob: float
+    compression_ratio: float
+    no_speech_prob: float
+
+
+class LanguageDetection(BaseModel):
+    """(JSON) The result of a language detection."""
+
+    language_code: str
+
+
+# Sum type for all possible artifact data values
+ArtifactData = list[RawTranscript] | LanguageDetection | None


@declarative_mixin
--- a/app/shared/db/schemas.py
+++ b/app/shared/db/schemas.py
@@ -1,80 +0,0 @@
-from datetime import datetime
-from uuid import UUID
-
-from pydantic import AnyHttpUrl, BaseModel, Field
-
-from app.shared.db.models import ArtifactType, JobStatus, JobType
-
-# JSON field types
-
-
-class JobConfig(BaseModel):
-    """Configuration for a job."""
-
-    language: str | None = Field(
-        description=(
-            "Spoken language in the media file. "
-            "While optional, this can improve output."
-        )
-    )
-
-
-class JobMeta(BaseModel):
-    """Metadata relating to a job's execution."""
-
-    error: str | None = Field(
-        description="Will contain a descriptive error message if processing failed."
-    )
-
-    task_id: UUID | None = Field(
-        description="Internal celery id of this job submission."
-    )
-
-
-class RawTranscript(BaseModel):
-    """A single transcript passage returned by whisper."""
-
-    id: int
-    seek: int
-    start: float
-    end: float
-    text: str
-    tokens: list[int]
-    temperature: float
-    avg_logprob: float
-    compression_ratio: float
-    no_speech_prob: float
-
-
-class LanguageDetection(BaseModel):
-    """A language detection"""
-
-    code: str
-
-
-# DB objects
-
-
-class WithDbFields(BaseModel):
-    id: UUID
-    created_at: datetime
-    updated_at: datetime | None
-
-    class Config:
-        orm_mode = True
-
-
-class Job(WithDbFields):
-    """A transcription job for one media file."""
-
-    status: JobStatus
-    type: JobType
-    url: AnyHttpUrl
-    meta: JobMeta | None
-    config: JobConfig | None
-
-
-class Artifact(WithDbFields):
-    job_id: UUID
-    data: LanguageDetection | RawTranscript | None
-    type: ArtifactType