Files
whisperbox-transcribe/app/shared/db/schemas.py
2023-06-29 09:13:11 +02:00

81 lines
1.6 KiB
Python

from datetime import datetime
from uuid import UUID
from pydantic import AnyHttpUrl, BaseModel, Field
from app.shared.db.models import ArtifactType, JobStatus, JobType
# JSON field types
class JobConfig(BaseModel):
    """Configuration for a job."""

    # Optional language hint. The default is spelled out so the field stays
    # optional (as its description promises) instead of relying on how the
    # installed pydantic version treats a nullable annotation that has no
    # explicit default.
    language: str | None = Field(
        default=None,
        description=(
            "Spoken language in the media file. "
            "While optional, this can improve output."
        ),
    )
class JobMeta(BaseModel):
    """Metadata relating to a job's execution."""

    # Both fields are nullable and explicitly default to None, so an instance
    # can be built with neither of them set and the behaviour does not depend
    # on pydantic's implicit handling of nullable fields without a default.

    # Only populated when processing failed.
    error: str | None = Field(
        default=None,
        description="Will contain a descriptive error message if processing failed.",
    )
    # Identifier of the celery task that was submitted for this job.
    task_id: UUID | None = Field(
        default=None,
        description="Internal celery id of this job submission.",
    )
class RawTranscript(BaseModel):
    """A single transcript passage returned by whisper."""

    # All fields are required. The names presumably mirror the per-segment
    # dictionaries emitted by whisper; the per-field notes below follow that
    # assumption — TODO confirm against the whisper version in use.

    # Identifier of the passage (presumably its index within the transcript).
    id: int
    # Seek position associated with the passage (units not shown here).
    seek: int
    # Passage boundaries; presumably offsets in seconds — TODO confirm.
    start: float
    end: float
    # Transcribed text of the passage.
    text: str
    # Integer token ids belonging to the passage.
    tokens: list[int]
    # Decoding statistics reported alongside the passage; presumably usable
    # as quality / confidence signals — verify against whisper's docs.
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float
class LanguageDetection(BaseModel):
    """A language detection"""

    # Code of the detected language.
    # NOTE(review): the code format (e.g. ISO 639-1) is not visible here.
    code: str
# DB objects
class WithDbFields(BaseModel):
    # Base schema for the DB objects in this module (`Job`, `Artifact`):
    # declares the fields they share.
    id: UUID
    created_at: datetime
    # Nullable; presumably unset until the record is first modified
    # — TODO confirm against the DB model.
    updated_at: datetime | None

    class Config:
        # pydantic (v1) setting: allows the schema to be populated from
        # attribute-style objects such as ORM instances, not only from dicts.
        orm_mode = True
class Job(WithDbFields):
    """A transcription job for one media file."""

    # `JobStatus` and `JobType` are imported from app.shared.db.models.
    status: JobStatus
    type: JobType
    # Validated as an http(s) URL; presumably the location of the media file
    # to transcribe — TODO confirm.
    url: AnyHttpUrl
    # Execution metadata (error message, celery task id); may be null.
    meta: JobMeta | None
    # Job configuration (e.g. the spoken language); may be null.
    config: JobConfig | None
class Artifact(WithDbFields):
    # A stored record linked to a job through `job_id`.
    job_id: UUID
    # Payload: a language detection, a raw transcript passage, or null.
    # NOTE(review): pydantic v1 tries union members left to right, so the
    # member order is significant — keep it unless the change is intended.
    data: LanguageDetection | RawTranscript | None
    # `ArtifactType` is imported from app.shared.db.models; presumably it
    # tells which payload shape `data` carries — TODO confirm.
    type: ArtifactType