mirror of
https://github.com/bellingcat/whisperbox-transcribe.git
synced 2026-06-08 03:28:35 +03:00
feat: add traefik, simplify docker setup
This commit is contained in:
6
.env.dev
Normal file
6
.env.dev
Normal file
@@ -0,0 +1,6 @@
|
||||
API_SECRET="a_very_secret_token"
|
||||
DOMAIN="whisperbox.localhost"
|
||||
WHISPER_MODEL="tiny"
|
||||
ENVIRONMENT="development"
|
||||
DATABASE_URI="sqlite:///./whisperbox.sqlite"
|
||||
BROKER_URL="redis://redis:6379/0"
|
||||
3
.env.example
Normal file
3
.env.example
Normal file
@@ -0,0 +1,3 @@
|
||||
API_SECRET="change_me"
|
||||
WHISPER_MODEL="small"
|
||||
DOMAIN="whisperbox.localhost"
|
||||
10
Makefile
10
Makefile
@@ -1,9 +1,9 @@
|
||||
clean:
|
||||
docker compose -f docker/dev/docker-compose.yml down --volumes --remove-orphans
|
||||
docker compose -f docker-compose.base.yml -f docker-compose.dev.yml down --volumes --remove-orphans
|
||||
|
||||
dev:
|
||||
docker compose -f docker/dev/docker-compose.yml build
|
||||
docker compose -f docker/dev/docker-compose.yml up --remove-orphans
|
||||
docker compose -f docker-compose.base.yml -f docker-compose.dev.yml build
|
||||
docker compose -f docker-compose.base.yml -f docker-compose.dev.yml up --remove-orphans
|
||||
|
||||
fmt:
|
||||
black app
|
||||
@@ -17,5 +17,5 @@ test:
|
||||
pytest
|
||||
|
||||
run:
|
||||
docker compose -f docker/prod/docker-compose.yml build
|
||||
docker compose -f docker/prod/docker-compose.yml up --remove-orphans
|
||||
docker compose -f docker-compose.base.yml -f docker-compose.prod.yml build
|
||||
docker compose -f docker-compose.base.yml -f docker-compose.prod.yml up --remove-orphans
|
||||
|
||||
@@ -8,6 +8,7 @@ from uuid import UUID
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
import torch
|
||||
from whisper import load_model
|
||||
|
||||
import app.shared.db.schemas as schemas
|
||||
@@ -26,6 +27,18 @@ class LocalStrategy:
|
||||
self.job_id = job_id
|
||||
self.url = url
|
||||
self.config = config
|
||||
|
||||
if torch.cuda.is_available():
|
||||
self.model = load_model(
|
||||
os.environ["WHISPER_MODEL"],
|
||||
download_root="/models"
|
||||
).cuda()
|
||||
else:
|
||||
self.model = load_model(
|
||||
os.environ["WHISPER_MODEL"],
|
||||
download_root="/models"
|
||||
)
|
||||
|
||||
logger.info(f"[{self.job_id}]: initialized local strategy.")
|
||||
|
||||
def transcribe(self) -> List[Any]:
|
||||
@@ -54,7 +67,6 @@ class LocalStrategy:
|
||||
def run_whisper(self, filepath: str, task: str) -> List[Any]:
|
||||
try:
|
||||
language = self.config.language if self.config else None
|
||||
model = load_model("small", download_root="/models")
|
||||
|
||||
result = model.transcribe(
|
||||
filepath,
|
||||
|
||||
69
docker-compose.base.yml
Normal file
69
docker-compose.base.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
traefik:
|
||||
image: "traefik:latest"
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "80:80"
|
||||
command:
|
||||
- "--providers.docker=true"
|
||||
- "--providers.docker.exposedbydefault=false"
|
||||
- "--providers.docker.network=whisperbox_transcription_traefik"
|
||||
- "--entrypoints.web.address=:80"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
depends_on:
|
||||
- web
|
||||
networks:
|
||||
- traefik
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
networks:
|
||||
- app
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 128M
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: worker.Dockerfile
|
||||
args:
|
||||
WHISPER_MODEL: tiny
|
||||
networks:
|
||||
- app
|
||||
depends_on:
|
||||
- redis
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "celery -b redis://redis:6379/0 inspect ping -d celery@$$HOSTNAME"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
web:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: web.Dockerfile
|
||||
networks:
|
||||
- app
|
||||
- traefik
|
||||
depends_on:
|
||||
worker:
|
||||
condition: service_healthy
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.web.loadbalancer.server.port=8000"
|
||||
- "traefik.http.routers.web.rule=(Host(`${DOMAIN}`))"
|
||||
networks:
|
||||
app:
|
||||
driver: bridge
|
||||
traefik:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
whisperbox-data:
|
||||
36
docker-compose.dev.yml
Normal file
36
docker-compose.dev.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
traefik:
|
||||
container_name: whisperbox_traefik_dev
|
||||
redis:
|
||||
container_name: whisperbox_redis_dev
|
||||
|
||||
web:
|
||||
container_name: whisperbox_web_dev
|
||||
env_file: .env
|
||||
command: bash -c "alembic upgrade head && uvicorn app.web.main:app --reload --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --log-level info"
|
||||
volumes:
|
||||
- ./:/etc/whisperbox/
|
||||
labels:
|
||||
- "traefik.http.routers.web.entrypoints=web"
|
||||
|
||||
worker:
|
||||
container_name: whisperbox_worker_dev
|
||||
env_file: .env
|
||||
command: watchmedo auto-restart -d app/worker -p *.py --recursive celery -- --app=app.worker.main.celery worker --loglevel=info --concurrency=1 --pool solo
|
||||
volumes:
|
||||
- ./:/etc/whisperbox/
|
||||
|
||||
flower:
|
||||
container_name: whisperbox_flower_dev
|
||||
image: mher/flower
|
||||
command: celery --broker redis://redis:6379/0 flower --port=5555
|
||||
ports:
|
||||
- 5555:5555
|
||||
depends_on:
|
||||
worker:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app
|
||||
|
||||
25
docker-compose.prod.yml
Normal file
25
docker-compose.prod.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
redis:
|
||||
container_name: whisperbox_redis
|
||||
|
||||
worker:
|
||||
container_name: whisperbox_worker
|
||||
env_file: .env
|
||||
volumes:
|
||||
- whisperbox-data:/etc/whisperbox/data
|
||||
# <ENABLE GPU SUPPORT>
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
|
||||
web:
|
||||
container_name: whisperbox_web
|
||||
env_file: .env
|
||||
volumes:
|
||||
- whisperbox-data:/etc/whisperbox/data
|
||||
@@ -1,73 +0,0 @@
|
||||
version: "3.8"
|
||||
|
||||
x-app-variables: &app-variables
|
||||
API_SECRET: a_very_secret_token
|
||||
DATABASE_URI: sqlite:///./whisperbox.sqlite
|
||||
ENVIRONMENT: development
|
||||
BROKER_URL: redis://redis:6379/0
|
||||
WHISPER_MODELS: small
|
||||
|
||||
services:
|
||||
redis:
|
||||
container_name: whisperbox_redis_dev
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
networks:
|
||||
- app
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 128M
|
||||
|
||||
web:
|
||||
container_name: whisperbox_web_dev
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: docker/dev/web.Dockerfile
|
||||
environment: *app-variables
|
||||
ports:
|
||||
- "8000:80"
|
||||
networks:
|
||||
- app
|
||||
volumes:
|
||||
- ../../:/code
|
||||
depends_on:
|
||||
worker:
|
||||
condition: service_healthy
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: docker/dev/worker.Dockerfile
|
||||
args:
|
||||
WHISPER_MODELS: small
|
||||
container_name: whisperbox_worker_dev
|
||||
volumes:
|
||||
- ../../:/code
|
||||
environment: *app-variables
|
||||
depends_on:
|
||||
- redis
|
||||
networks:
|
||||
- app
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "celery -b redis://redis:6379/0 inspect ping -d celery@$$HOSTNAME"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
flower:
|
||||
container_name: whisperbox_flower_dev
|
||||
image: mher/flower
|
||||
command: celery --broker redis://redis:6379/0 flower --port=5555
|
||||
ports:
|
||||
- 5555:5555
|
||||
depends_on:
|
||||
worker:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app
|
||||
|
||||
networks:
|
||||
app:
|
||||
driver: bridge
|
||||
@@ -1,11 +0,0 @@
|
||||
FROM python:3.10
|
||||
|
||||
WORKDIR /code
|
||||
|
||||
ENV PYTHONIOENCODING=utf-8
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
COPY pyproject.toml .
|
||||
RUN pip install --no-cache-dir --user .[web]
|
||||
|
||||
CMD alembic upgrade head && uvicorn app.web.main:app --reload --host ${HOST:-0.0.0.0} --port ${PORT:-80} --log-level info
|
||||
@@ -1,19 +0,0 @@
|
||||
FROM python:3.10
|
||||
|
||||
ARG WHISPER_MODELS
|
||||
|
||||
WORKDIR /code
|
||||
|
||||
ENV PYTHONIOENCODING=utf-8
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
COPY --from=mwader/static-ffmpeg:5.1.2 /ffmpeg /usr/local/bin/
|
||||
COPY --from=mwader/static-ffmpeg:5.1.2 /ffprobe /usr/local/bin/
|
||||
|
||||
COPY pyproject.toml .
|
||||
RUN pip install --no-cache-dir --user .[worker,worker_dev]
|
||||
|
||||
COPY scripts/download_models.py .
|
||||
RUN python download_models.py ${WHISPER_MODELS}
|
||||
|
||||
CMD watchmedo auto-restart -d app/worker -p *.py --recursive celery -- --app=app.worker.main.celery worker --loglevel=info --concurrency=1
|
||||
@@ -1,2 +0,0 @@
|
||||
API_SECRET="change_me"
|
||||
WHISPER_MODELS="small"
|
||||
@@ -1,70 +0,0 @@
|
||||
version: "3.8"
|
||||
|
||||
x-app-variables: &app-variables
|
||||
DATABASE_URI: sqlite:////etc/whisperbox/data/whisperbox.sqlite
|
||||
ENVIRONMENT: production
|
||||
BROKER_URL: redis://redis:6379/0
|
||||
|
||||
services:
|
||||
redis:
|
||||
container_name: whisperbox_redis
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
networks:
|
||||
- app
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 128M
|
||||
|
||||
worker:
|
||||
container_name: whisperbox_worker
|
||||
build:
|
||||
context: ../../
|
||||
# <ENABLE GPU SUPPORT>
|
||||
# dockerfile: docker/prod/worker.gpu.Dockerfile
|
||||
dockerfile: docker/prod/worker.Dockerfile
|
||||
environment: *app-variables
|
||||
env_file: .env
|
||||
volumes:
|
||||
- whisperbox-data:/etc/whisperbox/data
|
||||
networks:
|
||||
- app
|
||||
depends_on:
|
||||
- redis
|
||||
# <ENABLE GPU SUPPORT>
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
|
||||
# TODO: reverse proxy
|
||||
web:
|
||||
container_name: whisperbox_web
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: docker/prod/web.Dockerfile
|
||||
environment: *app-variables
|
||||
env_file: .env
|
||||
ports:
|
||||
- "8000:8000"
|
||||
networks:
|
||||
- app
|
||||
depends_on:
|
||||
worker:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- whisperbox-data:/etc/whisperbox/data
|
||||
- whisper-models:/models
|
||||
|
||||
volumes:
|
||||
whisperbox-data:
|
||||
whisper-models:
|
||||
|
||||
networks:
|
||||
app:
|
||||
driver: bridge
|
||||
@@ -18,6 +18,7 @@ web=[
|
||||
]
|
||||
|
||||
worker=[
|
||||
"watchdog[watchmedo] ==2.3.1",
|
||||
"whisper-openai ==1.0.0",
|
||||
"requests ==2.28.2"
|
||||
]
|
||||
@@ -39,10 +40,6 @@ tooling = [
|
||||
"types-requests ==2.28.11.15"
|
||||
]
|
||||
|
||||
worker_dev = [
|
||||
"watchdog[watchmedo] ==2.3.1"
|
||||
]
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import os
|
||||
import sys
|
||||
from whisper import _download, _MODELS # type: ignore
|
||||
|
||||
if __name__ == "__main__":
|
||||
for name in os.environ["WHISPER_MODELS"].split(","):
|
||||
_download(_MODELS[name], "/models/", False)
|
||||
if name != "large":
|
||||
_download(_MODELS[f"{name}.en"], "/models/", False)
|
||||
model_name = sys.argv[1].strip()
|
||||
_download(_MODELS[model_name], "/models/", False)
|
||||
# if model_name != "large":
|
||||
# _download(_MODELS[f"{model_name}.en"], "/models/", False)
|
||||
|
||||
@@ -10,7 +10,7 @@ RUN python -m venv /opt/venv && \
|
||||
|
||||
FROM python:3.10 as python-deploy
|
||||
|
||||
ARG WHISPER_MODELS
|
||||
ARG WHISPER_MODEL
|
||||
|
||||
WORKDIR /etc/whisperbox
|
||||
|
||||
@@ -25,6 +25,6 @@ ENV VIRTUAL_ENV /opt/venv
|
||||
ENV PATH /opt/venv/bin:$PATH
|
||||
|
||||
COPY scripts/download_models.py .
|
||||
RUN python download_models.py ${WHISPER_MODELS}
|
||||
RUN python download_models.py ${WHISPER_MODEL}
|
||||
|
||||
CMD celery --app=app.worker.main.celery worker --loglevel=info --concurrency=1
|
||||
CMD celery --app=app.worker.main.celery worker --loglevel=info --concurrency=1 --pool=solo
|
||||
Reference in New Issue
Block a user