diff --git a/.env.dev b/.env.dev new file mode 100644 index 0000000..a8da679 --- /dev/null +++ b/.env.dev @@ -0,0 +1,6 @@ +API_SECRET="a_very_secret_token" +DOMAIN="whisperbox.localhost" +WHISPER_MODEL="tiny" +ENVIRONMENT="development" +DATABASE_URI="sqlite:///./whisperbox.sqlite" +BROKER_URL="redis://redis:6379/0" diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..becac1f --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +API_SECRET="change_me" +WHISPER_MODEL="small" +DOMAIN="whisperbox.localhost" diff --git a/Makefile b/Makefile index eb571dc..f3bf2ab 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ clean: - docker compose -f docker/dev/docker-compose.yml down --volumes --remove-orphans + docker compose -f docker-compose.base.yml -f docker-compose.dev.yml down --volumes --remove-orphans dev: - docker compose -f docker/dev/docker-compose.yml build - docker compose -f docker/dev/docker-compose.yml up --remove-orphans + docker compose -f docker-compose.base.yml -f docker-compose.dev.yml build + docker compose -f docker-compose.base.yml -f docker-compose.dev.yml up --remove-orphans fmt: black app @@ -17,5 +17,5 @@ test: pytest run: - docker compose -f docker/prod/docker-compose.yml build - docker compose -f docker/prod/docker-compose.yml up --remove-orphans + docker compose -f docker-compose.base.yml -f docker-compose.prod.yml build + docker compose -f docker-compose.base.yml -f docker-compose.prod.yml up --remove-orphans diff --git a/app/worker/strategies/local.py b/app/worker/strategies/local.py index d2eb0f6..42cfbf1 100644 --- a/app/worker/strategies/local.py +++ b/app/worker/strategies/local.py @@ -8,6 +8,7 @@ from uuid import UUID import requests from pydantic import BaseModel from sqlalchemy.orm import Session +import torch from whisper import load_model import app.shared.db.schemas as schemas @@ -26,6 +27,18 @@ class LocalStrategy: self.job_id = job_id self.url = url self.config = config + + if torch.cuda.is_available(): + 
self.model = load_model( + os.environ["WHISPER_MODEL"], + download_root="/models" + ).cuda() + else: + self.model = load_model( + os.environ["WHISPER_MODEL"], + download_root="/models" + ) + logger.info(f"[{self.job_id}]: initialized local strategy.") def transcribe(self) -> List[Any]: @@ -54,7 +67,6 @@ class LocalStrategy: def run_whisper(self, filepath: str, task: str) -> List[Any]: try: language = self.config.language if self.config else None - model = load_model("small", download_root="/models") result = model.transcribe( filepath, diff --git a/docker-compose.base.yml b/docker-compose.base.yml new file mode 100644 index 0000000..b9a15ff --- /dev/null +++ b/docker-compose.base.yml @@ -0,0 +1,69 @@ +version: "3.8" + +services: + traefik: + image: "traefik:latest" + restart: unless-stopped + ports: + - "80:80" + command: + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--providers.docker.network=whisperbox_transcription_traefik" + - "--entrypoints.web.address=:80" + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + - web + networks: + - traefik + + redis: + image: redis:7-alpine + ports: + - 6379:6379 + networks: + - app + deploy: + resources: + limits: + memory: 128M + + worker: + build: + context: . + dockerfile: worker.Dockerfile + args: + WHISPER_MODEL: tiny + networks: + - app + depends_on: + - redis + healthcheck: + test: ["CMD-SHELL", "celery -b redis://redis:6379/0 inspect ping -d celery@$$HOSTNAME"] + interval: 5s + timeout: 5s + retries: 5 + + web: + build: + context: . 
+ dockerfile: web.Dockerfile + networks: + - app + - traefik + depends_on: + worker: + condition: service_healthy + labels: + - "traefik.enable=true" + - "traefik.http.services.web.loadbalancer.server.port=8000" + - "traefik.http.routers.web.rule=(Host(`${DOMAIN}`))" +networks: + app: + driver: bridge + traefik: + driver: bridge + +volumes: + whisperbox-data: diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 0000000..fc4a739 --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,36 @@ +version: "3.8" + +services: + traefik: + container_name: whisperbox_traefik_dev + redis: + container_name: whisperbox_redis_dev + + web: + container_name: whisperbox_web_dev + env_file: .env + command: bash -c "alembic upgrade head && uvicorn app.web.main:app --reload --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --log-level info" + volumes: + - ./:/etc/whisperbox/ + labels: + - "traefik.http.routers.web.entrypoints=web" + + worker: + container_name: whisperbox_worker_dev + env_file: .env + command: watchmedo auto-restart -d app/worker -p *.py --recursive celery -- --app=app.worker.main.celery worker --loglevel=info --concurrency=1 --pool solo + volumes: + - ./:/etc/whisperbox/ + + flower: + container_name: whisperbox_flower_dev + image: mher/flower + command: celery --broker redis://redis:6379/0 flower --port=5555 + ports: + - 5555:5555 + depends_on: + worker: + condition: service_healthy + networks: + - app + diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..35c6e41 --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,25 @@ +version: "3.8" + +services: + redis: + container_name: whisperbox_redis + + worker: + container_name: whisperbox_worker + env_file: .env + volumes: + - whisperbox-data:/etc/whisperbox/data + # + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] + + web: + container_name: whisperbox_web + env_file: .env + volumes: 
+ - whisperbox-data:/etc/whisperbox/data diff --git a/docker/dev/docker-compose.yml b/docker/dev/docker-compose.yml deleted file mode 100644 index 1f7efdd..0000000 --- a/docker/dev/docker-compose.yml +++ /dev/null @@ -1,73 +0,0 @@ -version: "3.8" - -x-app-variables: &app-variables - API_SECRET: a_very_secret_token - DATABASE_URI: sqlite:///./whisperbox.sqlite - ENVIRONMENT: development - BROKER_URL: redis://redis:6379/0 - WHISPER_MODELS: small - -services: - redis: - container_name: whisperbox_redis_dev - image: redis:7-alpine - ports: - - 6379:6379 - networks: - - app - deploy: - resources: - limits: - memory: 128M - - web: - container_name: whisperbox_web_dev - build: - context: ../../ - dockerfile: docker/dev/web.Dockerfile - environment: *app-variables - ports: - - "8000:80" - networks: - - app - volumes: - - ../../:/code - depends_on: - worker: - condition: service_healthy - - worker: - build: - context: ../../ - dockerfile: docker/dev/worker.Dockerfile - args: - WHISPER_MODELS: small - container_name: whisperbox_worker_dev - volumes: - - ../../:/code - environment: *app-variables - depends_on: - - redis - networks: - - app - healthcheck: - test: ["CMD-SHELL", "celery -b redis://redis:6379/0 inspect ping -d celery@$$HOSTNAME"] - interval: 5s - timeout: 5s - retries: 5 - - flower: - container_name: whisperbox_flower_dev - image: mher/flower - command: celery --broker redis://redis:6379/0 flower --port=5555 - ports: - - 5555:5555 - depends_on: - worker: - condition: service_healthy - networks: - - app - -networks: - app: - driver: bridge diff --git a/docker/dev/web.Dockerfile b/docker/dev/web.Dockerfile deleted file mode 100644 index b373bec..0000000 --- a/docker/dev/web.Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.10 - -WORKDIR /code - -ENV PYTHONIOENCODING=utf-8 -ENV PATH=/root/.local/bin:$PATH - -COPY pyproject.toml . 
-RUN pip install --no-cache-dir --user .[web] - -CMD alembic upgrade head && uvicorn app.web.main:app --reload --host ${HOST:-0.0.0.0} --port ${PORT:-80} --log-level info diff --git a/docker/dev/worker.Dockerfile b/docker/dev/worker.Dockerfile deleted file mode 100644 index 7549c8f..0000000 --- a/docker/dev/worker.Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.10 - -ARG WHISPER_MODELS - -WORKDIR /code - -ENV PYTHONIOENCODING=utf-8 -ENV PATH=/root/.local/bin:$PATH - -COPY --from=mwader/static-ffmpeg:5.1.2 /ffmpeg /usr/local/bin/ -COPY --from=mwader/static-ffmpeg:5.1.2 /ffprobe /usr/local/bin/ - -COPY pyproject.toml . -RUN pip install --no-cache-dir --user .[worker,worker_dev] - -COPY scripts/download_models.py . -RUN python download_models.py ${WHISPER_MODELS} - -CMD watchmedo auto-restart -d app/worker -p *.py --recursive celery -- --app=app.worker.main.celery worker --loglevel=info --concurrency=1 diff --git a/docker/prod/.env.example b/docker/prod/.env.example deleted file mode 100644 index 654c500..0000000 --- a/docker/prod/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -API_SECRET="change_me" -WHISPER_MODELS="small" diff --git a/docker/prod/docker-compose.yml b/docker/prod/docker-compose.yml deleted file mode 100644 index 77c2608..0000000 --- a/docker/prod/docker-compose.yml +++ /dev/null @@ -1,70 +0,0 @@ -version: "3.8" - -x-app-variables: &app-variables - DATABASE_URI: sqlite:////etc/whisperbox/data/whisperbox.sqlite - ENVIRONMENT: production - BROKER_URL: redis://redis:6379/0 - -services: - redis: - container_name: whisperbox_redis - image: redis:7-alpine - ports: - - 6379:6379 - networks: - - app - deploy: - resources: - limits: - memory: 128M - - worker: - container_name: whisperbox_worker - build: - context: ../../ - # - # dockerfile: docker/prod/worker.gpu.Dockerfile - dockerfile: docker/prod/worker.Dockerfile - environment: *app-variables - env_file: .env - volumes: - - whisperbox-data:/etc/whisperbox/data - networks: - - app - depends_on: - - 
redis - # - # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: 1 - # capabilities: [gpu] - - # TODO: reverse proxy - web: - container_name: whisperbox_web - build: - context: ../../ - dockerfile: docker/prod/web.Dockerfile - environment: *app-variables - env_file: .env - ports: - - "8000:8000" - networks: - - app - depends_on: - worker: - condition: service_healthy - volumes: - - whisperbox-data:/etc/whisperbox/data - - whisper-models:/models - -volumes: - whisperbox-data: - whisper-models: - -networks: - app: - driver: bridge diff --git a/pyproject.toml b/pyproject.toml index 558dbbf..bb4e498 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ web=[ ] worker=[ + "watchdog[watchmedo] ==2.3.1", "whisper-openai ==1.0.0", "requests ==2.28.2" ] @@ -39,10 +40,6 @@ tooling = [ "types-requests ==2.28.11.15" ] -worker_dev = [ - "watchdog[watchmedo] ==2.3.1" -] - [tool.isort] profile = "black" diff --git a/scripts/download_models.py b/scripts/download_models.py index ee92627..fb509a4 100644 --- a/scripts/download_models.py +++ b/scripts/download_models.py @@ -1,8 +1,9 @@ import os +import sys from whisper import _download, _MODELS # type: ignore if __name__ == "__main__": - for name in os.environ["WHISPER_MODELS"].split(","): - _download(_MODELS[name], "/models/", False) - if name != "large": - _download(_MODELS[f"{name}.en"], "/models/", False) + model_name = sys.argv[1].strip() + _download(_MODELS[model_name], "/models/", False) + # if model_name != "large": + # _download(_MODELS[f"{model_name}.en"], "/models/", False) diff --git a/docker/prod/web.Dockerfile b/web.Dockerfile similarity index 100% rename from docker/prod/web.Dockerfile rename to web.Dockerfile diff --git a/docker/prod/worker.Dockerfile b/worker.Dockerfile similarity index 87% rename from docker/prod/worker.Dockerfile rename to worker.Dockerfile index d76f524..9ba5d3c 100644 --- a/docker/prod/worker.Dockerfile +++ b/worker.Dockerfile @@ -10,7 +10,7 @@ RUN 
python -m venv /opt/venv && \ FROM python:3.10 as python-deploy -ARG WHISPER_MODELS +ARG WHISPER_MODEL WORKDIR /etc/whisperbox @@ -25,6 +25,6 @@ ENV VIRTUAL_ENV /opt/venv ENV PATH /opt/venv/bin:$PATH COPY scripts/download_models.py . -RUN python download_models.py ${WHISPER_MODELS} +RUN python download_models.py ${WHISPER_MODEL} -CMD celery --app=app.worker.main.celery worker --loglevel=info --concurrency=1 +CMD celery --app=app.worker.main.celery worker --loglevel=info --concurrency=1 --pool=solo