From 83e04a0c0c3aa19055da2e990d377f94b246cd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Sp=C3=B6ttel?= <1682504+fspoettel@users.noreply.github.com> Date: Tue, 27 Jun 2023 16:34:36 +0200 Subject: [PATCH] docs: add deploy instructions --- .env.example | 11 ++++++++-- README.md | 47 ++++++++++++++++++++++++++++++++--------- docker-compose.prod.yml | 28 ++++++++++++------------ web.Dockerfile | 2 +- 4 files changed, 60 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index 227ff94..63e271e 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,14 @@ +# this key is later used to authenticate against the API. API_SECRET="change_me" + +# see https://github.com/openai/whisper#available-models-and-languages WHISPER_MODEL="small" -TRAEFIK_SSLEMAIL="" + TRAEFIK_DOMAIN="whisperbox-transcribe.localhost" + +TRAEFIK_SSLEMAIL="" + +# you probably do not need to change this. +BROKER_URL="redis://redis:6379/0" DATABASE_URI="sqlite:////etc/whisperbox-transcribe/data/whisperbox-transcribe.sqlite" ENVIRONMENT="production" -BROKER_URL="redis://redis:6379/0" diff --git a/README.md b/README.md index 3116290..efb7174 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,43 @@ > HTTP wrapper around [openai/whisper](https://github.com/openai/whisper). -## API documentation +## Overview -OpenAPI documentation can be accessed via `/docs`. +This project wraps OpenAI's `whisper` models with a simple HTTP API. It is assumed that the service is used by exactly one consumer, so a pre-shared API key is used as authentication. + +The API design takes inspiration from the [rev.ai async speech-to-text API](https://docs.rev.ai/api/asynchronous/get-started/). Transcription jobs are submitted via a `HTTP POST`, returning an internal reference, which can later be used to retrieve the transcription results. Results are stored in an internal database until retrieved, and can optionally be deleted afterwards. + +OpenAPI documentation for the service is available at `/docs`. 
## Deploy - 1. Clone this repository to the host machine. - 2. Create an `.env` file from `.env.example`. - 3. Run `make run` to start the server. - 4. Wrap in a systemd service to launch at startup. +### 0. Choose model & instance size + +Whisper provides [several sizes](https://github.com/openai/whisper#available-models-and-languages) of their model, where model size is a trade-off between model accuracy, resource usage and transcription speed. Smaller models are generally faster and lighter, but less accurate, especially for certain languages and translation tasks. + +Whisper inference can be run on both CPU and GPU, and this project supports both via a slightly altered docker compose configuration (see GPU support section). CPU inference is a lot slower, but easier to host. CPU inference generally scales well with CPU speed. + +Another consideration when choosing an instance is disk size. Audio needs to be downloaded to a temporary folder before it can be transcribed, so the disk needs to have enough free space to allow for that. For some hosting environments (e.g. Digital Ocean), it can make sense to mount an additional disk in the VM instead of choosing a larger instance. + +As a baseline, the `small` model can run on a `4GB` Digital Ocean droplet, transcribing at roughly 1-2x the playback speed of the original audio. + +### 1. Prepare host environment + +This project is intended to be run via [docker compose](https://docs.docker.com/compose/). To get started: + 1. [Install](https://docs.docker.com/engine/install/) docker engine. + 2. Clone this repository to the machine. + +### 2. Configure service + +1. Create an `.env` file from `.env.example` and configure it: + - `API_SECRET`: the API key used to authenticate against the API. + - `WHISPER_MODEL`: the whisper model size you want to use. + - `TRAEFIK_DOMAIN`: the domain you want to access the service from. Its A records need to point to the host IP. 
+ - `TRAEFIK_SSLEMAIL`: the email address passed to the ACME certificate authority (Let's Encrypt) when a TLS certificate is requested. + +### 3. Run service + +Run `make run` to start the server. To launch at system startup, wrap it in a systemd service. ## Develop @@ -31,10 +58,10 @@ Builds and starts the docker containers. ``` # Bindings -http://localhost:5555 => Celery dashboard -http://whisperbox-transcribe.localhost => API -http://whisperbox-transcribe.localhost => API docs -./whisperbox-transcribe.sqlite => Database +http://localhost:5555 => Celery dashboard +http://whisperbox-transcribe.localhost => API +http://whisperbox-transcribe.localhost/docs => API docs +./whisperbox-transcribe.sqlite => Database ``` ## Destroy diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index bf80161..3b00828 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -5,24 +5,22 @@ services: container_name: whisperbox-transcribe_traefik ports: - "80:80" - + - "443:443" command: - "--providers.docker=true" - "--providers.docker.exposedbydefault=false" - "--providers.docker.network=whisperbox-transcribe_traefik" - "--entrypoints.web.address=:80" - # - "--certificatesresolvers.le.acme.email=${TRAEFIK_SSLEMAIL}" - # - "--certificatesresolvers.le.acme.storage=./acme.json" - # - "--certificatesresolvers.le.acme.tlschallenge=true" - labels: - # - "traefik.http.routers.traefik.tls=true" - # - "traefik.http.routers.traefik.tls.certresolver=le" - - "traefik.http.routers.traefik.entrypoints=web" - # - "traefik.http.routers.http-catchall.rule=hostregexp(`{host:.+}`)" - # - "traefik.http.routers.http-catchall.entrypoints=web" - # - "traefik.http.routers.http-catchall.middlewares=redirect-to-https" - # - "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https" - + - "--entrypoints.websecure.address=:443" + - "--entrypoints.web.http.redirections.entryPoint.to=websecure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - 
"--entrypoints.web.http.redirections.entrypoint.permanent=true" + - "--certificatesresolvers.le.acme.email=${TRAEFIK_SSLEMAIL}" + - "--certificatesresolvers.le.acme.storage=/letsencrypt/acme.json" + - "--certificatesresolvers.le.acme.tlschallenge=true" + volumes: + - ./data/letsencrypt:/letsencrypt + - /var/run/docker.sock:/var/run/docker.sock:ro redis: container_name: whisperbox-transcribe_redis @@ -54,8 +52,9 @@ services: - "traefik.http.services.web.loadbalancer.server.port=8000" - "traefik.http.routers.web.rule=(Host(`${TRAEFIK_DOMAIN}`))" - - "traefik.http.routers.web.entrypoints=web" - # - "traefik.http.routers.web.tls=true" - # - "traefik.http.routers.web.tls.certresolver=le" + # the TLS router must attach to websecure (:443); web (:80) only redirects to it. + - "traefik.http.routers.web.entrypoints=websecure" + - "traefik.http.routers.web.tls=true" + - "traefik.http.routers.web.tls.certresolver=le" volumes: whisperbox-transcribe-data: diff --git a/web.Dockerfile b/web.Dockerfile index c106984..b165dcf 100644 --- a/web.Dockerfile +++ b/web.Dockerfile @@ -20,4 +20,4 @@ COPY alembic.ini . ENV VIRTUAL_ENV /opt/venv ENV PATH /opt/venv/bin:$PATH -CMD alembic upgrade head && uvicorn app.web.main:app --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --log-level info --workers 4 --proxy-head> +CMD alembic upgrade head && uvicorn app.web.main:app --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --log-level info --workers 4 --proxy-headers