From c69a5fa1c978c24369432377d8428816c3820fab Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Sun, 12 Jan 2025 12:38:12 +0000 Subject: [PATCH] Refactor Dockerfile for multi-stage builds. Combining environment and runtime stages due to Poetry's dependency on source code. --- Dockerfile | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index d7ca54a..b1c4bd0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,13 @@ -FROM webrecorder/browsertrix-crawler:1.0.4 +FROM webrecorder/browsertrix-crawler:1.0.4 AS base -ENV RUNNING_IN_DOCKER=1 -ENV PATH="/usr/local/bin:/root/.local/bin:$PATH" - -WORKDIR /app +ENV RUNNING_IN_DOCKER=1 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONFAULTHANDLER=1 \ + PATH="/root/.local/bin:$PATH" +# Installing system dependencies RUN add-apt-repository ppa:mozillateam/ppa && \ apt-get update && \ apt-get install -y --no-install-recommends gcc ffmpeg fonts-noto exiftool && \ @@ -17,20 +20,39 @@ RUN add-apt-repository ppa:mozillateam/ppa && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* + +# Poetry and runtime +FROM base AS runtime + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 + + RUN pip install --upgrade pip && \ pip install "poetry>=2.0.0" +WORKDIR /app + + COPY pyproject.toml poetry.lock README.md ./ +# Copy dependency files and install dependencies (excluding the package itself) +RUN poetry install --only main --no-root --no-cache -# doing this at the end helps during development, builds are quick + +# Copy code: This is needed for poetry to install the package itself, +# but the environment should be cached from the previous step if toml and lock files haven't changed COPY ./src/ . - -# Verify Poetry installation and install dependencies -RUN poetry install +RUN poetry install --only main --no-cache -ENTRYPOINT ["poetry", "run", "python3", "-m", "auto_archiver"] +# Update PATH to include virtual environment binaries +# Allowing entry point to run the application directly with Python +ENV VIRTUAL_ENV=/app/.venv \ + PATH="/app/.venv/bin:$PATH" +ENTRYPOINT ["python3", "-m", "auto_archiver"] # should be executed with 2 volumes (3 if local_storage is used) # docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive aa pipenv run python3 -m auto_archiver --config secrets/orchestration.yaml +