From 0dd45d90f1d4b607a87804af0d8fcf45de2ca070 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Fri, 8 Sep 2023 15:09:50 +0100 Subject: [PATCH] fix: docker+wacz troubles --- Dockerfile | 2 +- src/auto_archiver/enrichers/wacz_enricher.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b63aa90..0b4abf1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM webrecorder/browsertrix-crawler:latest ENV RUNNING_IN_DOCKER=1 -WORKDIR /app/auto-archiver +WORKDIR /app RUN pip install --upgrade pip && \ pip install pipenv && \ diff --git a/src/auto_archiver/enrichers/wacz_enricher.py b/src/auto_archiver/enrichers/wacz_enricher.py index b4eeefb..b269162 100644 --- a/src/auto_archiver/enrichers/wacz_enricher.py +++ b/src/auto_archiver/enrichers/wacz_enricher.py @@ -27,6 +27,7 @@ class WaczArchiverEnricher(Enricher, Archiver): def configs() -> dict: return { "profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."}, + "browsertrix_home": {"default": None, "help": "path to use when calling docker run with a volume, by default it will be the tmp folder generated during execution, but setting this option is needed when running the auto-archiver in a docker container that calls another container via DooD."}, "timeout": {"default": 120, "help": "timeout for WACZ generation in seconds"}, "extract_media": {"default": True, "help": "If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media. The .wacz file will be kept untouched."} } @@ -46,7 +47,7 @@ class WaczArchiverEnricher(Enricher, Archiver): url = to_enrich.get_url() collection = str(uuid.uuid4())[0:8] - browsertrix_home = os.path.abspath(ArchivingContext.get_tmp_dir()) + browsertrix_home = self.browsertrix_home or os.path.abspath(ArchivingContext.get_tmp_dir()) if os.getenv('RUNNING_IN_DOCKER'): logger.debug(f"generating WACZ without Docker for {url=}")