From 68992025b0cc1fd038946a09ffb0d94a1a4d62c5 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 14:29:44 +0000 Subject: [PATCH 1/8] Update version comparison. --- src/auto_archiver/core/orchestrator.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index b637878..f110f1b 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -5,6 +5,7 @@ formatting, database operations and clean up. """ from __future__ import annotations +from packaging import version from typing import Generator, Union, List, Type, TYPE_CHECKING import argparse import os @@ -436,16 +437,19 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_ def check_for_updates(self): response = requests.get("https://pypi.org/pypi/auto-archiver/json").json() - latest_version = response["info"]["version"] + latest_version = version.parse(response["info"]["version"]) + current_version = version.parse(__version__) # check version compared to current version - if latest_version != __version__: + if latest_version > current_version: if os.environ.get("RUNNING_IN_DOCKER"): update_cmd = "`docker pull bellingcat/auto-archiver:latest`" else: update_cmd = "`pip install --upgrade auto-archiver`" logger.warning("") logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********") - logger.warning(f"A new version of auto-archiver is available (v{latest_version}, you have {__version__})") + logger.warning( + f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})" + ) logger.warning(f"Make sure to update to the latest version using: {update_cmd}") logger.warning("") From 6c7f6af4b49fc5b4eee43afcf5efd450513c81b7 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 11:11:56 +0100 Subject: [PATCH 2/8] Add cache action with key to OS, py version and lockfile hash, and install packages from source. --- .github/workflows/tests-core.yaml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests-core.yaml b/.github/workflows/tests-core.yaml index c839249..5cfbd2d 100644 --- a/.github/workflows/tests-core.yaml +++ b/.github/workflows/tests-core.yaml @@ -28,17 +28,26 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install Poetry - run: pipx install poetry - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies + - name: Install latest Poetry + run: pipx install poetry + + - name: Cache Poetry and pip artifacts + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.cache/pip + key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies from source only run: poetry install --no-interaction --with dev + env: + PIP_NO_BINARY: ":all:" - name: Run Core Tests run: | From 1edfdae03ee3054a7dac0d5645e727d702a0d379 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 11:17:40 +0100 Subject: [PATCH 3/8] Update download tests to match cache process. --- .github/workflows/tests-download.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml index a68231f..7f8b27f 100644 --- a/.github/workflows/tests-download.yaml +++ b/.github/workflows/tests-download.yaml @@ -22,17 +22,23 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install poetry - run: pipx install poetry - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies + - name: Cache Poetry and pip artifacts + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.cache/pip + key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies from source only run: poetry install --no-interaction --with dev + env: + PIP_NO_BINARY: ":all:" - name: Run Download Tests run: poetry run pytest -ra -v -x -m "download" From a5906472797c6491fe233fc15321573763445fd7 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 11:23:49 +0100 Subject: [PATCH 4/8] Small code tidy to trigger tests. --- .../modules/telethon_extractor/telethon_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py index 73fb4e8..d17b25b 100644 --- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py +++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py @@ -190,7 +190,7 @@ class TelethonExtractor(Extractor): if getattr(original_post, "grouped_id", None) is None: return [original_post] if getattr(original_post, "media", False) else [] - search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)] + search_ids = list(range(original_post.id - max_amp, original_post.id + max_amp + 1)) posts = self.client.get_messages(chat, ids=search_ids) media = [] for post in posts: From c2821d7c83842b06ae51d5d2b4a9cecab320ed75 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 11:25:51 +0100 Subject: [PATCH 5/8] Fix poetry install deletion --- .github/workflows/tests-download.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml index 7f8b27f..a944633 100644 --- a/.github/workflows/tests-download.yaml +++ b/.github/workflows/tests-download.yaml @@ -27,6 +27,9 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Install latest Poetry + run: pipx install poetry + - name: Cache Poetry and pip artifacts uses: actions/cache@v4 with: From 894058063870dd10d73678d7f66835b534b38cfc Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 11:41:26 +0100 Subject: [PATCH 6/8] Add poetry cache clear, and small code change --- .github/workflows/tests-core.yaml | 3 +++ .github/workflows/tests-download.yaml | 3 +++ .../modules/gsheet_feeder_db/gsheet_feeder_db.py | 5 +---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests-core.yaml b/.github/workflows/tests-core.yaml index 5cfbd2d..1a771ac 100644 --- a/.github/workflows/tests-core.yaml +++ b/.github/workflows/tests-core.yaml @@ -44,6 +44,9 @@ jobs: ~/.cache/pip key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + - name: Clear Poetry PyPI cache + run: poetry cache clear pypi --all + - name: Install dependencies from source only run: poetry install --no-interaction --with dev env: diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml index a944633..50bc9da 100644 --- a/.github/workflows/tests-download.yaml +++ b/.github/workflows/tests-download.yaml @@ -38,6 +38,9 @@ jobs: ~/.cache/pip key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + - name: Clear Poetry PyPI cache + run: poetry cache clear pypi --all + - name: Install dependencies from source only run: poetry install --no-interaction --with dev env: diff --git a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py index 109be3f..ce552b3 100644 --- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py +++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py @@ -88,10 +88,7 @@ class GsheetsFeederDB(Feeder, Database): if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets: # ALLOW rules exist AND sheet name not explicitly allowed return False - if len(self.block_worksheets) and sheet_name in self.block_worksheets: - # BLOCK rules exist AND sheet name is blocked - return False - return True + return not (self.block_worksheets and sheet_name in self.block_worksheets) def missing_required_columns(self, gw: GWorksheet) -> list: missing = [] From 9c2b506189a912f641d5b357e8a6e3878c00797e Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 12:00:24 +0100 Subject: [PATCH 7/8] update runner os to matrix os. --- .github/workflows/tests-core.yaml | 7 +------ .github/workflows/tests-download.yaml | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/tests-core.yaml b/.github/workflows/tests-core.yaml index 1a771ac..768f9b8 100644 --- a/.github/workflows/tests-core.yaml +++ b/.github/workflows/tests-core.yaml @@ -42,15 +42,10 @@ jobs: path: | ~/.cache/pypoetry ~/.cache/pip - key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} - - - name: Clear Poetry PyPI cache - run: poetry cache clear pypi --all + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} - name: Install dependencies from source only run: poetry install --no-interaction --with dev - env: - PIP_NO_BINARY: ":all:" - name: Run Core Tests run: | diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml index 50bc9da..6c1e600 100644 --- a/.github/workflows/tests-download.yaml +++ b/.github/workflows/tests-download.yaml @@ -36,15 +36,10 @@ jobs: path: | ~/.cache/pypoetry ~/.cache/pip - key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} - - - name: Clear Poetry PyPI cache - run: poetry cache clear pypi --all + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} - name: Install dependencies from source only run: poetry install --no-interaction --with dev - env: - PIP_NO_BINARY: ":all:" - name: Run Download Tests run: poetry run pytest -ra -v -x -m "download" From 7e4ba6291873d49a2ea6455a06e7b951a874499e Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 31 Mar 2025 12:05:39 +0100 Subject: [PATCH 8/8] Small code change --- .../modules/gsheet_feeder_db/gsheet_feeder_db.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py index ce552b3..7ad5734 100644 --- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py +++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py @@ -158,9 +158,8 @@ class GsheetsFeederDB(Feeder, Database): if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"): batch_if_valid("screenshot", "\n".join(screenshot.urls)) - if thumbnail := item.get_first_image("thumbnail"): - if hasattr(thumbnail, "urls"): - batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")') + if (thumbnail := item.get_first_image("thumbnail")) and hasattr(thumbnail, "urls"): + batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")') if browsertrix := item.get_media_by_id("browsertrix"): batch_if_valid("wacz", "\n".join(browsertrix.urls))