diff --git a/.github/workflows/tests-core.yaml b/.github/workflows/tests-core.yaml index e0a66d1..768f9b8 100644 --- a/.github/workflows/tests-core.yaml +++ b/.github/workflows/tests-core.yaml @@ -28,15 +28,23 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install Poetry - run: pipx install poetry - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install latest Poetry + run: pipx install poetry + + - name: Cache Poetry and pip artifacts + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.cache/pip + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies from source only run: poetry install --no-interaction --with dev - name: Run Core Tests diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml index 08ad7be..6c1e600 100644 --- a/.github/workflows/tests-download.yaml +++ b/.github/workflows/tests-download.yaml @@ -22,15 +22,23 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install poetry - run: pipx install poetry - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install latest Poetry + run: pipx install poetry + + - name: Cache Poetry and pip artifacts + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.cache/pip + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies from source only run: poetry install --no-interaction --with dev - name: Run Download Tests diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index b637878..f110f1b 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -5,6 +5,7 @@ formatting, database operations and clean up. """ from __future__ import annotations +from packaging import version from typing import Generator, Union, List, Type, TYPE_CHECKING import argparse import os @@ -436,16 +437,19 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_ def check_for_updates(self): response = requests.get("https://pypi.org/pypi/auto-archiver/json").json() - latest_version = response["info"]["version"] + latest_version = version.parse(response["info"]["version"]) + current_version = version.parse(__version__) # check version compared to current version - if latest_version != __version__: + if latest_version > current_version: if os.environ.get("RUNNING_IN_DOCKER"): update_cmd = "`docker pull bellingcat/auto-archiver:latest`" else: update_cmd = "`pip install --upgrade auto-archiver`" logger.warning("") logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********") - logger.warning(f"A new version of auto-archiver is available (v{latest_version}, you have {__version__})") + logger.warning( + f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})" + ) logger.warning(f"Make sure to update to the latest version using: {update_cmd}") logger.warning("") diff --git a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py index 109be3f..7ad5734 100644 --- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py +++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py @@ -88,10 +88,7 @@ class GsheetsFeederDB(Feeder, Database): if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets: # ALLOW rules exist AND sheet name not explicitly allowed return False - if len(self.block_worksheets) and sheet_name in self.block_worksheets: - # BLOCK rules exist AND sheet name is blocked - return False - return True + return not (self.block_worksheets and sheet_name in self.block_worksheets) def missing_required_columns(self, gw: GWorksheet) -> list: missing = [] @@ -161,9 +158,8 @@ class GsheetsFeederDB(Feeder, Database): if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"): batch_if_valid("screenshot", "\n".join(screenshot.urls)) - if thumbnail := item.get_first_image("thumbnail"): - if hasattr(thumbnail, "urls"): - batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")') + if (thumbnail := item.get_first_image("thumbnail")) and hasattr(thumbnail, "urls"): + batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")') if browsertrix := item.get_media_by_id("browsertrix"): batch_if_valid("wacz", "\n".join(browsertrix.urls)) diff --git a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py index 73fb4e8..d17b25b 100644 --- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py +++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py @@ -190,7 +190,7 @@ class TelethonExtractor(Extractor): if getattr(original_post, "grouped_id", None) is None: return [original_post] if getattr(original_post, "media", False) else [] - search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)] + search_ids = list(range(original_post.id - max_amp, original_post.id + max_amp + 1)) posts = self.client.get_messages(chat, ids=search_ids) media = [] for post in posts: