Merge pull request #286 from bellingcat/version_comparison

Small code fixes and GH Actions cache
2026-06-11 20:58:29 +03:00 · 2025-03-31 12:40:42 +01:00
parent 8685b6bf13 7e4ba62918
commit c7c7eb00a1
5 changed files with 35 additions and 19 deletions
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -5,6 +5,7 @@ formatting, database operations and clean up.
 """

 from __future__ import annotations
+from packaging import version
 from typing import Generator, Union, List, Type, TYPE_CHECKING
 import argparse
 import os
@@ -436,16 +437,19 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_

    def check_for_updates(self):
        response = requests.get("https://pypi.org/pypi/auto-archiver/json").json()
-        latest_version = response["info"]["version"]
+        latest_version = version.parse(response["info"]["version"])
+        current_version = version.parse(__version__)
        # check version compared to current version
-        if latest_version != __version__:
+        if latest_version > current_version:
            if os.environ.get("RUNNING_IN_DOCKER"):
                update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
            else:
                update_cmd = "`pip install --upgrade auto-archiver`"
            logger.warning("")
            logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
-            logger.warning(f"A new version of auto-archiver is available (v{latest_version}, you have {__version__})")
+            logger.warning(
+                f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
+            )
            logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
            logger.warning("")

--- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
+++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
@@ -88,10 +88,7 @@ class GsheetsFeederDB(Feeder, Database):
        if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets:
            # ALLOW rules exist AND sheet name not explicitly allowed
            return False
-        if len(self.block_worksheets) and sheet_name in self.block_worksheets:
-            # BLOCK rules exist AND sheet name is blocked
-            return False
-        return True
+        return not (self.block_worksheets and sheet_name in self.block_worksheets)

    def missing_required_columns(self, gw: GWorksheet) -> list:
        missing = []
@@ -161,9 +158,8 @@ class GsheetsFeederDB(Feeder, Database):
        if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
            batch_if_valid("screenshot", "\n".join(screenshot.urls))

-        if thumbnail := item.get_first_image("thumbnail"):
-            if hasattr(thumbnail, "urls"):
-                batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')
+        if (thumbnail := item.get_first_image("thumbnail")) and hasattr(thumbnail, "urls"):
+            batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')

        if browsertrix := item.get_media_by_id("browsertrix"):
            batch_if_valid("wacz", "\n".join(browsertrix.urls))
--- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
+++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
@@ -190,7 +190,7 @@ class TelethonExtractor(Extractor):
        if getattr(original_post, "grouped_id", None) is None:
            return [original_post] if getattr(original_post, "media", False) else []

-        search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)]
+        search_ids = list(range(original_post.id - max_amp, original_post.id + max_amp + 1))
        posts = self.client.get_messages(chat, ids=search_ids)
        media = []
        for post in posts: