Merge pull request #286 from bellingcat/version_comparison

Small code fixes and GH Actions cache
This commit is contained in:
Erin Clark
2025-03-31 12:40:42 +01:00
committed by GitHub
5 changed files with 35 additions and 19 deletions

View File

@@ -5,6 +5,7 @@ formatting, database operations and clean up.
"""
from __future__ import annotations
from packaging import version
from typing import Generator, Union, List, Type, TYPE_CHECKING
import argparse
import os
@@ -436,16 +437,19 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
def check_for_updates(self):
    """Log a warning when PyPI lists a newer auto-archiver release.

    Fetches the package metadata from the PyPI JSON endpoint, parses both the
    published version and the running ``__version__`` with
    ``packaging.version.parse``, and emits an update notice only when the
    published version is strictly newer. Comparing parsed versions (rather
    than raw strings with ``!=``) avoids a false "update available" warning
    when the local version is ahead of PyPI, e.g. a development build.

    Returns:
        None. The only effect is the warning log output.
    """
    # NOTE(review): the request has no timeout and no error handling, so a
    # network failure propagates to the caller — confirm that is intended.
    response = requests.get("https://pypi.org/pypi/auto-archiver/json").json()
    latest_version = version.parse(response["info"]["version"])
    current_version = version.parse(__version__)

    # check version compared to current version
    if latest_version > current_version:
        # The suggested update command depends on whether the
        # RUNNING_IN_DOCKER environment variable is set.
        if os.environ.get("RUNNING_IN_DOCKER"):
            update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
        else:
            update_cmd = "`pip install --upgrade auto-archiver`"
        logger.warning("")
        logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
        logger.warning(
            f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
        )
        logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
        logger.warning("")

View File

@@ -88,10 +88,7 @@ class GsheetsFeederDB(Feeder, Database):
if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets:
# ALLOW rules exist AND sheet name not explicitly allowed
return False
if len(self.block_worksheets) and sheet_name in self.block_worksheets:
# BLOCK rules exist AND sheet name is blocked
return False
return True
return not (self.block_worksheets and sheet_name in self.block_worksheets)
def missing_required_columns(self, gw: GWorksheet) -> list:
missing = []
@@ -161,9 +158,8 @@ class GsheetsFeederDB(Feeder, Database):
if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
batch_if_valid("screenshot", "\n".join(screenshot.urls))
if thumbnail := item.get_first_image("thumbnail"):
if hasattr(thumbnail, "urls"):
batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')
if (thumbnail := item.get_first_image("thumbnail")) and hasattr(thumbnail, "urls"):
batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')
if browsertrix := item.get_media_by_id("browsertrix"):
batch_if_valid("wacz", "\n".join(browsertrix.urls))

View File

@@ -190,7 +190,7 @@ class TelethonExtractor(Extractor):
if getattr(original_post, "grouped_id", None) is None:
return [original_post] if getattr(original_post, "media", False) else []
search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)]
search_ids = list(range(original_post.id - max_amp, original_post.id + max_amp + 1))
posts = self.client.get_messages(chat, ids=search_ids)
media = []
for post in posts: