Merge pull request #286 from bellingcat/version_comparison

Small code fixes and GH Actions cache
This commit is contained in:
Erin Clark
2025-03-31 12:40:42 +01:00
committed by GitHub
5 changed files with 35 additions and 19 deletions

View File

@@ -28,15 +28,23 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Install Poetry
run: pipx install poetry
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install dependencies - name: Install latest Poetry
run: pipx install poetry
- name: Cache Poetry and pip artifacts
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.cache/pip
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install dependencies from source only
run: poetry install --no-interaction --with dev run: poetry install --no-interaction --with dev
- name: Run Core Tests - name: Run Core Tests

View File

@@ -22,15 +22,23 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Install poetry
run: pipx install poetry
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install dependencies - name: Install latest Poetry
run: pipx install poetry
- name: Cache Poetry and pip artifacts
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.cache/pip
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install dependencies from source only
run: poetry install --no-interaction --with dev run: poetry install --no-interaction --with dev
- name: Run Download Tests - name: Run Download Tests

View File

@@ -5,6 +5,7 @@ formatting, database operations and clean up.
""" """
from __future__ import annotations from __future__ import annotations
from packaging import version
from typing import Generator, Union, List, Type, TYPE_CHECKING from typing import Generator, Union, List, Type, TYPE_CHECKING
import argparse import argparse
import os import os
@@ -436,16 +437,19 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
def check_for_updates(self): def check_for_updates(self):
response = requests.get("https://pypi.org/pypi/auto-archiver/json").json() response = requests.get("https://pypi.org/pypi/auto-archiver/json").json()
latest_version = response["info"]["version"] latest_version = version.parse(response["info"]["version"])
current_version = version.parse(__version__)
# check version compared to current version # check version compared to current version
if latest_version != __version__: if latest_version > current_version:
if os.environ.get("RUNNING_IN_DOCKER"): if os.environ.get("RUNNING_IN_DOCKER"):
update_cmd = "`docker pull bellingcat/auto-archiver:latest`" update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
else: else:
update_cmd = "`pip install --upgrade auto-archiver`" update_cmd = "`pip install --upgrade auto-archiver`"
logger.warning("") logger.warning("")
logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********") logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
logger.warning(f"A new version of auto-archiver is available (v{latest_version}, you have {__version__})") logger.warning(
f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
)
logger.warning(f"Make sure to update to the latest version using: {update_cmd}") logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
logger.warning("") logger.warning("")

View File

@@ -88,10 +88,7 @@ class GsheetsFeederDB(Feeder, Database):
if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets: if len(self.allow_worksheets) and sheet_name not in self.allow_worksheets:
# ALLOW rules exist AND sheet name not explicitly allowed # ALLOW rules exist AND sheet name not explicitly allowed
return False return False
if len(self.block_worksheets) and sheet_name in self.block_worksheets: return not (self.block_worksheets and sheet_name in self.block_worksheets)
# BLOCK rules exist AND sheet name is blocked
return False
return True
def missing_required_columns(self, gw: GWorksheet) -> list: def missing_required_columns(self, gw: GWorksheet) -> list:
missing = [] missing = []
@@ -161,9 +158,8 @@ class GsheetsFeederDB(Feeder, Database):
if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"): if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
batch_if_valid("screenshot", "\n".join(screenshot.urls)) batch_if_valid("screenshot", "\n".join(screenshot.urls))
if thumbnail := item.get_first_image("thumbnail"): if (thumbnail := item.get_first_image("thumbnail")) and hasattr(thumbnail, "urls"):
if hasattr(thumbnail, "urls"): batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')
batch_if_valid("thumbnail", f'=IMAGE("{thumbnail.urls[0]}")')
if browsertrix := item.get_media_by_id("browsertrix"): if browsertrix := item.get_media_by_id("browsertrix"):
batch_if_valid("wacz", "\n".join(browsertrix.urls)) batch_if_valid("wacz", "\n".join(browsertrix.urls))

View File

@@ -190,7 +190,7 @@ class TelethonExtractor(Extractor):
if getattr(original_post, "grouped_id", None) is None: if getattr(original_post, "grouped_id", None) is None:
return [original_post] if getattr(original_post, "media", False) else [] return [original_post] if getattr(original_post, "media", False) else []
search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)] search_ids = list(range(original_post.id - max_amp, original_post.id + max_amp + 1))
posts = self.client.get_messages(chat, ids=search_ids) posts = self.client.get_messages(chat, ids=search_ids)
media = [] media = []
for post in posts: for post in posts: