Bump version to v0.5.28 for release

fix: twitter hack videos extension detection
fix: remove unnecessary log
2026-06-12 21:28:29 +03:00 · 2023-07-26 16:13:14 +01:00 · 2023-07-26 16:12:56 +01:00 · 2023-07-11 12:17:15 +01:00
5 changed files with 12 additions and 4 deletions
--- a/example.orchestration.yaml
+++ b/example.orchestration.yaml
@@ -18,7 +18,7 @@ steps:
    # - thumbnail_enricher
    # - wayback_archiver_enricher
    # - wacz_enricher
-    # - pdq_hash_enricher
+    # - pdq_hash_enricher # if you want to calculate hashes for thumbnails, include this after thumbnail_enricher
  formatter: html_formatter # defaults to mute_formatter
  storages:
    - local_storage
--- a/src/auto_archiver/archivers/telegram_archiver.py
+++ b/src/auto_archiver/archivers/telegram_archiver.py
@@ -49,7 +49,6 @@ class TelegramArchiver(Archiver):
        if video is None:
            logger.warning("could not find video")
            image_tags = s.find_all(class_="tgme_widget_message_photo_wrap")
            logger.info(image_tags)
            image_urls = []
            for im in image_tags:
--- a/src/auto_archiver/archivers/twitter_archiver.py
+++ b/src/auto_archiver/archivers/twitter_archiver.py
@@ -6,6 +6,7 @@ from slugify import slugify
 from . import Archiver
 from ..core import Metadata, Media
 from ..utils.misc import remove_get_parameters
 class TwitterArchiver(Archiver):
@@ -133,7 +134,7 @@ class TwitterArchiver(Archiver):
            media = Media(filename="")
            media.set("src", u)
            ext = ""
-            if (mtype := mimetypes.guess_type(u)[0]):
+            if (mtype := mimetypes.guess_type(remove_get_parameters(u))[0]):
                ext = mimetypes.guess_extension(mtype)
            media.filename = self.download_from_url(u, f'{slugify(url)}_{i}{ext}', item)
--- a/src/auto_archiver/utils/misc.py
+++ b/src/auto_archiver/utils/misc.py
@@ -2,6 +2,7 @@
 import os, json, requests
 from datetime import datetime
 from loguru import logger
 from urllib.parse import urlparse, urlunparse
 def mkdir_if_not_exists(folder):
@@ -20,6 +21,13 @@ def expand_url(url):
            logger.error(f'Failed to expand url {url}')
    return url
 def remove_get_parameters(url):
    """Return `url` with its query string (GET parameters) removed.

    Example: http://example.com/file.mp4?t=1 -> http://example.com/file.mp4

    Only the query component is cleared; every other parsed component
    (scheme, host, path, params, fragment) is passed through unchanged.
    Useful for mimetypes to work.
    """
    without_query = urlparse(url)._replace(query='')
    return urlunparse(without_query)
 def getattr_or(o: object, prop: str, default=None):
    try:
--- a/src/auto_archiver/version.py
+++ b/src/auto_archiver/version.py
@@ -3,7 +3,7 @@ _MAJOR = "0"
 _MINOR = "5"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "27"
+_PATCH = "28"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
Author	SHA1	Message	Date
msramalho	65e3c99483	Bump version to v0.5.28 for release	2023-07-26 16:13:14 +01:00
msramalho	888ad8f004	fix: twitter hack videos extension detection	2023-07-26 16:12:56 +01:00
msramalho	086a9e6c84	fix: remove unnecessary log	2023-07-11 12:17:15 +01:00