Compare commits

...

3 Commits

Author SHA1 Message Date
msramalho
65e3c99483 Bump version to v0.5.28 for release 2023-07-26 16:13:14 +01:00
msramalho
888ad8f004 fix: twitter hack videos extension detection 2023-07-26 16:12:56 +01:00
msramalho
086a9e6c84 fix: remove unnecessary log 2023-07-11 12:17:15 +01:00
5 changed files with 12 additions and 4 deletions

View File

@@ -18,7 +18,7 @@ steps:
# - thumbnail_enricher # - thumbnail_enricher
# - wayback_archiver_enricher # - wayback_archiver_enricher
# - wacz_enricher # - wacz_enricher
# - pdq_hash_enricher # - pdq_hash_enricher # if you want to calculate hashes for thumbnails, include this after thumbnail_enricher
formatter: html_formatter # defaults to mute_formatter formatter: html_formatter # defaults to mute_formatter
storages: storages:
- local_storage - local_storage

View File

@@ -49,7 +49,6 @@ class TelegramArchiver(Archiver):
if video is None: if video is None:
logger.warning("could not find video") logger.warning("could not find video")
image_tags = s.find_all(class_="tgme_widget_message_photo_wrap") image_tags = s.find_all(class_="tgme_widget_message_photo_wrap")
logger.info(image_tags)
image_urls = [] image_urls = []
for im in image_tags: for im in image_tags:

View File

@@ -6,6 +6,7 @@ from slugify import slugify
from . import Archiver from . import Archiver
from ..core import Metadata, Media from ..core import Metadata, Media
from ..utils.misc import remove_get_parameters
class TwitterArchiver(Archiver): class TwitterArchiver(Archiver):
@@ -133,7 +134,7 @@ class TwitterArchiver(Archiver):
media = Media(filename="") media = Media(filename="")
media.set("src", u) media.set("src", u)
ext = "" ext = ""
if (mtype := mimetypes.guess_type(u)[0]): if (mtype := mimetypes.guess_type(remove_get_parameters(u))[0]):
ext = mimetypes.guess_extension(mtype) ext = mimetypes.guess_extension(mtype)
media.filename = self.download_from_url(u, f'{slugify(url)}_{i}{ext}', item) media.filename = self.download_from_url(u, f'{slugify(url)}_{i}{ext}', item)

View File

@@ -2,6 +2,7 @@
import os, json, requests import os, json, requests
from datetime import datetime from datetime import datetime
from loguru import logger from loguru import logger
from urllib.parse import urlparse, urlunparse
def mkdir_if_not_exists(folder): def mkdir_if_not_exists(folder):
@@ -20,6 +21,13 @@ def expand_url(url):
logger.error(f'Failed to expand url {url}') logger.error(f'Failed to expand url {url}')
return url return url
def remove_get_parameters(url: str) -> str:
    """Return *url* with its query string (GET parameters) removed.

    Example: ``http://example.com/file.mp4?t=1`` becomes
    ``http://example.com/file.mp4``. Useful so that ``mimetypes`` can
    guess the type from the file extension at the end of the URL.

    Only the query component is cleared; every other component returned
    by ``urlparse`` (scheme, netloc, path, params, fragment) is kept.
    """
    parsed_url = urlparse(url)
    # ParseResult is a namedtuple, so _replace yields a copy with an empty query.
    return urlunparse(parsed_url._replace(query=''))
def getattr_or(o: object, prop: str, default=None): def getattr_or(o: object, prop: str, default=None):
try: try:

View File

@@ -3,7 +3,7 @@ _MAJOR = "0"
_MINOR = "5" _MINOR = "5"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "27" _PATCH = "28"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""