From 47dc7881433b6f8fe7efdbf9213e3dc2c663c964 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 17 Jan 2023 16:29:27 +0000 Subject: [PATCH] thumbnails enricher --- src/enrichers/__init__.py | 3 +- src/enrichers/thumbnail_enricher.py | 46 +++++++++ src/enrichers/wayback_enricher.py | 3 +- src/formatters/html_formatter.py | 44 ++++++++- src/formatters/templates/html_template.html | 104 +++++++++++++++----- src/formatters/templates/media.html | 28 ++++++ src/media.py | 7 +- src/orchestrator.py | 8 ++ 8 files changed, 208 insertions(+), 35 deletions(-) create mode 100644 src/enrichers/thumbnail_enricher.py create mode 100644 src/formatters/templates/media.html diff --git a/src/enrichers/__init__.py b/src/enrichers/__init__.py index 95b3fad..8b9220b 100644 --- a/src/enrichers/__init__.py +++ b/src/enrichers/__init__.py @@ -1,4 +1,5 @@ from .enricher import Enricher from .screenshot_enricher import ScreenshotEnricher from .wayback_enricher import WaybackEnricher -from .hash_enricher import HashEnricher \ No newline at end of file +from .hash_enricher import HashEnricher +from .thumbnail_enricher import ThumbnailEnricher \ No newline at end of file diff --git a/src/enrichers/thumbnail_enricher.py b/src/enrichers/thumbnail_enricher.py new file mode 100644 index 0000000..32e09be --- /dev/null +++ b/src/enrichers/thumbnail_enricher.py @@ -0,0 +1,46 @@ +import uuid +from media import Media +from . import Enricher +from metadata import Metadata +from loguru import logger +import ffmpeg, os + + +class ThumbnailEnricher(Enricher): + """ + Generates thumbnails for all the media + """ + name = "thumbnail_enricher" + + def __init__(self, config: dict) -> None: + # without this STEP.__init__ is not called + super().__init__(config) + + @staticmethod + def configs() -> dict: + return {} + + def enrich(self, to_enrich: Metadata) -> None: + logger.debug(f"generating thumbnails") + folder = os.path.join(to_enrich.get_tmp_dir(), str(uuid.uuid4())) + os.makedirs(folder, exist_ok=True) + for i, m in enumerate(to_enrich.media[::]): + if m.is_video(): + logger.debug(f"generating thumbnails for {m.filename}") + fps, duration = 0.5, m.get("duration") + if duration is not None: + duration = float(duration) + if duration < 60: fps = 10.0 / duration + elif duration < 120: fps = 20.0 / duration + else: fps = 40.0 / duration + + stream = ffmpeg.input(m.filename) + stream = ffmpeg.filter(stream, 'fps', fps=fps).filter('scale', 512, -1) + stream.output(os.path.join(folder, 'out%d.jpg')).run() + + thumbnails = os.listdir(folder) + thumbnails_media = [] + for t, fname in enumerate(thumbnails): + if fname[-3:] == 'jpg': + thumbnails_media.append(Media(filename=os.path.join(folder, fname)).set("id", f"thumbnail_{t}")) + to_enrich.media[i].set("thumbnails", thumbnails_media) diff --git a/src/enrichers/wayback_enricher.py b/src/enrichers/wayback_enricher.py index bf55923..429f218 100644 --- a/src/enrichers/wayback_enricher.py +++ b/src/enrichers/wayback_enricher.py @@ -37,7 +37,7 @@ class WaybackEnricher(Enricher): r = requests.post('https://web.archive.org/save/', headers=ia_headers, data={'url': url}) if r.status_code != 200: - logger.error(em:=f"Internet archive failed with status of {r.status_code}: {r.json()}") + logger.error(em := f"Internet archive failed with status of {r.status_code}: {r.json()}") to_enrich.set("wayback", em) return @@ -66,3 +66,4 @@ class WaybackEnricher(Enricher): to_enrich.set("wayback", wayback_url) else: to_enrich.set("wayback", {"job_id": job_id, "check_status": f'https://web.archive.org/save/status/{job_id}'}) + to_enrich.set("wayback lookup", f"https://web.archive.org/web/*/{url}") diff --git a/src/formatters/html_formatter.py b/src/formatters/html_formatter.py index 7443568..a78ff2b 100644 --- a/src/formatters/html_formatter.py +++ b/src/formatters/html_formatter.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass from abc import abstractmethod +import mimetypes from metadata import Metadata from media import Media from formatters import Formatter @@ -16,14 +17,28 @@ class HtmlFormatter(Formatter): # without this STEP.__init__ is not called super().__init__(config) self.environment = Environment(loader=FileSystemLoader(os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/"))) + self.environment.filters.update({ + 'is_list': is_list_jinja, + 'is_video': is_video_jinja, + 'is_image': is_image_jinja, + 'is_audio': is_audio_jinja, + 'is_media': is_media_jinja, + }) self.template = self.environment.get_template("html_template.html") @staticmethod def configs() -> dict: - return {} + return { + "detect_thumbnails": {"default": True, "help": "if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'"}, + + } def format(self, item: Metadata) -> Media: - print("FORMATTING") + media = item.media + # thumbnails + # TODO: thumbnails_media work per media, gah + # if self.detect_thumbnails: + content = self.template.render( url=item.get_url(), title=item.get_title(), @@ -34,3 +49,28 @@ class HtmlFormatter(Formatter): with open(html_path, mode="w", encoding="utf-8") as outf: outf.write(content) return Media(filename=html_path) + + +# JINJA helper filters + + +def is_list_jinja(v) -> bool: + return isinstance(v, list) + + +def is_video_jinja(s: str) -> bool: + m = mimetypes.guess_type(s)[0] + return "video" in (m or "") + + +def is_image_jinja(s: str) -> bool: + m = mimetypes.guess_type(s)[0] + return "image" in (m or "") + + +def is_audio_jinja(s: str) -> bool: + m = mimetypes.guess_type(s)[0] + return "audio" in (m or "") + +def is_media_jinja(v) -> bool: + return isinstance(v, Media) diff --git a/src/formatters/templates/html_template.html b/src/formatters/templates/html_template.html index e757cae..47cceae 100644 --- a/src/formatters/templates/html_template.html +++ b/src/formatters/templates/html_template.html @@ -1,5 +1,5 @@ {# templates/results.html #} - +{% import 'media.html' as macros %} @@ -55,6 +55,45 @@ font-size: large; display: none; } + + img, + video { + filter: gray; + -webkit-filter: grayscale(1); + filter: grayscale(1); + } + + /* Disable grayscale on hover */ + img:hover, + video:hover { + -webkit-filter: grayscale(0); + filter: none; + } + + .collapsible { + background-color: #777; + color: white; + cursor: pointer; + padding: 5px; + margin: 10px; + width: 100%; + border: none; + text-align: left; + outline: none; + font-size: 15px; + } + + .active, + .collapsible:hover { + background-color: #555; + } + + .collapsible-content { + padding: 0 18px; + display: none; + overflow: hidden; + background-color: #f1f1f1; + } @@ -76,37 +115,31 @@