mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
pyproject
This commit is contained in:
78
src/auto_archiver/formatters/html_formatter.py
Normal file
78
src/auto_archiver/formatters/html_formatter.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from abc import abstractmethod
|
||||
import mimetypes
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import uuid, os, pathlib
|
||||
|
||||
from ..core import Metadata
|
||||
from ..core import Media
|
||||
from . import Formatter
|
||||
|
||||
|
||||
@dataclass
class HtmlFormatter(Formatter):
    """Formatter that renders an archived item into a standalone HTML file.

    The page is produced from the ``html_template.html`` Jinja2 template that
    is loaded from the ``templates/`` directory next to this module.
    """

    name = "html_formatter"

    def __init__(self, config: dict) -> None:
        """Build the Jinja2 environment, register the filters, load the template.

        Args:
            config: configuration dictionary, forwarded unchanged to the
                parent initializer.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        templates_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
        # NOTE(review): the Environment is created with Jinja's default
        # autoescape setting (disabled), so url/title/metadata values are
        # emitted unescaped unless the template escapes them itself -- confirm
        # the template does so, or consider enabling autoescape.
        self.environment = Environment(loader=FileSystemLoader(templates_dir))
        # Expose the module-level type/mimetype predicates as template filters.
        self.environment.filters.update({
            'is_list': is_list_jinja,
            'is_video': is_video_jinja,
            'is_image': is_image_jinja,
            'is_audio': is_audio_jinja,
            'is_media': is_media_jinja,
        })
        self.template = self.environment.get_template("html_template.html")

    @staticmethod
    def configs() -> dict:
        """Return the configuration options declared by this formatter."""
        return {
            "detect_thumbnails": {"default": True, "help": "if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'"},
        }

    def format(self, item: Metadata) -> Media:
        """Render *item* with the HTML template and write the result to disk.

        Args:
            item: the metadata object whose URL, title, media and cleaned
                metadata are passed to the template.

        Returns:
            A ``Media`` whose filename is the path of the HTML file written
            into the item's temporary directory.
        """
        # TODO: group thumbnails when self.detect_thumbnails is set;
        # thumbnails_media work per media, gah
        content = self.template.render(
            url=item.get_url(),
            title=item.get_title(),
            media=item.media,
            metadata=item.get_clean_metadata(),
        )
        # A random UUID in the file name keeps repeated renders in the same
        # tmp dir from overwriting each other.
        html_path = os.path.join(item.get_tmp_dir(), f"formatted{uuid.uuid4()}.html")
        with open(html_path, mode="w", encoding="utf-8") as outf:
            outf.write(content)
        return Media(filename=html_path)
|
||||
|
||||
|
||||
# JINJA helper filters
|
||||
|
||||
|
||||
def is_list_jinja(v) -> bool:
    """Jinja filter: tell whether *v* is a ``list`` (or a subclass of it)."""
    value_is_list = isinstance(v, list)
    return value_is_list
|
||||
|
||||
|
||||
def is_video_jinja(s: str) -> bool:
    """Jinja filter: tell whether the file name or URL *s* looks like a video.

    The decision is based on the MIME type guessed from *s* by
    ``mimetypes.guess_type``; an unknown type yields ``False``.

    Args:
        s: file name or URL to inspect.

    Returns:
        ``True`` when the guessed MIME type has the top-level type ``video``.
    """
    mime = mimetypes.guess_type(s)[0]
    # Compare the top-level type instead of searching for a substring, so
    # types that merely contain the word (e.g. "application/x-video-...")
    # are not misreported as video.
    return (mime or "").startswith("video/")
|
||||
|
||||
|
||||
def is_image_jinja(s: str) -> bool:
    """Jinja filter: tell whether the file name or URL *s* looks like an image.

    The decision is based on the MIME type guessed from *s* by
    ``mimetypes.guess_type``; an unknown type yields ``False``.

    Args:
        s: file name or URL to inspect.

    Returns:
        ``True`` when the guessed MIME type has the top-level type ``image``.
    """
    mime = mimetypes.guess_type(s)[0]
    # Compare the top-level type instead of searching for a substring:
    # "application/x-apple-diskimage" (.dmg) contains "image" but is not one.
    return (mime or "").startswith("image/")
|
||||
|
||||
|
||||
def is_audio_jinja(s: str) -> bool:
    """Jinja filter: tell whether the file name or URL *s* looks like audio.

    The decision is based on the MIME type guessed from *s* by
    ``mimetypes.guess_type``; an unknown type yields ``False``.

    Args:
        s: file name or URL to inspect.

    Returns:
        ``True`` when the guessed MIME type has the top-level type ``audio``.
    """
    mime = mimetypes.guess_type(s)[0]
    # Compare the top-level type instead of searching for a substring, so
    # types that merely contain the word (e.g. "application/x-audio-...")
    # are not misreported as audio.
    return (mime or "").startswith("audio/")
|
||||
|
||||
|
||||
def is_media_jinja(v) -> bool:
    """Jinja filter: tell whether *v* is an instance of ``Media``."""
    value_is_media = isinstance(v, Media)
    return value_is_media
|
||||
Reference in New Issue
Block a user