mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
thumbnails enricher
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from .enricher import Enricher
|
||||
from .screenshot_enricher import ScreenshotEnricher
|
||||
from .wayback_enricher import WaybackEnricher
|
||||
from .hash_enricher import HashEnricher
|
||||
from .hash_enricher import HashEnricher
|
||||
from .thumbnail_enricher import ThumbnailEnricher
|
||||
46
src/enrichers/thumbnail_enricher.py
Normal file
46
src/enrichers/thumbnail_enricher.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import uuid
|
||||
from media import Media
|
||||
from . import Enricher
|
||||
from metadata import Metadata
|
||||
from loguru import logger
|
||||
import ffmpeg, os
|
||||
|
||||
|
||||
class ThumbnailEnricher(Enricher):
    """
    Generates thumbnails for every video found in the media list.

    For each video, frames are sampled with ffmpeg, scaled to a fixed width,
    written as JPEG files into a folder of their own, and attached to the
    source media as a list of Media under the "thumbnails" property.
    """
    name = "thumbnail_enricher"

    # sampling rate used when the duration is missing or unusable
    _DEFAULT_FPS = 0.5
    # output width in pixels; height keeps the aspect ratio (-1 in ffmpeg's scale)
    _THUMBNAIL_WIDTH = 512
    # ffmpeg output pattern, frames are written as out1.jpg, out2.jpg, ...
    _FRAME_PREFIX = "out"
    _FRAME_EXTENSION = ".jpg"

    def __init__(self, config: dict) -> None:
        # without this STEP.__init__ is not called
        super().__init__(config)

    @staticmethod
    def configs() -> dict:
        """Return the configuration options of this step (there are none)."""
        return {}

    @classmethod
    def _fps_for_duration(cls, duration) -> float:
        """
        Pick a sampling rate so that a video yields roughly 10 frames when
        shorter than 60s, 20 when shorter than 120s, and 40 otherwise.

        Falls back to the default rate when `duration` is missing, not a
        number, or not a positive finite value (avoids dividing by zero and
        handing ffmpeg a zero/negative/NaN rate).
        """
        try:
            seconds = float(duration)
        except (TypeError, ValueError):
            return cls._DEFAULT_FPS
        if not 0 < seconds < float("inf"):
            return cls._DEFAULT_FPS
        if seconds < 60:
            return 10.0 / seconds
        if seconds < 120:
            return 20.0 / seconds
        return 40.0 / seconds

    @classmethod
    def _frame_index(cls, fname: str):
        """Return the frame number encoded in an `out<N>.jpg` name, else None."""
        stem, extension = os.path.splitext(fname)
        number = stem[len(cls._FRAME_PREFIX):]
        if extension != cls._FRAME_EXTENSION or not stem.startswith(cls._FRAME_PREFIX):
            return None
        if not number.isdecimal():
            return None
        return int(number)

    def enrich(self, to_enrich: Metadata) -> None:
        """
        Generate thumbnails for each video in `to_enrich.media` and store them
        on that media item under the "thumbnails" property.
        """
        logger.debug("generating thumbnails")
        # iterate over a snapshot so the loop is unaffected if the list changes
        for m in list(to_enrich.media):
            if not m.is_video():
                continue
            logger.debug(f"generating thumbnails for {m.filename}")

            # one folder per video: a shared folder would let a later video
            # overwrite earlier frames and pick up leftovers from longer videos
            folder = os.path.join(to_enrich.get_tmp_dir(), str(uuid.uuid4()))
            os.makedirs(folder, exist_ok=True)

            fps = self._fps_for_duration(m.get("duration"))
            stream = ffmpeg.input(m.filename)
            stream = ffmpeg.filter(stream, 'fps', fps=fps).filter('scale', self._THUMBNAIL_WIDTH, -1)
            stream.output(os.path.join(folder, f'{self._FRAME_PREFIX}%d{self._FRAME_EXTENSION}')).run()

            # os.listdir order is arbitrary: sort numerically so ids follow frame order
            frames = sorted(
                (fname for fname in os.listdir(folder) if self._frame_index(fname) is not None),
                key=self._frame_index,
            )
            thumbnails_media = [
                Media(filename=os.path.join(folder, fname)).set("id", f"thumbnail_{t}")
                for t, fname in enumerate(frames)
            ]
            m.set("thumbnails", thumbnails_media)
|
||||
@@ -37,7 +37,7 @@ class WaybackEnricher(Enricher):
|
||||
r = requests.post('https://web.archive.org/save/', headers=ia_headers, data={'url': url})
|
||||
|
||||
if r.status_code != 200:
|
||||
logger.error(em:=f"Internet archive failed with status of {r.status_code}: {r.json()}")
|
||||
logger.error(em := f"Internet archive failed with status of {r.status_code}: {r.json()}")
|
||||
to_enrich.set("wayback", em)
|
||||
return
|
||||
|
||||
@@ -66,3 +66,4 @@ class WaybackEnricher(Enricher):
|
||||
to_enrich.set("wayback", wayback_url)
|
||||
else:
|
||||
to_enrich.set("wayback", {"job_id": job_id, "check_status": f'https://web.archive.org/save/status/{job_id}'})
|
||||
to_enrich.set("wayback lookup", f"https://web.archive.org/web/*/{url}")
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from abc import abstractmethod
|
||||
import mimetypes
|
||||
from metadata import Metadata
|
||||
from media import Media
|
||||
from formatters import Formatter
|
||||
@@ -16,14 +17,28 @@ class HtmlFormatter(Formatter):
|
||||
# without this STEP.__init__ is not called
|
||||
super().__init__(config)
|
||||
self.environment = Environment(loader=FileSystemLoader(os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")))
|
||||
self.environment.filters.update({
|
||||
'is_list': is_list_jinja,
|
||||
'is_video': is_video_jinja,
|
||||
'is_image': is_image_jinja,
|
||||
'is_audio': is_audio_jinja,
|
||||
'is_media': is_media_jinja,
|
||||
})
|
||||
self.template = self.environment.get_template("html_template.html")
|
||||
|
||||
@staticmethod
|
||||
def configs() -> dict:
|
||||
return {}
|
||||
return {
|
||||
"detect_thumbnails": {"default": True, "help": "if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'"},
|
||||
|
||||
}
|
||||
|
||||
def format(self, item: Metadata) -> Media:
|
||||
print("FORMATTING")
|
||||
media = item.media
|
||||
# thumbnails
|
||||
# TODO: thumbnails_media work per media, gah
|
||||
# if self.detect_thumbnails:
|
||||
|
||||
content = self.template.render(
|
||||
url=item.get_url(),
|
||||
title=item.get_title(),
|
||||
@@ -34,3 +49,28 @@ class HtmlFormatter(Formatter):
|
||||
with open(html_path, mode="w", encoding="utf-8") as outf:
|
||||
outf.write(content)
|
||||
return Media(filename=html_path)
|
||||
|
||||
|
||||
# JINJA helper filters
|
||||
|
||||
|
||||
def is_list_jinja(v) -> bool:
    """Jinja filter: tell whether the given value is a `list` instance."""
    value_is_list = isinstance(v, list)
    return value_is_list
|
||||
|
||||
|
||||
def is_video_jinja(s: str) -> bool:
    """
    Jinja filter: tell whether the mimetype guessed from the file name or URL
    `s` is a video type.

    Returns False when no type can be guessed or when `s` is not something
    `mimetypes.guess_type` accepts (e.g. None), so a template never fails on
    an unexpected value.
    """
    try:
        guessed = mimetypes.guess_type(s)[0]
    except TypeError:
        return False
    # match the top-level type only, a substring test would also accept
    # unrelated types that merely contain the word "video"
    return (guessed or "").startswith("video/")
|
||||
|
||||
|
||||
def is_image_jinja(s: str) -> bool:
    """
    Jinja filter: tell whether the mimetype guessed from the file name or URL
    `s` is an image type.

    Returns False when no type can be guessed or when `s` is not something
    `mimetypes.guess_type` accepts (e.g. None), so a template never fails on
    an unexpected value.
    """
    try:
        guessed = mimetypes.guess_type(s)[0]
    except TypeError:
        return False
    # match the top-level type only, a substring test would also accept
    # types such as "application/x-iso9660-image"
    return (guessed or "").startswith("image/")
|
||||
|
||||
|
||||
def is_audio_jinja(s: str) -> bool:
    """
    Jinja filter: tell whether the mimetype guessed from the file name or URL
    `s` is an audio type.

    Returns False when no type can be guessed or when `s` is not something
    `mimetypes.guess_type` accepts (e.g. None), so a template never fails on
    an unexpected value.
    """
    try:
        guessed = mimetypes.guess_type(s)[0]
    except TypeError:
        return False
    # match the top-level type only, a substring test would also accept
    # unrelated types that merely contain the word "audio"
    return (guessed or "").startswith("audio/")
|
||||
|
||||
def is_media_jinja(v) -> bool:
    """Jinja filter: tell whether the given value is a `Media` instance."""
    value_is_media = isinstance(v, Media)
    return value_is_media
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{# templates/results.html #}
|
||||
|
||||
{% import 'media.html' as macros %}
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
@@ -55,6 +55,45 @@
|
||||
font-size: large;
|
||||
display: none;
|
||||
}
|
||||
|
||||
img,
|
||||
video {
|
||||
filter: gray;
|
||||
-webkit-filter: grayscale(1);
|
||||
filter: grayscale(1);
|
||||
}
|
||||
|
||||
/* Disable grayscale on hover */
|
||||
img:hover,
|
||||
video:hover {
|
||||
-webkit-filter: grayscale(0);
|
||||
filter: none;
|
||||
}
|
||||
|
||||
.collapsible {
|
||||
background-color: #777;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
padding: 5px;
|
||||
margin: 10px;
|
||||
width: 100%;
|
||||
border: none;
|
||||
text-align: left;
|
||||
outline: none;
|
||||
font-size: 15px;
|
||||
}
|
||||
|
||||
.active,
|
||||
.collapsible:hover {
|
||||
background-color: #555;
|
||||
}
|
||||
|
||||
.collapsible-content {
|
||||
padding: 0 18px;
|
||||
display: none;
|
||||
overflow: hidden;
|
||||
background-color: #f1f1f1;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
@@ -76,37 +115,31 @@
|
||||
<li><b>type:</b> <span class="copy">{{ m.mimetype }}</span></li>
|
||||
|
||||
{% for prop in m.properties %}
|
||||
{% if m.properties[prop] | length > 1 %}
|
||||
|
||||
{% if m.properties[prop] | is_list %}
|
||||
<p></p>
|
||||
<div>
|
||||
<b class="collapsible" title="expand">{{ prop }}:</b>
|
||||
<div class="collapsible-content">
|
||||
{% for subprop in m.properties[prop] %}
|
||||
{% if subprop | is_media %}
|
||||
{{ macros.display_media(subprop) }}
|
||||
{% else %}
|
||||
{{ subprop }}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
<p></p>
|
||||
{% elif m.properties[prop] | length > 1 %}
|
||||
<li><b>{{ prop }}:</b> <span class="copy">{{ m.properties[prop] }}</span></li>
|
||||
{% endif %}
|
||||
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
</td>
|
||||
<td>
|
||||
{% for url in m.urls %}
|
||||
{% if 'http' in url %}
|
||||
{% if 'image' in m.mimetype %}
|
||||
<a href="{{ url }}">
|
||||
<img src="{{ url }}" style="max-height:400px;max-width:400px;"></img>
|
||||
</a>
|
||||
{% elif 'video' in m.mimetype %}
|
||||
<video src="{{ url }}" controls style="max-height:400px;max-width:600px;">
|
||||
Your browser does not support the video element.
|
||||
</video>
|
||||
{% elif 'audio' in m.mimetype %}
|
||||
<audio controls>
|
||||
<source src="{{ url }}" type="{{ m.mimetype }}">
|
||||
Your browser does not support the audio element.
|
||||
</audio>
|
||||
{% else %}
|
||||
No preview available.
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
<br>
|
||||
<a href="{{ url }}">open</a> or
|
||||
<a href="{{ url }}" download="">download</a>
|
||||
{% endfor %}
|
||||
{{ macros.display_media(m) }}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
@@ -153,10 +186,27 @@
|
||||
if (e.clipboardData) {
|
||||
e.clipboardData.setData("text/plain", el.textContent);
|
||||
console.log(e.clipboardData.getData("text"))
|
||||
showNotification("copied...")
|
||||
showNotification("copied!")
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
// collapsibles
|
||||
let coll = document.getElementsByClassName("collapsible");
|
||||
let i;
|
||||
|
||||
for (i = 0; i < coll.length; i++) {
|
||||
coll[i].addEventListener("click", function() {
|
||||
this.classList.toggle("active");
|
||||
// let content = this.nextElementSibling;
|
||||
let content = this.parentElement.querySelector(".collapsible-content");
|
||||
if (content.style.display === "block") {
|
||||
content.style.display = "none";
|
||||
} else {
|
||||
content.style.display = "block";
|
||||
}
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
</html>
|
||||
28
src/formatters/templates/media.html
Normal file
28
src/formatters/templates/media.html
Normal file
@@ -0,0 +1,28 @@
|
||||
{% macro display_media(m) -%}
{# Renders one preview per entry of m.urls: an image, video or audio element
   for http(s) URLs depending on m.mimetype, a notice when the URL is empty or
   the type has no preview, and a clickable link for any other URL. #}
{# guard against a missing mimetype so the `in` tests below cannot fail #}
{% set mimetype = m.mimetype or '' %}

{% for url in m.urls %}
    {% if url | length == 0 %}
        No URL available for {{ m.key }}.
    {% elif 'http' in url %}
        {% if 'image' in mimetype %}
            <a href="{{ url }}">
                {# img is a void element: it takes no closing tag #}
                <img src="{{ url }}" style="max-height:400px;max-width:400px;">
            </a>
        {% elif 'video' in mimetype %}
            <video src="{{ url }}" controls style="max-height:400px;max-width:600px;">
                Your browser does not support the video element.
            </video>
        {% elif 'audio' in mimetype %}
            <audio controls>
                <source src="{{ url }}" type="{{ mimetype }}">
                Your browser does not support the audio element.
            </audio>
        {% else %}
            No preview available for {{ m.key }}.
        {% endif %}
    {% else %}
        {# render the URL being iterated, not an attribute of the media item #}
        {{ url | urlize }}
    {% endif %}
{% endfor %}

{%- endmacro -%}
|
||||
@@ -1,18 +1,17 @@
|
||||
|
||||
from __future__ import annotations
|
||||
from ast import List
|
||||
from typing import Any, Union, Dict
|
||||
from typing import Any
|
||||
from dataclasses import dataclass, field
|
||||
import mimetypes
|
||||
|
||||
|
||||
@dataclass
|
||||
class Media:
|
||||
# other properties eg: hash, id, exif, ...
|
||||
filename: str
|
||||
key: str = None
|
||||
_mimetype: str = None # eg: image/jpeg
|
||||
urls: List[str] = field(default_factory=list)
|
||||
_mimetype: str = None # eg: image/jpeg
|
||||
properties: dict = field(default_factory=dict)
|
||||
|
||||
def set(self, key: str, value: Any) -> Media:
|
||||
@@ -38,4 +37,4 @@ class Media:
|
||||
self._mimetype = v
|
||||
|
||||
def is_video(self) -> bool:
|
||||
return self._mimetype.startswith("video")
|
||||
return self.mimetype.startswith("video")
|
||||
|
||||
@@ -6,6 +6,7 @@ from dataclasses import dataclass
|
||||
from archivers import Archiverv2
|
||||
from feeders import Feeder
|
||||
from formatters import Formatter
|
||||
from media import Media
|
||||
from storages import StorageV2
|
||||
from enrichers import Enricher
|
||||
from databases import Database
|
||||
@@ -177,6 +178,13 @@ class ArchivingOrchestrator:
|
||||
for s in self.storages:
|
||||
for m in result.media:
|
||||
s.store(m, result) # modifies media
|
||||
# Media can be inside media properties, examples include transformations on original media
|
||||
for prop in m.properties.values():
|
||||
if isinstance(prop, Media):
|
||||
s.store(prop, result)
|
||||
if isinstance(prop, list) and len(prop)>0 and isinstance(prop[0], Media):
|
||||
for prop_media in prop:
|
||||
s.store(prop_media, result)
|
||||
|
||||
# formatters, enrichers, and storages will sometimes look for specific properties: eg <li>Screenshot: <img src="{res.get("screenshot")}"> </li>
|
||||
# TODO: should there only be 1 formatter?
|
||||
|
||||
Reference in New Issue
Block a user