V0.9.0 - closes several open issues: new enrichers and bug fixes (#133)

* clean orchestrator code, add archiver cleanup logic

* improves documentation for database.py

* telethon archivers isolate sessions into copied files

* closes #127

* closes #125

* closes #84

* meta enricher applies to all media

* closes #61 adds subtitles and comments

* minor update

* minor fixes to yt-dlp subtitles and comments

* closes #17 but logic is imperfect.

* closes #85 ssl enhancer

* minifies html, JS refactor for preview of certificates

* closes #91 adds freetsa timestamp authority

* version bump

* simplify download_url method

* skip ssl if nothing archived

* html preview improvements

* adds retrying lib

* manual download archiver improvements

* meta only runs when relevant data available

* new metadata convenience method

* html template improvements

* removes debug message

* does not close #91 yet, will need some more certificate chain logging

* adds verbosity config

* new instagram api archiver

* adds proxy support we

* adds proxy/end support and bug fix for yt-dlp

* proxy support for webdriver

* adds socks proxy to wacz_enricher

* refactor recursivity in inner media and display

* infinite recursive display

* foolproofing timestamping authorities

* version to 0.9.0

* minor fixes from code-review
This commit is contained in:
Miguel Sozinho Ramalho
2024-02-20 18:05:29 +00:00
committed by GitHub
parent 5c49124ac6
commit 7a21ae96af
34 changed files with 1696 additions and 880 deletions

View File

@@ -4,6 +4,8 @@ import mimetypes, os, pathlib
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote
from loguru import logger
import minify_html, json
import base64
from ..version import __version__
from ..core import Metadata, Media, ArchivingContext
@@ -45,6 +47,8 @@ class HtmlFormatter(Formatter):
metadata=item.metadata,
version=__version__
)
content = minify_html.minify(content, minify_js=False, minify_css=True)
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{random_str(24)}.html")
with open(html_path, mode="w", encoding="utf-8") as outf:
outf.write(content)
@@ -89,3 +93,8 @@ class JinjaHelpers:
@staticmethod
def quote(s: str) -> str:
# Helper exposed on JinjaHelpers: delegates to the module-level `quote`
# imported from urllib.parse and returns its result for the given string.
return quote(s)
@staticmethod
def json_dump_b64(d: dict) -> str:
    """Serialize *d* to 4-space-indented JSON and return it Base64-encoded.

    Values the ``json`` module cannot encode natively are converted with
    ``str``. The result is the Base64 text of the encoded JSON string.
    """
    serialized = json.dumps(d, default=str, indent=4)
    raw = serialized.encode()
    return base64.b64encode(raw).decode()

View File

@@ -96,6 +96,16 @@
overflow: hidden;
background-color: #f1f1f1;
}
.pem-certificate, .text-preview {
text-align: left;
font-size: small;
}
.text-preview{
padding-left: 10px;
padding-right: 10px;
white-space: pre-wrap;
}
</style>
</head>
@@ -121,42 +131,7 @@
{% for m in media %}
<tr>
<td>
<ul>
<li><b>key:</b> <span class="copy">{{ m.key }}</span></li>
<li><b>type:</b> <span class="copy">{{ m.mimetype }}</span></li>
{% for prop in m.properties %}
{% if m.properties[prop] | is_list %}
<p></p>
<div>
<b class="collapsible" title="expand">{{ prop }}:</b>
<p></p>
<div class="collapsible-content">
{% for subprop in m.properties[prop] %}
{% if subprop | is_media %}
{{ macros.display_media(subprop, true, url) }}
<ul>
{% for subprop_prop in subprop.properties %}
<li><b>{{ subprop_prop }}:</b>
{{ macros.copy_urlize(subprop.properties[subprop_prop]) }}</li>
{% endfor %}
</ul>
{% else %}
{{ subprop }}
{% endif %}
{% endfor %}
</div>
</div>
<p></p>
{% elif m.properties[prop] | string | length > 1 %}
<li><b>{{ prop }}:</b> {{ macros.copy_urlize(m.properties[prop]) }}</li>
{% endif %}
{% endfor %}
</ul>
{{ macros.display_recursive(m, true) }}
</td>
<td>
{{ macros.display_media(m, true, url) }}
@@ -175,16 +150,68 @@
<tr>
<td>{{ key }}</td>
<td>
{% if metadata[key] is mapping %}
<div class="center copy" copy-value64='{{metadata[key] | json_dump_b64}}'>Copy as JSON</div>
{% endif %}
{{ macros.copy_urlize(metadata[key]) }}
</td>
</tr>
{% endfor %}
</table>
<p style="text-align:center;">Made with <a
href="https://github.com/bellingcat/auto-archiver">bellingcat/auto-archiver</a> v{{ version }}</p>
<p class="center">Made with <a href="https://github.com/bellingcat/auto-archiver">bellingcat/auto-archiver</a>
v{{ version }}</p>
</body>
<script src="https://cdnjs.cloudflare.com/ajax/libs/forge/0.10.0/forge.min.js"></script>
<script defer>
// Partially decode a PEM-encoded SSL certificate into an HTML preview snippet.
// Parsing is done with forge (forge.pki.certificateFromPem), which throws on
// input it cannot parse. Returns an HTML string listing subject, issuer,
// validity window and serial number.
function decodeCertificate(sslCert) {
    // Certificate fields come from the archived site and the result is assigned
    // to innerHTML by the caller, so every value is HTML-escaped first.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;"
    })[ch]);
    // Render a distinguished name as "CN: example.org, O: Example".
    const formatName = (name) => escapeHtml(
        name.attributes.map(attr => `${attr.shortName}: ${attr.value}`).join(", ")
    );

    var cert = forge.pki.certificateFromPem(sslCert);
    return `SSL CERTIFICATE PREVIEW:<br/><ul>
<li><b>Subject:</b> <span class="copy">${formatName(cert.subject)}</span></li>
<li><b>Issuer:</b> <span class="copy">${formatName(cert.issuer)}</span></li>
<li><b>Valid From:</b> <span class="copy">${escapeHtml(cert.validity.notBefore)}</span></li>
<li><b>Valid To:</b> <span class="copy">${escapeHtml(cert.validity.notAfter)}</span></li>
<li><b>Serial Number:</b> <span class="copy">${escapeHtml(cert.serialNumber)}</span></li>
</ul>`;
}
// Page entry point: enables each interactive feature in the original order.
// Every step is awaited on its own and guarded, so a failure in one step
// (for example a certificate that cannot be fetched or parsed) is logged and
// does not stop the later steps from being set up.
async function run() {
    const steps = [
        PreviewCertificates,
        PreviewText,
        enableCopyLogic,
        enableCollapsibleLogic,
        setupSafeView
    ];
    for (const step of steps) {
        try {
            await step();
        } catch (err) {
            console.error(`${step.name} failed:`, err);
        }
    }
}
// Fill every ".pem-certificate" element with a decoded preview of the PEM file
// referenced by its "pem" attribute, and append a CyberChef link carrying the
// Base64-encoded certificate next to it.
// Each element is handled independently: a failed download or an unparsable
// certificate is logged and leaves the other previews unaffected.
async function PreviewCertificates() {
    await Promise.all(
        Array.from(document.querySelectorAll(".pem-certificate")).map(async el => {
            const pemUrl = el.getAttribute("pem");
            try {
                const response = await fetch(pemUrl);
                // fetch() only rejects on network errors; treat HTTP errors as failures too
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status} while fetching certificate`);
                }
                const certificate = await response.text();
                el.innerHTML = decodeCertificate(certificate);
                const cyberChefUrl =
                    `https://gchq.github.io/CyberChef/#recipe=Parse_X.509_certificate('PEM')&input=${btoa(certificate)}`;
                // create a new anchor with this url and append after the code
                const a = document.createElement("a");
                a.href = cyberChefUrl;
                a.textContent = "Full certificate details";
                el.parentElement.appendChild(a);
            } catch (err) {
                console.error("certificate preview failed for", pemUrl, err);
            }
        })
    );
    console.log("certificate preview done");
}
// Load the text file referenced by the "url" attribute of every ".text-preview"
// element and show it as the element's text content (never as HTML).
// Each element is handled independently: a failed download is logged and the
// remaining previews still load; HTTP error bodies are not shown as content.
async function PreviewText() {
    await Promise.all(
        Array.from(document.querySelectorAll(".text-preview")).map(async el => {
            const textUrl = el.getAttribute("url");
            try {
                const response = await fetch(textUrl);
                // fetch() only rejects on network errors; treat HTTP errors as failures too
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status} while fetching text`);
                }
                el.textContent = await response.text();
            } catch (err) {
                console.error("text preview failed for", textUrl, err);
            }
        })
    );
    console.log("text preview done");
}
// notification logic
const notification = document.getElementById("notification");
@@ -198,83 +225,99 @@
}
// copy logic
Array.from(document.querySelectorAll(".copy")).forEach(el => {
el.onclick = () => {
document.execCommand("copy");
}
el.addEventListener("copy", (e) => {
e.preventDefault();
if (e.clipboardData) {
if (el.hasAttribute("copy-value")) {
e.clipboardData.setData("text/plain", el.getAttribute("copy-value"));
} else {
e.clipboardData.setData("text/plain", el.textContent);
async function enableCopyLogic() {
await Promise.all(
Array.from(document.querySelectorAll(".copy")).map(el => {
el.onclick = () => {
document.execCommand("copy");
}
console.log(e.clipboardData.getData("text"))
showNotification("copied!")
}
})
})
el.addEventListener("copy", (e) => {
e.preventDefault();
if (e.clipboardData) {
if (el.hasAttribute("copy-value")) {
e.clipboardData.setData("text/plain", el.getAttribute("copy-value"));
} else if (el.hasAttribute("copy-value64")) {
// TODO: figure out how to decode unicode chars into utf-8
e.clipboardData.setData("text/plain", new String(atob(el.getAttribute(
"copy-value64"))));
} else {
e.clipboardData.setData("text/plain", el.textContent);
}
console.log(e.clipboardData.getData("text"))
showNotification("copied!")
}
})
})
)
console.log("copy logic enabled");
}
// collapsibles
let coll = document.getElementsByClassName("collapsible");
let i;
for (i = 0; i < coll.length; i++) {
coll[i].addEventListener("click", function () {
this.classList.toggle("active");
// let content = this.nextElementSibling;
let content = this.parentElement.querySelector(".collapsible-content");
if (content.style.display === "block") {
content.style.display = "none";
} else {
content.style.display = "block";
}
});
// Attach a click handler to every ".collapsible" element that toggles its
// "active" class and shows/hides the ".collapsible-content" element found
// under the same parent.
// Kept async so callers can still await it; registering listeners is
// synchronous, so no per-element Promise is needed.
async function enableCollapsibleLogic() {
    const toggles = Array.from(document.getElementsByClassName("collapsible"));
    toggles.forEach(toggle => {
        toggle.addEventListener("click", function () {
            this.classList.toggle("active");
            const content = this.parentElement.querySelector(".collapsible-content");
            // a collapsible header without a content sibling has nothing to show/hide
            if (!content) {
                return;
            }
            content.style.display = content.style.display === "block" ? "none" : "block";
        });
    });
    console.log("collapsible logic enabled");
}
// logic for enabled/disabled greyscale
// Get references to the checkboxes and images/videos
const safeImageViewCheckbox = document.getElementById('safe-media-view');
const imagesVideos = document.querySelectorAll('img, video');
async function setupSafeView() {
// logic for enabled/disabled greyscale
// Get references to the checkboxes and images/videos
const safeImageViewCheckbox = document.getElementById('safe-media-view');
const imagesVideos = document.querySelectorAll('img, video');
// Function to toggle grayscale effect
function toggleGrayscale() {
// Function to toggle grayscale effect
function toggleGrayscale() {
imagesVideos.forEach(element => {
if (safeImageViewCheckbox.checked) {
// Enable grayscale effect
element.style.filter = 'grayscale(1)';
element.style.webkitFilter = 'grayscale(1)';
} else {
// Disable grayscale effect
element.style.filter = 'none';
element.style.webkitFilter = 'none';
}
});
}
// Add event listener to the checkbox to trigger the toggleGrayscale function
safeImageViewCheckbox.addEventListener('change', toggleGrayscale);
// Handle the hover effect using JavaScript
imagesVideos.forEach(element => {
if (safeImageViewCheckbox.checked) {
// Enable grayscale effect
element.style.filter = 'grayscale(1)';
element.style.webkitFilter = 'grayscale(1)';
} else {
// Disable grayscale effect
element.addEventListener('mouseenter', () => {
// Disable grayscale effect on hover
element.style.filter = 'none';
element.style.webkitFilter = 'none';
}
});
element.addEventListener('mouseleave', () => {
// Re-enable grayscale effect if checkbox is checked
if (safeImageViewCheckbox.checked) {
element.style.filter = 'grayscale(1)';
element.style.webkitFilter = 'grayscale(1)';
}
});
});
toggleGrayscale();
console.log("grayscale logic enabled");
}
// Add event listener to the checkbox to trigger the toggleGrayscale function
safeImageViewCheckbox.addEventListener('change', toggleGrayscale);
// Handle the hover effect using JavaScript
imagesVideos.forEach(element => {
element.addEventListener('mouseenter', () => {
// Disable grayscale effect on hover
element.style.filter = 'none';
element.style.webkitFilter = 'none';
});
element.addEventListener('mouseleave', () => {
// Re-enable grayscale effect if checkbox is checked
if (safeImageViewCheckbox.checked) {
element.style.filter = 'grayscale(1)';
element.style.webkitFilter = 'grayscale(1)';
}
});
});
// Call the function on page load to apply the initial state
toggleGrayscale();
run();
</script>
</html>

View File

@@ -18,6 +18,12 @@ No URL available for {{ m.key }}.
<a href="https://www.bing.com/images/search?view=detailv2&iss=sbi&form=SBIVSP&sbisrc=UrlPaste&q=imgurl:{{ url | quote }}">Bing</a>,&nbsp;
<a href="https://www.tineye.com/search/?url={{ url | quote }}">Tineye</a>
</div>
<div>
Image Forensics:&nbsp;
<a href="https://fotoforensics.com/?url={{ url | quote }}">FotoForensics</a>,&nbsp;
<a href="https://mever.iti.gr/forensics/?image={{ url }}">Media Verification Assistant</a>
</div>
<p></p>
</div>
{% elif 'video' in m.mimetype %}
@@ -35,8 +41,15 @@ No URL available for {{ m.key }}.
</div>
{% elif m.filename | get_extension == ".wacz" %}
<a href="https://replayweb.page/?source={{ url | quote }}#view=pages&url={{ main_url }}">replayweb</a>
{% elif m.filename | get_extension == ".pem" %}
<code class="pem-certificate" pem="{{url}}"></code>
{% elif 'text' in m.mimetype %}
<div>PREVIEW:<br/><code><pre class="text-preview" url="{{url}}"></pre></code></div>
{% else %}
No preview available for {{ m.key }}.
No preview available for <code>{{ m.key }}</code>.
{% endif %}
{% else %}
{{ m.url | urlize }}
@@ -54,7 +67,12 @@ No preview available for {{ m.key }}.
{% macro copy_urlize(val, href_text) -%}
{% if val is mapping %}
{% if val | is_list %}
{% for item in val %}
{{ copy_urlize(item) }}
{% endfor %}
{% elif val is mapping %}
<ul>
{% for key in val %}
<li>
@@ -64,11 +82,66 @@ No preview available for {{ m.key }}.
</ul>
{% else %}
{% if href_text | length == 0 %}
{% if href_text | length == 0 %}
<span class="copy">{{ val | string | urlize }}</span>
{% else %}
<span class="copy" copy-value="{{val}}">{{ href_text | string | urlize }}</span>
{% endif %}
{% endif %}
{%- endmacro -%}
{# display_recursive(prop, skip_display): renders a value by dispatching on its shape.
   - mapping: a "Copy as JSON" element (payload produced by the json_dump_b64 filter)
     followed by a <ul> of "key: value" items, each value rendered recursively.
   - list (is_list filter): one <li> per item, each rendered recursively.
   - media (is_media filter): optional preview via display_media, then its key,
     mimetype and properties.
   - anything else: delegated to copy_urlize.
   Recursive calls do not pass skip_display, so it is only set by the outermost caller. #}
{% macro display_recursive(prop, skip_display) -%}
{% if prop is mapping %}
<div class="center copy" copy-value64='{{prop | json_dump_b64}}'>Copy as JSON</div>
<ul>
{% for subprop in prop %}
<li>
<b>{{ subprop }}:</b>
{{ display_recursive(prop[subprop]) }}
</li>
{% endfor %}
</ul>
{% elif prop | is_list %}
{% for item in prop %}
<li>
{{ display_recursive(item) }}
</li>
{% endfor %}
{% elif prop | is_media %}
{# the preview is rendered only when skip_display is not truthy #}
{% if not skip_display %}
{{ display_media(prop, true) }}
{% endif %}
<ul>
<li><b>key:</b> <span class="copy">{{ prop.key }}</span></li>
<li><b>type:</b> <span class="copy">{{ prop.mimetype }}</span></li>
{% for subprop in prop.properties %}
{% if prop.properties[subprop] | is_list %}
{# list-valued properties go into a collapsible section whose title shows the item count #}
<p></p>
<div>
<b class="collapsible" title="expand">{{ subprop }} ({{ prop.properties[subprop] | length }}):</b>
<p></p>
<div class="collapsible-content">
{% for subsubprop in prop.properties[subprop] %}
{{ display_recursive(subsubprop) }}
{% endfor %}
</div>
</div>
<p></p>
{# other properties are shown only when their string form is longer than one character #}
{% elif prop.properties[subprop] | string | length > 1 %}
<li><b>{{ subprop }}:</b> {{ copy_urlize(prop.properties[subprop]) }}</li>
{% endif %}
{% endfor %}
</ul>
{% else %}
{{ copy_urlize(prop) }}
{% endif %}
{%- endmacro -%}