mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
* clean orchestrator code, add archiver cleanup logic * improves documentation for database.py * telethon archivers isolate sessions into copied files * closes #127 * closes #125 * closes #84 * meta enricher applies to all media * closes #61 adds subtitles and comments * minor update * minor fixes to yt-dlp subtitles and comments * closes #17 but logic is imperfect. * closes #85 ssl enhancer * minimifies html, JS refactor for preview of certificates * closes #91 adds freetsa timestamp authority * version bump * simplify download_url method * skip ssl if nothing archived * html preview improvements * adds retrying lib * manual download archiver improvements * meta only runs when relevant data available * new metadata convenience method * html template improvements * removes debug message * does not close #91 yet, will need a few more certificate chaing logging * adds verbosity config * new instagram api archiver * adds proxy support we * adds proxy/end support and bug fix for yt-dlp * proxy support for webdriver * adds socks proxy to wacz_enricher * refactor recursivity in inner media and display * infinite recursive display * foolproofing timestamping authortities * version to 0.9.0 * minor fixes from code-review
37 lines
1.2 KiB
Python
37 lines
1.2 KiB
Python
import ssl, os
|
|
from slugify import slugify
|
|
from urllib.parse import urlparse
|
|
from loguru import logger
|
|
|
|
from . import Enricher
|
|
from ..core import Metadata, ArchivingContext, Media
|
|
|
|
|
|
class SSLEnricher(Enricher):
    """
    Retrieves SSL certificate information for a domain, as a file.

    The PEM certificate served by the host of the archived URL is written
    into the archiving temporary directory and attached to the metadata as
    a media item with id "ssl_certificate".
    """
    name = "ssl_enricher"

    def __init__(self, config: dict) -> None:
        super().__init__(config)
        # NOTE(review): the attribute is read before this class assigns it, so it is
        # presumably populated from `config` by the base class - confirm in Enricher.
        # bool() of a non-empty string such as "false" is True.
        self.skip_when_nothing_archived = bool(self.skip_when_nothing_archived)

    @staticmethod
    def configs() -> dict:
        """Return the options this enricher accepts, with their defaults and help text."""
        return {
            "skip_when_nothing_archived": {"default": True, "help": "if true, will skip enriching when no media is archived"},
        }

    def enrich(self, to_enrich: Metadata) -> None:
        """
        Fetch the server certificate for the URL of `to_enrich` and attach it as media.

        Does nothing when no media has been archived and
        `skip_when_nothing_archived` is set, or when the URL has no host.
        Errors raised while connecting or writing the file are not caught here.
        """
        if not to_enrich.media and self.skip_when_nothing_archived:
            return

        url = to_enrich.get_url()
        parsed = urlparse(url)

        # `netloc` may carry credentials, a port or IPv6 brackets ("user@host:8443"),
        # none of which is a resolvable host name; `hostname` strips all of them.
        domain = parsed.hostname
        if not domain:
            logger.warning(f"unable to extract a hostname from {url=}, skipping SSL certificate")
            return

        # Only an explicit port on an https URL is known to speak TLS; anything
        # else falls back to the standard HTTPS port.
        try:
            explicit_port = parsed.port
        except ValueError:  # non-numeric or out-of-range port in the URL
            explicit_port = None
        port = explicit_port if parsed.scheme == "https" and explicit_port else 443

        logger.debug(f"fetching SSL certificate for {domain=} in {url=}")

        cert = ssl.get_server_certificate((domain, port))
        cert_fn = os.path.join(ArchivingContext.get_tmp_dir(), f"{slugify(domain)}.pem")
        with open(cert_fn, "w", encoding="utf-8") as f:
            f.write(cert)
        to_enrich.add_media(Media(filename=cert_fn), id="ssl_certificate")
|