mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
32 lines
961 B
Python
32 lines
961 B
Python
import ssl
|
|
import os
|
|
from slugify import slugify
|
|
from urllib.parse import urlparse
|
|
from loguru import logger
|
|
|
|
from auto_archiver.core import Enricher
|
|
from auto_archiver.core import Metadata, Media
|
|
|
|
|
|
class SSLEnricher(Enricher):
    """
    Retrieves SSL certificate information for a domain, as a file
    """

    def enrich(self, to_enrich: Metadata) -> None:
        """Fetch the server certificate for the item's URL and attach it as media.

        The certificate is retrieved with ``ssl.get_server_certificate``,
        written as a ``.pem`` file inside ``self.tmp_dir`` and added to
        ``to_enrich`` under the media id ``"ssl_certificate"``.

        Nothing is done when ``to_enrich`` has no media and
        ``self.skip_when_nothing_archived`` is truthy.

        Args:
            to_enrich: The metadata item whose URL's certificate is fetched.

        Raises:
            AssertionError: If the URL scheme is not ``https``.
            ValueError: If no host name can be extracted from the URL (or the
                URL carries an invalid port, as raised by ``urlparse``).
        """
        if not to_enrich.media and self.skip_when_nothing_archived:
            return

        url = to_enrich.get_url()
        parsed = urlparse(url)
        # Raised explicitly rather than via `assert`, which is stripped when
        # Python runs with -O; the exception type and message are unchanged.
        if parsed.scheme != "https":
            raise AssertionError(f"Invalid URL scheme {url=}")

        # `netloc` may carry "user:pass@" and ":port"; only the bare host name
        # is a valid connection target (and safe to put in a file name).
        domain = parsed.hostname
        if not domain:
            raise ValueError(f"Cannot determine host name from {url=}")
        # Honour an explicit port in the URL, falling back to the HTTPS default.
        port = parsed.port or 443
        logger.debug(f"fetching SSL certificate for {domain=} in {url=}")

        cert = ssl.get_server_certificate((domain, port))
        # Keep certificates from different ports of the same host apart.
        cert_name = domain if parsed.port is None else f"{domain}-{parsed.port}"
        cert_fn = os.path.join(self.tmp_dir, f"{slugify(cert_name)}.pem")
        with open(cert_fn, "w", encoding="utf-8") as f:
            f.write(cert)
        to_enrich.add_media(Media(filename=cert_fn), id="ssl_certificate")