Merge branch 'main' into youtubedlp-rewrite

This commit is contained in:
Patrick Robertson
2025-01-15 17:47:23 +01:00
10 changed files with 57 additions and 900 deletions

View File

@@ -1,5 +1,4 @@
import json, os, traceback
import tiktok_downloader
from loguru import logger

View File

@@ -21,7 +21,7 @@ class Metadata:
media: List[Media] = field(default_factory=list)
def __post_init__(self):
self.set("_processed_at", datetime.datetime.utcnow())
self.set("_processed_at", datetime.datetime.now(datetime.timezone.utc))
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
"""

View File

@@ -65,7 +65,7 @@ class GsheetsDb(Database):
media: Media = item.get_final_media()
if hasattr(media, "urls"):
batch_if_valid('archive', "\n".join(media.urls))
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
batch_if_valid('date', True, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=datetime.timezone.utc).isoformat())
batch_if_valid('title', item.get_title())
batch_if_valid('text', item.get("content", ""))
batch_if_valid('timestamp', item.get_timestamp())

View File

@@ -55,5 +55,5 @@ class MetaEnricher(Enricher):
def enrich_archive_duration(self, to_enrich):
logger.debug(f"calculating archive duration for url={to_enrich.get_url()} ")
archive_duration = datetime.datetime.utcnow() - to_enrich.get("_processed_at")
archive_duration = datetime.datetime.now(datetime.timezone.utc) - to_enrich.get("_processed_at")
to_enrich.set("archive_duration_seconds", archive_duration.seconds)

View File

@@ -4,7 +4,7 @@ import mimetypes, os, pathlib
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote
from loguru import logger
import minify_html, json
import json
import base64
from ..version import __version__
@@ -47,7 +47,6 @@ class HtmlFormatter(Formatter):
metadata=item.metadata,
version=__version__
)
content = minify_html.minify(content, minify_js=False, minify_css=True)
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{random_str(24)}.html")
with open(html_path, mode="w", encoding="utf-8") as outf: