Reduce S3 duplication while keeping random URLs via hash (#112)

This commit is contained in:
Miguel Sozinho Ramalho
2023-12-12 19:12:03 +00:00
committed by GitHub
parent 9ee323a654
commit 3e56ef137d
9 changed files with 355 additions and 294 deletions

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from dataclasses import dataclass
import mimetypes, uuid, os, pathlib
import mimetypes, os, pathlib
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote
from loguru import logger
@@ -9,6 +9,7 @@ from ..version import __version__
from ..core import Metadata, Media, ArchivingContext
from . import Formatter
from ..enrichers import HashEnricher
from ..utils.misc import random_str
@dataclass
@@ -44,7 +45,7 @@ class HtmlFormatter(Formatter):
metadata=item.metadata,
version=__version__
)
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{str(uuid.uuid4())}.html")
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{random_str(24)}.html")
with open(html_path, mode="w", encoding="utf-8") as outf:
outf.write(content)
final_media = Media(filename=html_path, _mimetype="text/html")