mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Merge pull request #169 from bellingcat/remove_dependencies
Tidy up and remove dependencies
This commit is contained in:
904
poetry.lock
generated
904
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -23,9 +23,7 @@ classifiers = [
|
||||
|
||||
dependencies = [
|
||||
"gspread (>=0.0.0)",
|
||||
"argparse (>=0.0.0)",
|
||||
"beautifulsoup4 (>=0.0.0)",
|
||||
"tiktok-downloader (>=0.0.0)",
|
||||
"bs4 (>=0.0.0)",
|
||||
"loguru (>=0.0.0)",
|
||||
"ffmpeg-python (>=0.0.0)",
|
||||
@@ -55,11 +53,9 @@ dependencies = [
|
||||
"warcio (>=0.0.0)",
|
||||
"jsonlines (>=0.0.0)",
|
||||
"pysubs2 (>=0.0.0)",
|
||||
"minify-html (>=0.0.0)",
|
||||
"retrying (>=0.0.0)",
|
||||
"tsp-client (>=0.0.0)",
|
||||
"certvalidator (>=0.0.0)",
|
||||
"toml (>=0.10.2,<0.11.0)"
|
||||
]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json, os, traceback
|
||||
import tiktok_downloader
|
||||
from loguru import logger
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class Metadata:
|
||||
media: List[Media] = field(default_factory=list)
|
||||
|
||||
def __post_init__(self):
|
||||
self.set("_processed_at", datetime.datetime.utcnow())
|
||||
self.set("_processed_at", datetime.datetime.now(datetime.timezone.utc))
|
||||
|
||||
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
|
||||
"""
|
||||
|
||||
@@ -65,7 +65,7 @@ class GsheetsDb(Database):
|
||||
media: Media = item.get_final_media()
|
||||
if hasattr(media, "urls"):
|
||||
batch_if_valid('archive', "\n".join(media.urls))
|
||||
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
|
||||
batch_if_valid('date', True, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=datetime.timezone.utc).isoformat())
|
||||
batch_if_valid('title', item.get_title())
|
||||
batch_if_valid('text', item.get("content", ""))
|
||||
batch_if_valid('timestamp', item.get_timestamp())
|
||||
|
||||
@@ -55,5 +55,5 @@ class MetaEnricher(Enricher):
|
||||
def enrich_archive_duration(self, to_enrich):
|
||||
logger.debug(f"calculating archive duration for url={to_enrich.get_url()} ")
|
||||
|
||||
archive_duration = datetime.datetime.utcnow() - to_enrich.get("_processed_at")
|
||||
archive_duration = datetime.datetime.now(datetime.timezone.utc) - to_enrich.get("_processed_at")
|
||||
to_enrich.set("archive_duration_seconds", archive_duration.seconds)
|
||||
@@ -4,7 +4,7 @@ import mimetypes, os, pathlib
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from urllib.parse import quote
|
||||
from loguru import logger
|
||||
import minify_html, json
|
||||
import json
|
||||
import base64
|
||||
|
||||
from ..version import __version__
|
||||
@@ -47,7 +47,6 @@ class HtmlFormatter(Formatter):
|
||||
metadata=item.metadata,
|
||||
version=__version__
|
||||
)
|
||||
content = minify_html.minify(content, minify_js=False, minify_css=True)
|
||||
|
||||
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{random_str(24)}.html")
|
||||
with open(html_path, mode="w", encoding="utf-8") as outf:
|
||||
|
||||
17
tests/archivers/test_tiktok_archiver.py
Normal file
17
tests/archivers/test_tiktok_archiver.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import pytest
|
||||
|
||||
from .test_archiver_base import TestArchiverBase
|
||||
from auto_archiver.archivers.tiktok_archiver import TiktokArchiver
|
||||
|
||||
class TestBlueskyArchiver(TestArchiverBase):
|
||||
|
||||
archiver_class = TiktokArchiver
|
||||
config = {}
|
||||
|
||||
@pytest.mark.xfail(reason="Tiktok API is not working")
|
||||
@pytest.mark.download
|
||||
def test_download_video(self, make_item):
|
||||
# cat video
|
||||
url = "https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en"
|
||||
item = self.archiver.download(make_item(url))
|
||||
assert item.success
|
||||
Reference in New Issue
Block a user