fixing imports

2026-06-12 13:18:28 +03:00 · 2023-01-27 00:19:58 +00:00
parent ac000d5943
commit d1e4dde3f6
28 changed files with 38 additions and 161 deletions
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -11,8 +11,8 @@ from ..feeders import Feeder
 from ..databases import Database
 from ..formatters import Formatter
 from ..storages import Storage
-from . import Step
 from ..enrichers import Enricher
+from . import Step


@dataclass
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -6,13 +6,11 @@ from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json
 import datetime
 from urllib.parse import urlparse
-from loguru import logger
 from dateutil.parser import parse as parse_dt
 from .media import Media

+
 # annotation order matters
-
-
@dataclass_json
@dataclass
 class Metadata:
@@ -72,6 +70,7 @@ class Metadata:

 # custom getter/setters

+
    def set_url(self, url: str) -> Metadata:
        assert type(url) is str and len(url) > 0, "invalid URL"
        return self.set("url", url)
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -15,49 +15,11 @@ import tempfile, traceback
 from loguru import logger


-"""
-how not to couple the different pieces of logic
-due to the use of constants for the metadata keys?
-perhaps having methods on the Metadata level that can be used to fetch a limited number of
-keys, never using strings but rather methods?
-eg: m = Metadata() 
-    m.get("screenshot") vs m.get_all()
-    m.get_url()
-    m.get_hash()
-    m.get_main_file().get_title()
-    m.get_screenshot() # this method should only exist because of the Screenshot Enricher
-    # maybe there is a way for Archivers and Enrichers and Storages to add their own methdods
-    # which raises still the Q of how the database, eg., knows they exist? 
-    # maybe there's a function to fetch them all, and each Database can register wathever they get
-    # for eg the GoogleSheets will only register based on the available column names, it knows what it wants
-    # and if it's there: great, otherwise business as usual.
-    # and a MongoDatabase could register all data, for example.
-    # 
-How are Orchestrators created? from a configuration file?
-    orchestrator = ArchivingOrchestrator(config)
-        # Config contains 1 URL, or URLs, from the command line
-        # OR a feeder which is described in the config file
-        # config.get_feeder() # if called as docker run --url "http...." then the uses the default filter
-        # if config.yaml says config
-    orchestrator.start()
-
-
-Example applications:
-1. auto-archiver for GSheets
-2. archiver for URL: feeder is CLIFeeder(config.cli.urls="") # --urls="u1,u2"
-3. archiver backend for a UI that implements a REST API, the API calls CLI
-
-Cisticola considerations:
-1. By isolating the archiving logic into "Archiving only pieces of logic" these could simply call cisticola.tiktok_scraper(user, pass)
-2. So the auto-archiver becomes like a puzzle and fixes to Cisticola scrapers can immediately benefit it, and contributions are focused on a single source or scraping
-"""
-
-
 class ArchivingOrchestrator:
    def __init__(self, config) -> None:
        self.feeder: Feeder = config.feeder
        self.formatter: Formatter = config.formatter
-        self.enrichers = config.enrichers
+        self.enrichers: List[Enricher] = config.enrichers
        self.archivers: List[Archiver] = config.archivers
        self.databases: List[Database] = config.databases
        self.storages: List[Storage] = config.storages
@@ -124,7 +86,7 @@ class ArchivingOrchestrator:
        # 3 - call archivers until one succeeds
        for a in self.archivers:
            logger.info(f"Trying archiver {a.name}")
-            try: 
+            try:
                # Q: should this be refactored so it's just a.download(result)?
                result.merge(a.download(result))
                if result.is_success(): break
--- a/src/auto_archiver/core/step.py
+++ b/src/auto_archiver/core/step.py
@@ -2,7 +2,6 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from inspect import ClassFoundException
 from typing import Type
-from ..core import Metadata
 from abc import ABC
 # from collections.abc import Iterable