Further tidy-ups + refactoring for new structure

* Add implementation tests for orchestrator + logging tests
* Standardise method/class vars for extractors to see if they are suitable
* Fix bugs with removing default loguru logger (allows further customisation)
* Fix bug loading required fields from file
2026-06-12 13:18:28 +03:00 · 2025-01-30 13:21:10 +01:00
parent cddae65a90
commit b7d9145f6c
22 changed files with 292 additions and 51 deletions
--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@@ -12,7 +12,7 @@ from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata,Media

 class TwitterApiExtractor(Extractor):
-    link_pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
+    valid_url = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")

    def setup(self, config: dict) -> None:
        super().setup(config)
@@ -54,7 +54,7 @@ class TwitterApiExtractor(Extractor):

    def get_username_tweet_id(self, url):
        # detect URLs that we definitely cannot handle
-        matches = self.link_pattern.findall(url)
+        matches = self.valid_url.findall(url)
        if not len(matches): return False, False

        username, tweet_id = matches[0]  # only one URL supported