Further tidy-ups + refactoring for new structure

* Add implementation tests for orchestrator + logging tests
* Standardise the method/class variables that extractors use to check whether they are suitable
* Fix bugs with removing default loguru logger (allows further customisation)
* Fix bug loading required fields from file
This commit is contained in:
Patrick Robertson
2025-01-30 13:21:10 +01:00
parent cddae65a90
commit b7d9145f6c
22 changed files with 292 additions and 51 deletions

View File

@@ -12,7 +12,7 @@ from auto_archiver.core import Extractor
from auto_archiver.core import Metadata,Media
class TwitterApiExtractor(Extractor):
link_pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
valid_url = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def setup(self, config: dict) -> None:
super().setup(config)
@@ -54,7 +54,7 @@ class TwitterApiExtractor(Extractor):
def get_username_tweet_id(self, url):
# detect URLs that we definitely cannot handle
matches = self.link_pattern.findall(url)
matches = self.valid_url.findall(url)
if not len(matches): return False, False
username, tweet_id = matches[0] # only one URL supported