Separate setup() and module_setup().

This commit is contained in:
erinhmclark
2025-02-10 17:25:15 +00:00
parent c4bb667cec
commit 2c3d1f591f
14 changed files with 17 additions and 26 deletions

View File

@@ -80,6 +80,10 @@ class BaseModule(ABC):
for key, val in config.get(self.name, {}).items(): for key, val in config.get(self.name, {}).items():
setattr(self, key, val) setattr(self, key, val)
def module_setup(self):
# For any additional setup required by modules, e.g. authentication
pass
def auth_for_site(self, site: str, extract_cookies=True) -> Mapping[str, Any]: def auth_for_site(self, site: str, extract_cookies=True) -> Mapping[str, Any]:
""" """
Returns the authentication information for a given site. This is used to authenticate Returns the authentication information for a given site. This is used to authenticate

View File

@@ -242,6 +242,7 @@ class LazyBaseModule:
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default')) default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
config[self.name] = default_config | config.get(self.name, {}) config[self.name] = default_config | config.get(self.name, {})
instance.setup(config) instance.setup(config)
instance.module_setup()
return instance return instance
def __repr__(self): def __repr__(self):

View File

@@ -19,9 +19,7 @@ from auto_archiver.core import Storage
class GDriveStorage(Storage): class GDriveStorage(Storage):
def setup(self, config: dict) -> None: def module_setup(self) -> None:
# Step 1: Call the BaseModule setup to dynamically assign configs
super().setup(config)
self.scopes = ['https://www.googleapis.com/auth/drive'] self.scopes = ['https://www.googleapis.com/auth/drive']
# Initialize Google Drive service # Initialize Google Drive service
self._setup_google_drive_service() self._setup_google_drive_service()

View File

@@ -21,8 +21,7 @@ from . import GWorksheet
class GsheetsFeeder(Feeder): class GsheetsFeeder(Feeder):
def setup(self, config: dict): def module_setup(self) -> None:
super().setup(config)
self.gsheets_client = gspread.service_account(filename=self.service_account) self.gsheets_client = gspread.service_account(filename=self.service_account)
# TODO mv to validators # TODO mv to validators
assert self.sheet or self.sheet_id, ( assert self.sheet or self.sheet_id, (

View File

@@ -17,9 +17,8 @@ class HtmlFormatter(Formatter):
environment: Environment = None environment: Environment = None
template: any = None template: any = None
def setup(self, config: dict) -> None: def module_setup(self) -> None:
"""Sets up the Jinja2 environment and loads the template.""" """Sets up the Jinja2 environment and loads the template."""
super().setup(config) # Ensure the base class logic is executed
template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/") template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True) self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)

View File

@@ -32,8 +32,7 @@ class InstagramAPIExtractor(Extractor):
r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?" r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
) )
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
if self.api_endpoint[-1] == "/": if self.api_endpoint[-1] == "/":
self.api_endpoint = self.api_endpoint[:-1] self.api_endpoint = self.api_endpoint[:-1]

View File

@@ -25,8 +25,7 @@ class InstagramExtractor(Extractor):
profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url)) profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
# TODO: links to stories # TODO: links to stories
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.insta = instaloader.Instaloader( self.insta = instaloader.Instaloader(
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}" download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"

View File

@@ -27,12 +27,11 @@ class InstagramTbotExtractor(Extractor):
https://t.me/instagram_load_bot https://t.me/instagram_load_bot
""" """
def setup(self, configs) -> None: def module_setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. checks if the session file is valid 2. checks if the session file is valid
""" """
super().setup(configs)
logger.info(f"SETUP {self.name} checking login...") logger.info(f"SETUP {self.name} checking login...")
self._prepare_session_file() self._prepare_session_file()
self._initialize_telegram_client() self._initialize_telegram_client()

View File

@@ -13,8 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage): class S3Storage(Storage):
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.s3 = boto3.client( self.s3 = boto3.client(
's3', 's3',
region_name=self.region, region_name=self.region,

View File

@@ -18,14 +18,13 @@ class TelethonExtractor(Extractor):
invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)") invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
def setup(self, config: dict) -> None: def module_setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. trigger login process for telegram or proceed if already saved in a session file 2. trigger login process for telegram or proceed if already saved in a session file
3. joins channel_invites where needed 3. joins channel_invites where needed
""" """
super().setup(config)
logger.info(f"SETUP {self.name} checking login...") logger.info(f"SETUP {self.name} checking login...")
# make a copy of the session that is used exclusively with this archiver instance # make a copy of the session that is used exclusively with this archiver instance

View File

@@ -15,9 +15,7 @@ class TwitterApiExtractor(Extractor):
valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.api_index = 0 self.api_index = 0
self.apis = [] self.apis = []
if len(self.bearer_tokens): if len(self.bearer_tokens):

View File

@@ -12,8 +12,7 @@ class VkExtractor(Extractor):
Currently only works for /wall posts Currently only works for /wall posts
""" """
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.vks = VkScraper(self.username, self.password, session_file=self.session_file) self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
def download(self, item: Metadata) -> Metadata: def download(self, item: Metadata) -> Metadata:

View File

@@ -18,8 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
When used as an archiver it will extract the media from the .WACZ archive so it can be enriched. When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
""" """
def setup(self, configs) -> None: def module_setup(self) -> None:
super().setup(configs)
self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER') self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER') self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')

View File

@@ -13,8 +13,7 @@ class WhisperEnricher(Enricher):
Only works if an S3 compatible storage is used Only works if an S3 compatible storage is used
""" """
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.stores = self.config['steps']['storages'] self.stores = self.config['steps']['storages']
self.s3 = get_module("s3_storage", self.config) self.s3 = get_module("s3_storage", self.config)
if not "s3_storage" in self.stores: if not "s3_storage" in self.stores: