Separate setup() and module_setup().

This commit is contained in:
erinhmclark
2025-02-10 18:07:47 +00:00
parent 2c3d1f591f
commit e97ccf8a73
14 changed files with 18 additions and 18 deletions

View File

@@ -14,7 +14,7 @@ class BaseModule(ABC):
Base module class. All modules should inherit from this class. Base module class. All modules should inherit from this class.
The exact methods a class implements will depend on the type of module it is, The exact methods a class implements will depend on the type of module it is,
however all modules have a .setup(config: dict) method to run any setup code however modules can have a .setup() method to run any setup code
(e.g. logging in to a site, spinning up a browser etc.) (e.g. logging in to a site, spinning up a browser etc.)
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
@@ -60,7 +60,7 @@ class BaseModule(ABC):
def storages(self) -> list: def storages(self) -> list:
return self.config.get('storages', []) return self.config.get('storages', [])
def setup(self, config: dict): def config_setup(self, config: dict):
authentication = config.get('authentication', {}) authentication = config.get('authentication', {})
# extract out concatenated sites # extract out concatenated sites
@@ -80,7 +80,7 @@ class BaseModule(ABC):
for key, val in config.get(self.name, {}).items(): for key, val in config.get(self.name, {}).items():
setattr(self, key, val) setattr(self, key, val)
def module_setup(self): def setup(self):
# For any additional setup required by modules, e.g. authentication # For any additional setup required by modules, e.g. authentication
pass pass

View File

@@ -58,7 +58,7 @@ def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBa
This has all the information about the module, but does not load the module itself or its dependencies This has all the information about the module, but does not load the module itself or its dependencies
To load an actual module, call .setup() on a laz module To load an actual module, call .setup() on a lazy module
""" """
if module_name in _LAZY_LOADED_MODULES: if module_name in _LAZY_LOADED_MODULES:
@@ -241,8 +241,8 @@ class LazyBaseModule:
# merge the default config with the user config # merge the default config with the user config
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default')) default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
config[self.name] = default_config | config.get(self.name, {}) config[self.name] = default_config | config.get(self.name, {})
instance.setup(config) instance.config_setup(config)
instance.module_setup() instance.setup()
return instance return instance
def __repr__(self): def __repr__(self):

View File

@@ -19,7 +19,7 @@ from auto_archiver.core import Storage
class GDriveStorage(Storage): class GDriveStorage(Storage):
def module_setup(self) -> None: def setup(self) -> None:
self.scopes = ['https://www.googleapis.com/auth/drive'] self.scopes = ['https://www.googleapis.com/auth/drive']
# Initialize Google Drive service # Initialize Google Drive service
self._setup_google_drive_service() self._setup_google_drive_service()

View File

@@ -21,7 +21,7 @@ from . import GWorksheet
class GsheetsFeeder(Feeder): class GsheetsFeeder(Feeder):
def module_setup(self) -> None: def setup(self) -> None:
self.gsheets_client = gspread.service_account(filename=self.service_account) self.gsheets_client = gspread.service_account(filename=self.service_account)
# TODO mv to validators # TODO mv to validators
assert self.sheet or self.sheet_id, ( assert self.sheet or self.sheet_id, (

View File

@@ -17,7 +17,7 @@ class HtmlFormatter(Formatter):
environment: Environment = None environment: Environment = None
template: any = None template: any = None
def module_setup(self) -> None: def setup(self) -> None:
"""Sets up the Jinja2 environment and loads the template.""" """Sets up the Jinja2 environment and loads the template."""
template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/") template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True) self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)

View File

@@ -32,7 +32,7 @@ class InstagramAPIExtractor(Extractor):
r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?" r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
) )
def module_setup(self) -> None: def setup(self) -> None:
if self.api_endpoint[-1] == "/": if self.api_endpoint[-1] == "/":
self.api_endpoint = self.api_endpoint[:-1] self.api_endpoint = self.api_endpoint[:-1]

View File

@@ -25,7 +25,7 @@ class InstagramExtractor(Extractor):
profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url)) profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
# TODO: links to stories # TODO: links to stories
def module_setup(self) -> None: def setup(self) -> None:
self.insta = instaloader.Instaloader( self.insta = instaloader.Instaloader(
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}" download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"

View File

@@ -27,7 +27,7 @@ class InstagramTbotExtractor(Extractor):
https://t.me/instagram_load_bot https://t.me/instagram_load_bot
""" """
def module_setup(self) -> None: def setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. checks if the session file is valid 2. checks if the session file is valid

View File

@@ -13,7 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage): class S3Storage(Storage):
def module_setup(self) -> None: def setup(self) -> None:
self.s3 = boto3.client( self.s3 = boto3.client(
's3', 's3',
region_name=self.region, region_name=self.region,

View File

@@ -18,7 +18,7 @@ class TelethonExtractor(Extractor):
invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)") invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
def module_setup(self) -> None: def setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup

View File

@@ -15,7 +15,7 @@ class TwitterApiExtractor(Extractor):
valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def module_setup(self) -> None: def setup(self) -> None:
self.api_index = 0 self.api_index = 0
self.apis = [] self.apis = []
if len(self.bearer_tokens): if len(self.bearer_tokens):

View File

@@ -12,7 +12,7 @@ class VkExtractor(Extractor):
Currently only works for /wall posts Currently only works for /wall posts
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.vks = VkScraper(self.username, self.password, session_file=self.session_file) self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
def download(self, item: Metadata) -> Metadata: def download(self, item: Metadata) -> Metadata:

View File

@@ -18,7 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
When used as an archiver it will extract the media from the .WACZ archive so it can be enriched. When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER') self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER') self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')

View File

@@ -13,7 +13,7 @@ class WhisperEnricher(Enricher):
Only works if an S3 compatible storage is used Only works if an S3 compatible storage is used
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.stores = self.config['steps']['storages'] self.stores = self.config['steps']['storages']
self.s3 = get_module("s3_storage", self.config) self.s3 = get_module("s3_storage", self.config)
if not "s3_storage" in self.stores: if not "s3_storage" in self.stores: