From 9befb9776c3fb311d570d03140de3ab4e22878ef Mon Sep 17 00:00:00 2001 From: Patrick Robertson Date: Thu, 23 Jan 2025 21:08:54 +0100 Subject: [PATCH] Fix loading modules when entry_point isn't set --- src/auto_archiver/core/config.py | 11 +++++----- src/auto_archiver/core/loader.py | 21 ++++++++++++++++--- src/auto_archiver/core/orchestrator.py | 2 -- .../modules/generic_extractor/__manifest__.py | 1 - tests/test_config.py | 4 ++++ 5 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/auto_archiver/core/config.py b/src/auto_archiver/core/config.py index 3811e2b..1c19ae2 100644 --- a/src/auto_archiver/core/config.py +++ b/src/auto_archiver/core/config.py @@ -100,16 +100,17 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap: # first deal with lists, since 'update' replaces lists from a in b, but we want to extend def update_dict(subdict, yaml_subdict): - for key, value in yaml_subdict.items(): - if not subdict.get(key): + for key, value in subdict.items(): + if not yaml_subdict.get(key): + yaml_subdict[key] = value continue if is_dict_type(value): - update_dict(subdict[key], value) + update_dict(value, yaml_subdict[key]) elif is_list_type(value): - yaml_subdict[key].extend(s for s in subdict[key] if s not in yaml_subdict[key]) + yaml_subdict[key].extend(s for s in value if s not in yaml_subdict[key]) else: - yaml_subdict[key] = subdict[key] + yaml_subdict[key] = value update_dict(from_dot_notation(dotdict), yaml_dict) diff --git a/src/auto_archiver/core/loader.py b/src/auto_archiver/core/loader.py index 310e0e6..bbd686e 100644 --- a/src/auto_archiver/core/loader.py +++ b/src/auto_archiver/core/loader.py @@ -25,6 +25,7 @@ MANIFEST_FILE = "__manifest__.py" _DEFAULT_MANIFEST = { 'name': '', 'author': 'Bellingcat', + 'type': [], 'requires_setup': True, 'description': '', 'dependencies': {}, @@ -90,8 +91,18 @@ def load_module(module: str) -> object: # TODO: change return type to Step qualname = f'auto_archiver.modules.{module.name}' logger.info(f"Loading module '{module.display_name}'...") - loaded_module = __import__(qualname) - instance = getattr(sys.modules[qualname], module.entry_point)() + # first import the whole module, to make sure it's working properly + __import__(qualname) + + + # then import the file for the entry point + file_name, class_name = module.entry_point.split('::') + sub_qualname = f'{qualname}.{file_name}' + + __import__(f'{qualname}.{file_name}', fromlist=[module.entry_point]) + + # finally, get the class instance + instance = getattr(sys.modules[sub_qualname], class_name)() if not getattr(instance, 'name', None): instance.name = module.name @@ -107,7 +118,11 @@ def load_manifest(module_path): manifest = copy.deepcopy(_DEFAULT_MANIFEST) with open(join(module_path, MANIFEST_FILE)) as f: - manifest.update(ast.literal_eval(f.read())) + try: + manifest.update(ast.literal_eval(f.read())) + except ( ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e: + logger.error(f"Error loading manifest from file {module_path}/{MANIFEST_FILE}: {e}") + return manifest return manifest def get_module(module_name): diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index b17dcec..2c9841e 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -109,7 +109,6 @@ class ArchivingOrchestrator: parser.set_defaults(**to_dot_notation(yaml_config)) - breakpoint() # reload the parser with the new arguments, now that we have them parsed, unknown = parser.parse_known_args(unused_args) @@ -180,7 +179,6 @@ class ArchivingOrchestrator: def setup_logging(self): # setup loguru logging logger.remove() # remove the default logger - logging_config = self.config['logging'] logger.add(sys.stderr, level=logging_config['level']) if log_file := logging_config['file']: diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 6f469c9..f46c13c 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -3,7 +3,6 @@ 'version': '0.1.0', 'author': 'Bellingcat', 'type': ['extractor', 'feeder', 'enricher'], - 'entry_point': 'GenericExtractor', # this class should be present in the __init__.py 'requires_setup': False, 'dependencies': { 'python': ['yt_dlp', 'requests', 'loguru', 'slugify'], diff --git a/tests/test_config.py b/tests/test_config.py index 97793a0..75fe515 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -49,17 +49,21 @@ def test_merge_dicts(): "key1": ["a"], "key2": "old_value", "key3": ["a", "b", "c"], + "key5": "value5", }) dotdict = { "settings.key1": ["b", "c"], "settings.key2": "new_value", "settings.key3": ["b", "c", "d"], + "settings.key4": "value4", } merged = config.merge_dicts(dotdict, yaml_dict) assert merged["settings"]["key1"] == ["a", "b", "c"] assert merged["settings"]["key2"] == "new_value" assert merged["settings"]["key3"] == ["a", "b", "c", "d"] + assert merged["settings"]["key4"] == "value4" + assert merged["settings"]["key5"] == "value5" def test_check_types():