diff --git a/src/auto_archiver/core/module.py b/src/auto_archiver/core/module.py index 3fdc3ae..168550d 100644 --- a/src/auto_archiver/core/module.py +++ b/src/auto_archiver/core/module.py @@ -277,6 +277,9 @@ class LazyBaseModule: # finally, get the class instance instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)() + # save the instance for future easy loading; done before setup so it can still be cleaned up if setup fails + self._instance = instance + # set the name, display name and module factory instance.name = self.name instance.display_name = self.display_name @@ -289,8 +292,6 @@ class LazyBaseModule: instance.config_setup(config) instance.setup() - # save the instance for future easy loading - self._instance = instance return instance def __repr__(self): diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index cbd1af5..b637878 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -387,8 +387,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_ except (KeyboardInterrupt, Exception) as e: if not isinstance(e, KeyboardInterrupt) and not isinstance(e, SetupError): logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}") - if loaded_module and module_type == "extractor": - loaded_module.cleanup() + + # use lazy_module._instance here because loaded_module may not have been set if loading raised an error + if lazy_module._instance and module_type == "extractor": + lazy_module._instance.cleanup() raise e if not loaded_module: diff --git a/tests/data/test_modules/example_extractor/example_extractor.py b/tests/data/test_modules/example_extractor/example_extractor.py index 1c63383..ade26e4 100644 --- a/tests/data/test_modules/example_extractor/example_extractor.py +++ b/tests/data/test_modules/example_extractor/example_extractor.py @@ -1,6 +1,11 @@ from auto_archiver.core import Extractor +from loguru import logger + class ExampleExtractor(Extractor): def download(self, item): - 
print("download") + logger.info("download") + + def cleanup(self): + logger.info("cleanup") diff --git a/tests/data/test_modules/example_module/example_module.py b/tests/data/test_modules/example_module/example_module.py index 392abe0..898df96 100644 --- a/tests/data/test_modules/example_module/example_module.py +++ b/tests/data/test_modules/example_module/example_module.py @@ -1,27 +1,29 @@ from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata +from loguru import logger + class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter): def download(self, item): - print("download") + logger.info("download") def __iter__(self): yield Metadata().set_url("https://example.com") def done(self, result): - print("done") + logger.info("done") def enrich(self, to_enrich): - print("enrich") + logger.info("enrich") def get_cdn_url(self, media): return "nice_url" def save(self, item): - print("save") + logger.info("save") def uploadf(self, file, key, **kwargs): - print("uploadf") + logger.info("uploadf") def format(self, item): - print("format") + logger.info("format") diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 3367ce0..86e3125 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -237,3 +237,23 @@ def test_wrong_step_type(test_args, caplog): with pytest.raises(SetupError) as err: orchestrator.setup(args) assert "Module 'example_extractor' is not a feeder" in str(err.value) + + +def test_load_failed_extractor_cleanup(test_args, mocker, caplog): + orchestrator = ArchivingOrchestrator() + + # hack to set up the paths so we can patch properly + orchestrator.module_factory.setup_paths([TEST_MODULES]) + + # patch ExampleExtractor.setup to throw an exception + mocker.patch( + "auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup", + side_effect=Exception("Test exception"), + ) + + with pytest.raises(Exception): + orchestrator.setup(test_args + 
["--extractors", "example_extractor"]) + + assert "Error during setup of modules: Test exception" in caplog.text + # make sure the 'cleanup' is called + assert "cleanup" in caplog.text