Merge pull request #210 from bellingcat/logger_fix

Fix issue #200 + Refactor _LAZY_LOADED_MODULES
This commit is contained in:
Patrick Robertson
2025-02-19 15:11:42 +00:00
committed by GitHub
15 changed files with 232 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ import hashlib
import pytest
from auto_archiver.core.metadata import Metadata
from auto_archiver.core.module import get_module, _LAZY_LOADED_MODULES
from auto_archiver.core.module import ModuleFactory
# Test names inserted into this list will be run last. This is useful for expensive/costly tests
# that you only want to run if everything else succeeds (e.g. API calls). The order here is important
@@ -22,19 +22,19 @@ TESTS_TO_RUN_LAST = ['test_twitter_api_archiver']
def setup_module(request):
def _setup_module(module_name, config={}):
module_factory = ModuleFactory()
if isinstance(module_name, type):
# get the module name:
# if the class does not have a .name, use the name of the parent folder
module_name = module_name.__module__.rsplit(".",2)[-2]
m = get_module(module_name, {module_name: config})
m = module_factory.get_module(module_name, {module_name: config})
# add the tmp_dir to the module
tmp_dir = TemporaryDirectory()
m.tmp_dir = tmp_dir.name
def cleanup():
_LAZY_LOADED_MODULES.pop(module_name)
tmp_dir.cleanup()
request.addfinalizer(cleanup)

View File

@@ -2,7 +2,7 @@ import pytest
from auto_archiver.modules.hash_enricher import HashEnricher
from auto_archiver.core import Metadata, Media
from auto_archiver.core.module import get_module_lazy
from auto_archiver.core.module import ModuleFactory
@pytest.mark.parametrize("algorithm, filename, expected_hash", [
("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
@@ -22,7 +22,7 @@ def test_default_config_values(setup_module):
def test_config():
# test default config
c = get_module_lazy('hash_enricher').configs
c = ModuleFactory().get_module_lazy('hash_enricher').configs
assert c["algorithm"]["default"] == "SHA-256"
assert c["chunksize"]["default"] == 16000000
assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]

View File

@@ -1,24 +1,18 @@
import sys
import pytest
from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
from auto_archiver.core.module import ModuleFactory, LazyBaseModule
from auto_archiver.core.base_module import BaseModule
@pytest.fixture
def example_module():
import auto_archiver
module_factory = ModuleFactory()
previous_path = auto_archiver.modules.__path__
auto_archiver.modules.__path__.append("tests/data/test_modules/")
module = get_module_lazy("example_module")
yield module
# cleanup
try:
del module._manifest
except AttributeError:
pass
del _LAZY_LOADED_MODULES["example_module"]
sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
auto_archiver.modules.__path__ = previous_path
return module_factory.get_module_lazy("example_module")
def test_get_module_lazy(example_module):
assert example_module.name == "example_module"
@@ -46,12 +40,14 @@ def test_module_dependency_check_loads_module(example_module):
# monkey patch the manifest to include a nonexistent dependency
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
module_factory = example_module.module_factory
loaded_module = example_module.load({})
assert loaded_module is not None
# check the dependency is loaded
assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
assert module_factory._lazy_modules["hash_enricher"] is not None
assert module_factory._lazy_modules["hash_enricher"]._instance is not None
def test_load_module(example_module):
@@ -69,7 +65,7 @@ def test_load_module(example_module):
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
def test_load_modules(module_name):
# test that specific modules can be loaded
module = get_module_lazy(module_name)
module = ModuleFactory().get_module_lazy(module_name)
assert module is not None
assert isinstance(module, LazyBaseModule)
assert module.name == module_name
@@ -86,7 +82,7 @@ def test_load_modules(module_name):
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
def test_lazy_base_module(module_name):
lazy_module = get_module_lazy(module_name)
lazy_module = ModuleFactory().get_module_lazy(module_name)
assert lazy_module is not None
assert isinstance(lazy_module, LazyBaseModule)

View File

@@ -4,7 +4,7 @@ from argparse import ArgumentParser, ArgumentTypeError
from auto_archiver.core.orchestrator import ArchivingOrchestrator
from auto_archiver.version import __version__
from auto_archiver.core.config import read_yaml, store_yaml
from auto_archiver.core.module import _LAZY_LOADED_MODULES
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
TEST_MODULES = "tests/data/test_modules/"
@@ -17,22 +17,7 @@ def test_args():
@pytest.fixture
def orchestrator():
yield ArchivingOrchestrator()
# hack - the loguru logger starts with one logger, but if orchestrator has run before
# it'll remove the default logger, add it back in:
from loguru import logger
if not logger._core.handlers.get(0):
logger._core.handlers_count = 0
logger.add(sys.stderr)
# and remove the custom logger
if logger._core.handlers.get(1):
logger.remove(1)
# delete out any loaded modules
_LAZY_LOADED_MODULES.clear()
return ArchivingOrchestrator()
@pytest.fixture
def basic_parser(orchestrator) -> ArgumentParser: