Fix up dependency checking (use 'dependencies' instead of 'external_dependencies' -> simpler/easier to remember)

This commit is contained in:
Patrick Robertson
2025-01-29 19:25:22 +01:00
parent 3d37c494aa
commit 00a7018f36
38 changed files with 81 additions and 49 deletions

View File

@@ -143,6 +143,7 @@ def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= []
if _LAZY_LOADED_MODULES.get(possible_module): if _LAZY_LOADED_MODULES.get(possible_module):
continue continue
lazy_module = LazyBaseModule(possible_module, possible_module_path) lazy_module = LazyBaseModule(possible_module, possible_module_path)
_LAZY_LOADED_MODULES[possible_module] = lazy_module _LAZY_LOADED_MODULES[possible_module] = lazy_module
all_modules.append(lazy_module) all_modules.append(lazy_module)
@@ -229,6 +230,9 @@ class LazyBaseModule:
# check external dependencies are installed # check external dependencies are installed
def check_deps(deps, check): def check_deps(deps, check):
for dep in deps: for dep in deps:
if not len(dep):
# skip any empty strings that a user may have erroneously added
continue
if not check(dep): if not check(dep):
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.") logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
exit(1) exit(1)

View File

@@ -3,7 +3,7 @@
"type": ["database"], "type": ["database"],
"entry_point": "api_db:AAApiDb", "entry_point": "api_db:AAApiDb",
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["requests", "python": ["requests",
"loguru"], "loguru"],
}, },

View File

@@ -2,7 +2,7 @@
"name": "atlos_storage", "name": "atlos_storage",
"type": ["storage"], "type": ["storage"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": {"python": ["loguru", "requests"], "bin": [""]}, "dependencies": {"python": ["loguru", "requests"], "bin": [""]},
"configs": { "configs": {
"path_generator": { "path_generator": {
"default": "url", "default": "url",

View File

@@ -3,7 +3,7 @@
"type": ["database"], "type": ["database"],
"entry_point": "atlos_db:AtlosDb", "entry_point": "atlos_db:AtlosDb",
"requires_setup": True, "requires_setup": True,
"external_dependencies": "dependencies":
{"python": ["loguru", {"python": ["loguru",
""], ""],
"bin": [""]}, "bin": [""]},

View File

@@ -2,7 +2,7 @@
"name": "Atlos Feeder", "name": "Atlos Feeder",
"type": ["feeder"], "type": ["feeder"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "requests"], "python": ["loguru", "requests"],
}, },
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "CLI Feeder", "name": "CLI Feeder",
"type": ["feeder"], "type": ["feeder"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
}, },
'entry_point': 'cli_feeder::CLIFeeder', 'entry_point': 'cli_feeder::CLIFeeder',

View File

@@ -2,7 +2,7 @@
"name": "Console Database", "name": "Console Database",
"type": ["database"], "type": ["database"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
}, },
"description": """ "description": """

View File

@@ -2,7 +2,7 @@
"name": "CSV Database", "name": "CSV Database",
"type": ["database"], "type": ["database"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": {"python": ["loguru"] "dependencies": {"python": ["loguru"]
}, },
'entry_point': 'csv_db::CSVDb', 'entry_point': 'csv_db::CSVDb',
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "CSV Feeder", "name": "CSV Feeder",
"type": ["feeder"], "type": ["feeder"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
"bin": [""] "bin": [""]
}, },

View File

@@ -2,7 +2,7 @@
"name": "Google Drive Storage", "name": "Google Drive Storage",
"type": ["storage"], "type": ["storage"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": [ "python": [
"loguru", "loguru",
"google-api-python-client", "google-api-python-client",

View File

@@ -3,7 +3,7 @@
"type": ["database"], "type": ["database"],
"entry_point": "gsheet_db::GsheetsDb", "entry_point": "gsheet_db::GsheetsDb",
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "gspread", "python-slugify"], "python": ["loguru", "gspread", "python-slugify"],
}, },
"configs": { "configs": {

View File

@@ -3,7 +3,7 @@
"type": ["feeder"], "type": ["feeder"],
"entry_point": "gsheet_feeder::GsheetsFeeder", "entry_point": "gsheet_feeder::GsheetsFeeder",
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "gspread", "python-slugify"], "python": ["loguru", "gspread", "python-slugify"],
}, },
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "Hash Enricher", "name": "Hash Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
}, },
"configs": { "configs": {

View File

@@ -2,8 +2,8 @@
"name": "HTML Formatter", "name": "HTML Formatter",
"type": ["formatter"], "type": ["formatter"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru", "jinja2"], "python": ["hash_enricher", "loguru", "jinja2"],
"bin": [""] "bin": [""]
}, },
"configs": { "configs": {

View File

@@ -53,6 +53,7 @@ class HtmlFormatter(Formatter):
outf.write(content) outf.write(content)
final_media = Media(filename=html_path, _mimetype="text/html") final_media = Media(filename=html_path, _mimetype="text/html")
# get the already instantiated hash_enricher module
he = get_module('hash_enricher', self.config) he = get_module('hash_enricher', self.config)
if len(hd := he.calculate_hash(final_media.filename)): if len(hd := he.calculate_hash(final_media.filename)):
final_media.set("hash", f"{he.algorithm}:{hd}") final_media.set("hash", f"{he.algorithm}:{hd}")

View File

@@ -1,7 +1,7 @@
{ {
"name": "Instagram API Extractor", "name": "Instagram API Extractor",
"type": ["extractor"], "type": ["extractor"],
"external_dependencies": "dependencies":
{"python": ["requests", {"python": ["requests",
"loguru", "loguru",
"retrying", "retrying",

View File

@@ -1,7 +1,7 @@
{ {
"name": "Instagram Extractor", "name": "Instagram Extractor",
"type": ["extractor"], "type": ["extractor"],
"external_dependencies": { "dependencies": {
"python": [ "python": [
"instaloader", "instaloader",
"loguru", "loguru",

View File

@@ -1,7 +1,7 @@
{ {
"name": "Instagram Telegram Bot Extractor", "name": "Instagram Telegram Bot Extractor",
"type": ["extractor"], "type": ["extractor"],
"external_dependencies": {"python": ["loguru", "dependencies": {"python": ["loguru",
"telethon",], "telethon",],
}, },
"requires_setup": True, "requires_setup": True,

View File

@@ -2,7 +2,7 @@
"name": "Local Storage", "name": "Local Storage",
"type": ["storage"], "type": ["storage"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
}, },
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "Archive Metadata Enricher", "name": "Archive Metadata Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
}, },
"description": """ "description": """

View File

@@ -2,7 +2,7 @@
"name": "Media Metadata Enricher", "name": "Media Metadata Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru"], "python": ["loguru"],
"bin": ["exiftool"] "bin": ["exiftool"]
}, },

View File

@@ -2,7 +2,7 @@
"name": "Mute Formatter", "name": "Mute Formatter",
"type": ["formatter"], "type": ["formatter"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
}, },
"description": """ Default formatter. "description": """ Default formatter.
""", """,

View File

@@ -2,7 +2,7 @@
"name": "PDQ Hash Enricher", "name": "PDQ Hash Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru", "pdqhash", "numpy", "Pillow"], "python": ["loguru", "pdqhash", "numpy", "Pillow"],
}, },
"description": """ "description": """

View File

@@ -2,7 +2,7 @@
"name": "S3 Storage", "name": "S3 Storage",
"type": ["storage"], "type": ["storage"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["boto3", "loguru"], "python": ["boto3", "loguru"],
}, },
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "Screenshot Enricher", "name": "Screenshot Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "selenium"], "python": ["loguru", "selenium"],
"bin": ["chromedriver"] "bin": ["chromedriver"]
}, },

View File

@@ -2,7 +2,7 @@
"name": "SSL Certificate Enricher", "name": "SSL Certificate Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru", "python-slugify"], "python": ["loguru", "python-slugify"],
}, },
'entry_point': 'ssl_enricher::SSLEnricher', 'entry_point': 'ssl_enricher::SSLEnricher',

View File

@@ -2,7 +2,7 @@
"name": "Telegram Extractor", "name": "Telegram Extractor",
"type": ["extractor"], "type": ["extractor"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": [ "python": [
"requests", "requests",
"bs4", "bs4",

View File

@@ -2,7 +2,7 @@
"name": "telethon_extractor", "name": "telethon_extractor",
"type": ["extractor"], "type": ["extractor"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["telethon", "python": ["telethon",
"loguru", "loguru",
"tqdm", "tqdm",

View File

@@ -2,7 +2,7 @@
"name": "Thumbnail Enricher", "name": "Thumbnail Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": { "dependencies": {
"python": ["loguru", "ffmpeg-python"], "python": ["loguru", "ffmpeg-python"],
"bin": ["ffmpeg"] "bin": ["ffmpeg"]
}, },

View File

@@ -2,7 +2,7 @@
"name": "Timestamping Enricher", "name": "Timestamping Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": [ "python": [
"loguru", "loguru",
"slugify", "slugify",

View File

@@ -2,7 +2,7 @@
"name": "Twitter API Extractor", "name": "Twitter API Extractor",
"type": ["extractor"], "type": ["extractor"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["requests", "python": ["requests",
"loguru", "loguru",
"pytwitter", "pytwitter",

View File

@@ -3,7 +3,7 @@
"type": ["extractor"], "type": ["extractor"],
"requires_setup": True, "requires_setup": True,
"depends": ["core", "utils"], "depends": ["core", "utils"],
"external_dependencies": { "dependencies": {
"python": ["loguru", "python": ["loguru",
"vk_url_scraper"], "vk_url_scraper"],
}, },

View File

@@ -2,7 +2,7 @@
"name": "WACZ Enricher", "name": "WACZ Enricher",
"type": ["enricher", "archiver"], "type": ["enricher", "archiver"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": [ "python": [
"loguru", "loguru",
"jsonlines", "jsonlines",

View File

@@ -2,7 +2,7 @@
"name": "Wayback Machine Enricher", "name": "Wayback Machine Enricher",
"type": ["enricher", "archiver"], "type": ["enricher", "archiver"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "requests"], "python": ["loguru", "requests"],
}, },
"entry_point": "wayback_enricher::WaybackExtractorEnricher", "entry_point": "wayback_enricher::WaybackExtractorEnricher",

View File

@@ -2,7 +2,7 @@
"name": "Whisper Enricher", "name": "Whisper Enricher",
"type": ["enricher"], "type": ["enricher"],
"requires_setup": True, "requires_setup": True,
"external_dependencies": { "dependencies": {
"python": ["loguru", "requests"], "python": ["loguru", "requests"],
}, },
"configs": { "configs": {

View File

@@ -2,7 +2,7 @@
"name": "Example Module", "name": "Example Module",
"type": ["extractor"], "type": ["extractor"],
"requires_setup": False, "requires_setup": False,
"external_dependencies": {"python": ["loguru"] "dependencies": {"python": ["loguru"]
}, },
"configs": { "configs": {
"csv_file": {"default": "db.csv", "help": "CSV file name"} "csv_file": {"default": "db.csv", "help": "CSV file name"}

View File

@@ -1,4 +1,4 @@
from auto_archiver.core.extractor import Extractor from auto_archiver.core.extractor import Extractor
class ExampleModule(Extractor): class ExampleModule(Extractor):
pass def download(self, item):
print("do something")

View File

@@ -1,13 +1,24 @@
import sys import sys
import pytest import pytest
from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
from auto_archiver.core.extractor import Extractor
@pytest.fixture @pytest.fixture
def example_module(): def example_module():
yield get_module_lazy("example_module", ["tests/data/"]) import auto_archiver
previous_path = auto_archiver.modules.__path__
auto_archiver.modules.__path__.append("tests/data/")
module = get_module_lazy("example_module")
yield module
# cleanup # cleanup
_LAZY_LOADED_MODULES.pop("example_module") try:
del module._manifest
except AttributeError:
pass
del _LAZY_LOADED_MODULES["example_module"]
sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
auto_archiver.modules.__path__ = previous_path
def test_get_module_lazy(example_module): def test_get_module_lazy(example_module):
assert example_module.name == "example_module" assert example_module.name == "example_module"
@@ -15,18 +26,34 @@ def test_get_module_lazy(example_module):
assert example_module.manifest is not None assert example_module.manifest is not None
def test_python_dependency_check(example_module):
# example_module requires loguru, which is not installed
# monkey patch the manifest to include a nonexistent dependency
example_module.manifest["dependencies"]["python"] = ["does_not_exist"]
def test_load_module_abc_check(example_module): with pytest.raises(SystemExit) as load_error:
# example_module is an extractor but doesn't have the 'download' method, should raise an ABC error
with pytest.raises(TypeError) as load_error:
example_module.load({}) example_module.load({})
assert "Can't instantiate abstract class ExampleModule with abstract method download" in str(load_error.value)
assert load_error.value.code == 1
def test_load_module(example_module, monkeypatch):
# hack - remove the 'download' method from the required methods of Extractor def test_binary_dependency_check(example_module):
monkeypatch.setattr(Extractor, "__abstractmethods__", set()) # example_module requires ffmpeg, which is not installed
# monkey patch the manifest to include a nonexistent dependency
example_module.manifest["dependencies"]["binary"] = ["does_not_exist"]
def test_module_dependency_check_loads_module(example_module):
# make example_module depend on another module (hash_enricher)
# monkey patch the manifest so that loading example_module also loads that dependency
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
loaded_module = example_module.load({})
assert loaded_module is not None
# check the dependency is loaded
assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
def test_load_module(example_module):
# setup the module, and check that config is set to the default values # setup the module, and check that config is set to the default values
loaded_module = example_module.load({}) loaded_module = example_module.load({})