# File: auto-archiver/src/auto_archiver/core/module.py
# 2025-01-27 19:03:02 +00:00
#
# 199 lines
# 6.3 KiB
# Python

"""
Defines the BaseModule abstract base class, which acts as a blueprint for modules (steps) in the archiving pipeline
by applying user configuration, together with the helpers that discover modules via their manifest files and instantiate them lazily.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import List
from abc import ABC
import shutil
import ast
import copy
import sys
from importlib.util import find_spec
import os
from os.path import join, dirname
from loguru import logger
# Cache of module handles already resolved by name (filled by get_module).
_LAZY_LOADED_MODULES = {}

# The kinds of module a manifest may declare in its 'type' list.
MODULE_TYPES = [
    "feeder",
    "extractor",
    "enricher",
    "database",
    "storage",
    "formatter",
]

# Name of the file that marks a folder as a module and describes it.
MANIFEST_FILE = "__manifest__.py"

# Baseline manifest values; a module's own manifest is merged on top of a copy of this.
_DEFAULT_MANIFEST = {
    "name": "",
    "author": "Bellingcat",
    "type": [],
    "requires_setup": True,
    "description": "",
    "dependencies": {},
    "entry_point": "",
    "version": "1.0",
    "configs": {},
}
class BaseModule(ABC):
    """
    Common base for modules: stores the configuration and exposes the
    module's own settings as instance attributes.
    """

    # the full configuration dict handed to setup()
    config: dict
    # key under which this module's settings live in the configuration
    name: str

    def setup(self, config: dict):
        """
        Remember `config` and copy every entry of `config[self.name]`
        (if that section exists) onto this instance as an attribute.
        """
        self.config = config
        own_settings = config.get(self.name, {})
        for option, value in own_settings.items():
            setattr(self, option, value)
def get_module(module_name: str, additional_paths: List[str] = []):
    """
    Return the lazy module handle for `module_name`, resolving it at most once.

    The first successful lookup is stored in `_LAZY_LOADED_MODULES`, so later
    calls with the same name return the cached handle without searching again
    (and without considering `additional_paths`).

    :param module_name: folder name of the module to look up
    :param additional_paths: extra folders to search in addition to the default
        one; the default list is never mutated here
    :return: the first matching entry returned by `available_modules`
    :raises IndexError: if no module with that name is found
    """
    if module_name in _LAZY_LOADED_MODULES:
        return _LAZY_LOADED_MODULES[module_name]

    matches = available_modules(additional_paths=additional_paths, limit_to_modules=[module_name])
    if not matches:
        # Same exception type the bare `[0]` lookup used to raise, but with a message
        # that names the module instead of "list index out of range".
        raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed?")

    module = matches[0]
    _LAZY_LOADED_MODULES[module_name] = module
    return module
def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= [], additional_paths: List[str] = [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
    """
    Scan the module folders and return a lazy handle for every module found.

    A sub-folder counts as a module when it contains a `MANIFEST_FILE`. The
    built-in `modules` folder (sibling of this file's package folder) is
    searched first, followed by `additional_paths` in order.

    :param with_manifest: accepted but not used by this function
    :param limit_to_modules: when non-empty, only folders with these names are considered
    :param additional_paths: extra folders to search after the default one
    :param suppress_warnings: when True, do not warn about requested modules that were not found
    :return: list of LazyBaseModule handles, in discovery order
    """
    # see odoo/modules/module.py -> get_modules
    builtin_folder = join(dirname(dirname((__file__))), "modules")
    discovered = []

    for module_folder in [builtin_folder] + additional_paths:
        try:
            entries = os.listdir(module_folder)
        except FileNotFoundError:
            logger.warning(f"Module folder {module_folder} does not exist")
            continue

        for entry in entries:
            if limit_to_modules and entry not in limit_to_modules:
                continue
            entry_path = join(module_folder, entry)
            # only folders that ship a manifest are treated as modules
            if os.path.isfile(join(entry_path, MANIFEST_FILE)):
                discovered.append(LazyBaseModule(entry, entry_path))

    if suppress_warnings:
        return discovered

    # report every explicitly requested module that did not turn up
    for module in limit_to_modules:
        found = False
        for candidate in discovered:
            if module == candidate.name:
                found = True
                break
        if not found:
            logger.warning(f"Module '{module}' not found. Are you sure it's installed?")

    return discovered
@dataclass
class LazyBaseModule:
    """
    Lightweight handle on a module folder.

    Creating a handle only records the module's name and path. The manifest is
    read the first time it is needed, and the module's code is imported and
    instantiated only when `load()` is called.
    """

    name: str
    display_name: str
    type: list
    description: str
    path: str
    _manifest: dict = None
    _instance: BaseModule = None
    _entry_point: str = None

    def __init__(self, module_name, path):
        self.name = module_name
        self.path = path

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails. `display_name`, `type` and
        # `description` are filled in from the manifest, so resolve them on demand
        # instead of raising AttributeError on a handle whose manifest was not read yet.
        manifest_key = {
            'display_name': 'name',
            'type': 'type',
            'description': 'description',
        }.get(attr)
        if manifest_key is None:
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")
        return self.manifest[manifest_key]

    @property
    def entry_point(self):
        """
        The `file::ClassName` string used to locate the module's class.

        Uses the manifest's 'entry_point' when it is set; otherwise derives one
        from the module name (`my_module` -> `my_module::MyModule`).
        """
        if not self._entry_point:
            declared = self.manifest['entry_point']
            if declared:
                self._entry_point = declared
            else:
                # try to create the entry point from the module name
                self._entry_point = f"{self.name}::{self.name.replace('_', ' ').title().replace(' ', '')}"
        return self._entry_point

    @property
    def dependencies(self):
        """The manifest's 'dependencies' entry."""
        return self.manifest['dependencies']

    @property
    def configs(self):
        """The manifest's 'configs' entry."""
        return self.manifest['configs']

    @property
    def requires_setup(self):
        """The manifest's 'requires_setup' entry."""
        return self.manifest['requires_setup']

    @property
    def manifest(self):
        """
        The module's manifest merged over a copy of `_DEFAULT_MANIFEST`.

        The manifest file is parsed with `ast.literal_eval` on first access and
        cached. If parsing fails, the error is logged and the defaults are used.
        Reading it also populates `display_name`, `type`, `description` and
        `_entry_point` on the instance.
        """
        if self._manifest:
            return self._manifest

        manifest = copy.deepcopy(_DEFAULT_MANIFEST)
        # the manifest is a Python literal, so read it as UTF-8 like Python source
        with open(join(self.path, MANIFEST_FILE), encoding="utf-8") as f:
            try:
                manifest.update(ast.literal_eval(f.read()))
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
                logger.error(f"Error loading manifest from file {self.path}/{MANIFEST_FILE}: {e}")

        self._manifest = manifest
        self.display_name = manifest['name']
        self.type = manifest['type']
        self._entry_point = manifest['entry_point']
        self.description = manifest['description']
        return manifest

    def load(self):
        """
        Import the module's code, instantiate its entry-point class and return it.

        The instance is cached, so repeated calls return the same object. Before
        importing, the manifest's 'python' and 'bin' dependencies are checked;
        if one is missing an error is logged and the process exits with status 1.

        :raises ImportError: if the module package cannot be imported
        """
        if self._instance is not None:
            return self._instance

        def python_dep_available(dep):
            # find_spec raises (rather than returning None) for a dotted name whose
            # parent package is missing; treat that as "not installed".
            try:
                return find_spec(dep)
            except ImportError:
                return None

        # check external dependencies are installed
        def check_deps(deps, check):
            for dep in deps:
                if not check(dep):
                    logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
                    # sys.exit instead of the `exit` builtin, which is only injected by `site`
                    sys.exit(1)

        check_deps(self.dependencies.get('python', []), python_dep_available)
        check_deps(self.dependencies.get('bin', []), shutil.which)

        logger.debug(f"Loading module '{self.display_name}'...")

        # first import the whole module, to make sure it's working properly;
        # try it as a top-level package, then as a built-in auto_archiver module
        candidates = [self.name, f'auto_archiver.modules.{self.name}']
        import_error = None
        for qualname in candidates:
            try:
                __import__(qualname)
                break
            except ImportError as e:
                import_error = e
        else:
            # neither location worked: surface the real cause instead of swallowing it
            raise ImportError(
                f"Unable to import module '{self.name}' (tried: {', '.join(candidates)})"
            ) from import_error

        # then import the file for the entry point
        file_name, class_name = self.entry_point.split('::')
        sub_qualname = f'{qualname}.{file_name}'
        __import__(sub_qualname, fromlist=[self.entry_point])

        # finally, get the class instance
        instance = getattr(sys.modules[sub_qualname], class_name)()
        if not getattr(instance, 'name', None):
            instance.name = self.name
        if not getattr(instance, 'display_name', None):
            instance.display_name = self.display_name

        self._instance = instance
        return instance

    def __repr__(self):
        return f"Module<'{self.display_name}' ({self.name})>"