mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
Merge main
This commit is contained in:
@@ -1,13 +1,18 @@
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from typing import Mapping, Any
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Mapping, Any, Type, TYPE_CHECKING
|
||||
from abc import ABC
|
||||
from copy import deepcopy, copy
|
||||
from tempfile import TemporaryDirectory
|
||||
from auto_archiver.utils import url as UrlUtil
|
||||
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES
|
||||
|
||||
from loguru import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .module import ModuleFactory
|
||||
|
||||
class BaseModule(ABC):
|
||||
|
||||
"""
|
||||
@@ -17,41 +22,24 @@ class BaseModule(ABC):
|
||||
however modules can have a .setup() method to run any setup code
|
||||
(e.g. logging in to a site, spinning up a browser etc.)
|
||||
|
||||
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
|
||||
See consts.MODULE_TYPES for the types of modules you can create, noting that
|
||||
a subclass can be of multiple types. For example, a module that extracts data from
|
||||
a website and stores it in a database would be both an 'extractor' and a 'database' module.
|
||||
|
||||
Each module is a python package, and should have a __manifest__.py file in the
|
||||
same directory as the module file. The __manifest__.py specifies the module information
|
||||
like name, author, version, dependencies etc. See BaseModule._DEFAULT_MANIFEST for the
|
||||
like name, author, version, dependencies etc. See DEFAULT_MANIFEST for the
|
||||
default manifest structure.
|
||||
|
||||
"""
|
||||
|
||||
MODULE_TYPES = [
|
||||
'feeder',
|
||||
'extractor',
|
||||
'enricher',
|
||||
'database',
|
||||
'storage',
|
||||
'formatter'
|
||||
]
|
||||
|
||||
_DEFAULT_MANIFEST = {
|
||||
'name': '', # the display name of the module
|
||||
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
|
||||
'type': [], # the type of the module, can be one or more of BaseModule.MODULE_TYPES
|
||||
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
|
||||
'description': '', # a description of the module
|
||||
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
|
||||
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
|
||||
'version': '1.0', # the version of the module
|
||||
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
|
||||
}
|
||||
MODULE_TYPES = CONF_MODULE_TYPES
|
||||
|
||||
# NOTE: these are declared as class variables, but they are overridden by the instance variables in the __init__ method
|
||||
config: Mapping[str, Any]
|
||||
authentication: Mapping[str, Mapping[str, str]]
|
||||
name: str
|
||||
module_factory: ModuleFactory
|
||||
|
||||
# this is set by the orchestrator prior to archiving
|
||||
tmp_dir: TemporaryDirectory = None
|
||||
|
||||
Reference in New Issue
Block a user