mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
132 lines
4.4 KiB
Python
132 lines
4.4 KiB
Python
"""
|
|
The Config class initializes and parses configurations for all other steps.
It supports CLI argument parsing, loading from a YAML file, and overrides to allow
flexible setup in various environments.
|
|
|
|
"""
|
|
|
|
import argparse
|
|
from ruamel.yaml import YAML, CommentedMap, add_representer
|
|
|
|
from copy import deepcopy
|
|
from .module import MODULE_TYPES
|
|
|
|
from typing import Any, List, Type
|
|
|
|
# configurable_parents = [
|
|
# Feeder,
|
|
# Enricher,
|
|
# Extractor,
|
|
# Database,
|
|
# Storage,
|
|
# Formatter
|
|
# # Util
|
|
# ]
|
|
# feeder: Feeder
|
|
# formatter: Formatter
|
|
# extractors: List[Extractor] = field(default_factory=[])
|
|
# enrichers: List[Enricher] = field(default_factory=[])
|
|
# storages: List[Storage] = field(default_factory=[])
|
|
# databases: List[Database] = field(default_factory=[])
|
|
|
|
# def __init__(self) -> None:
|
|
# self.defaults = {}
|
|
# self.cli_ops = {}
|
|
# self.config = {}
|
|
|
|
# def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
|
|
# """
|
|
# if yaml_config_filename is provided, the --config argument is ignored,
|
|
# useful for library usage when the config values are preloaded
|
|
# overwrite_configs is a dict that overwrites the yaml file contents
|
|
# """
|
|
# # 1. parse CLI values
|
|
# if use_cli:
|
|
# parser = argparse.ArgumentParser(
|
|
# # prog = "auto-archiver",
|
|
# description="Auto Archiver is a CLI tool to archive media/metadata from online URLs; it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!",
|
|
# epilog="Check the code at https://github.com/bellingcat/auto-archiver"
|
|
# )
|
|
|
|
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
|
# parser.add_argument('--version', action='version', version=__version__)
|
|
|
|
# Baseline configuration: a "steps" mapping holding one empty list per module
# type, keyed by the pluralised type name (f"{module_type}s").
EMPTY_CONFIG = CommentedMap(
    steps={f"{module_type}s": [] for module_type in MODULE_TYPES},
)
|
|
|
|
def to_dot_notation(yaml_conf: CommentedMap | dict) -> dict:
    """Flatten a nested configuration mapping into a single-level dict.

    Keys of nested mappings are joined with "." so that
    ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``. Only values that are not
    dict-like (as decided by ``is_dict_type``) produce an entry, so an empty
    nested mapping contributes nothing to the result.

    Args:
        yaml_conf: The (possibly nested) configuration mapping to flatten.

    Returns:
        A new flat dict mapping dotted key paths to leaf values.
    """
    dotdict: dict = {}

    def process_subdict(subdict, prefix=""):
        # Walk depth-first, carrying the dotted path of the parents in `prefix`.
        for key, value in subdict.items():
            if is_dict_type(value):
                process_subdict(value, f"{prefix}{key}.")
            else:
                dotdict[f"{prefix}{key}"] = value

    process_subdict(yaml_conf)
    return dotdict
|
|
|
|
def from_dot_notation(dotdict: dict) -> dict:
    """Expand a flat, dot-keyed dict into nested dicts.

    Each key is split on "."; every segment but the last names a nested dict
    (created on demand) and the last segment receives the value, e.g.
    ``{"a.b": 1, "a.c": 2}`` becomes ``{"a": {"b": 1, "c": 2}}``.
    """
    nested = {}

    for dotted_key, value in dotdict.items():
        # Keys without a dot are stored unchanged at the top level.
        segments = dotted_key.split(".") if "." in dotted_key else [dotted_key]
        *parents, leaf = segments

        # Descend through (or create) one dict per parent segment.
        node = nested
        for segment in parents:
            node = node.setdefault(segment, {})
        node[leaf] = value

    return nested
|
|
|
|
|
|
def is_list_type(value):
    """Return True when *value* is a list, tuple or set instance."""
    return isinstance(value, (list, tuple, set))
|
|
|
|
def is_dict_type(value):
    """Return True when *value* is a dict or a CommentedMap instance."""
    return isinstance(value, (dict, CommentedMap))
|
|
|
|
def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
    """Merge dot-notation overrides into a copy of a YAML configuration.

    ``dotdict`` is expanded with ``from_dot_notation`` and applied on top of a
    deep copy of ``yaml_dict``, which is then returned. For each override key:

    * if the existing value is missing or falsy, the override is stored as-is;
    * dict-like overrides are merged recursively into the existing value;
    * list-like overrides extend the existing value with the items it does
      not already contain;
    * any other override replaces the existing value.
    """
    merged: CommentedMap = deepcopy(yaml_dict)

    def apply_overrides(overrides, target):
        for key, override in overrides.items():
            if not target.get(key):
                # Nothing useful to merge with: take the override wholesale.
                target[key] = override
            elif is_dict_type(override):
                apply_overrides(override, target[key])
            elif is_list_type(override):
                current = target[key]
                # Lazy generator: membership is checked against the list as
                # it grows, so repeated override items are added only once.
                current.extend(item for item in override if item not in current)
            else:
                target[key] = override

    apply_overrides(from_dot_notation(dotdict), merged)

    return merged
|
|
|
|
# Module-level YAML handler shared by read_yaml() and store_yaml().
yaml = YAML()
|
|
|
|
def read_yaml(yaml_filename: str) -> CommentedMap:
    """Load a YAML configuration file, falling back to an empty configuration.

    Args:
        yaml_filename: Path of the YAML file to read (opened as UTF-8).

    Returns:
        The parsed document, or a fresh deep copy of ``EMPTY_CONFIG`` when the
        file does not exist or parses to an empty/falsy value.
    """
    config = None
    try:
        with open(yaml_filename, "r", encoding="utf-8") as inf:
            config = yaml.load(inf)
    except FileNotFoundError:
        # A missing file is not an error: the caller gets the empty config.
        pass

    if not config:
        # Hand out a copy so callers that mutate the result cannot corrupt
        # the shared module-level EMPTY_CONFIG template.
        config = deepcopy(EMPTY_CONFIG)

    return config
|
|
|
|
def store_yaml(config: CommentedMap, yaml_filename: str):
    """Write *config* to *yaml_filename* as UTF-8 YAML, overwriting the file."""
    with open(yaml_filename, mode="w", encoding="utf-8") as stream:
        yaml.dump(config, stream)