Add more manifests and base modules; rename archiver to extractor.

This commit is contained in:
erinhmclark
2025-01-23 16:40:48 +00:00
parent 9db26cdfc2
commit 1274a1b231
93 changed files with 378 additions and 238 deletions

View File

@@ -8,9 +8,4 @@ from .context import ArchivingContext
# cannot import ArchivingOrchestrator/Config to avoid circular dep
# from .orchestrator import ArchivingOrchestrator
# from .config import Config
from .media import Media
from .step import Step
from .context import ArchivingContext
from .metadata import Metadata
# from .config import Config

View File

@@ -15,7 +15,7 @@ from .loader import MODULE_TYPES
# configurable_parents = [
# Feeder,
# Enricher,
# Archiver,
# Extractor,
# Database,
# Storage,
# Formatter
@@ -23,7 +23,7 @@ from .loader import MODULE_TYPES
# ]
# feeder: Feeder
# formatter: Formatter
# archivers: List[Archiver] = field(default_factory=[])
# extractors: List[Extractor] = field(default_factory=[])
# enrichers: List[Enricher] = field(default_factory=[])
# storages: List[Storage] = field(default_factory=[])
# databases: List[Database] = field(default_factory=[])

View File

@@ -33,7 +33,7 @@ class ArchivingOrchestrator:
# self.feeder: Feeder = config.feeder
# self.formatter: Formatter = config.formatter
# self.enrichers: List[Enricher] = config.enrichers
# self.archivers: List[Archiver] = config.archivers
# self.extractors: List[Extractor] = config.extractors
# self.databases: List[Database] = config.databases
# self.storages: List[Storage] = config.storages
# ArchivingContext.set("storages", self.storages, keep_on_reset=True)
@@ -80,7 +80,7 @@ class ArchivingOrchestrator:
for module_type in MODULE_TYPES:
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'extractors', 'databases', 'storages', 'formatter'
for module_type in MODULE_TYPES:
if modules := getattr(basic_config, f"{module_type}s", []):
enabled_modules.extend(modules)
@@ -98,7 +98,7 @@ class ArchivingOrchestrator:
self.add_module_args(available_modules(with_manifest=True), parser)
breakpoint()
# breakpoint()
parser.set_defaults(**to_dot_notation(yaml_config))
# reload the parser with the new arguments, now that we have them
@@ -165,7 +165,8 @@ class ArchivingOrchestrator:
for module_type in MODULE_TYPES:
if module_type == 'enricher':
breakpoint()
pass
# breakpoint()
step_items = []
modules_to_load = self.config['steps'][f"{module_type}s"]
@@ -228,7 +229,7 @@ class ArchivingOrchestrator:
def cleanup(self)->None:
logger.info("Cleaning up")
for e in self.config['steps']['extractors']:
breakpoint()
# breakpoint()
e.cleanup()
def feed(self) -> Generator[Metadata]: