mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Set up feeder manifests (not merged by source yet)
This commit is contained in:
0
src/auto_archiver/modules/cli_feeder/__init__.py
Normal file
0
src/auto_archiver/modules/cli_feeder/__init__.py
Normal file
24
src/auto_archiver/modules/cli_feeder/__manifest__.py
Normal file
24
src/auto_archiver/modules/cli_feeder/__manifest__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# Manifest for the CLI feeder module: a single dict literal describing the
# module's display name, type, dependencies, configuration options and
# human-readable description.
{
    "name": "CLI Feeder",
    "type": ["feeder"],
    "requires_setup": False,
    "external_dependencies": {
        "python": ["loguru"],
    },
    "configs": {
        "urls": {
            "default": None,
            "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
            # Split the comma-separated CLI value into a list of unique URLs.
            # dict.fromkeys de-duplicates while keeping first-seen order (a
            # plain set() would return the URLs in arbitrary order), and
            # surrounding whitespace / empty fragments such as the one left by
            # a trailing comma are dropped. `cur_val` is accepted but unused.
            "cli_set": lambda cli_val, cur_val: list(
                dict.fromkeys(
                    url.strip() for url in cli_val.split(",") if url.strip()
                )
            ),
        },
    },
    "description": """
    Processes URLs to archive passed via the command line and feeds them into the archiving pipeline.

    ### Features
    - Takes a single URL or a list of URLs provided via the command line.
    - Converts each URL into a `Metadata` object and yields it for processing.
    - Ensures URLs are processed only if they are explicitly provided.

    """,
}
|
||||
32
src/auto_archiver/modules/cli_feeder/cli_feeder.py
Normal file
32
src/auto_archiver/modules/cli_feeder/cli_feeder.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.feeders import Feeder
|
||||
from auto_archiver.core import Metadata, ArchivingContext
|
||||
|
||||
|
||||
class CLIFeeder(Feeder):
    """Feeder that turns the URLs held in ``self.urls`` into ``Metadata`` items.

    ``self.urls`` is not assigned in this class; it is presumably populated
    from ``config`` by the parent initialiser (TODO confirm against
    ``Feeder``/``Step``). The ``urls`` option itself is declared in this
    module's ``__manifest__.py``.
    """

    name = "cli_feeder"

    def __init__(self, config: dict) -> None:
        """Initialise the feeder and verify that at least one URL is available.

        Args:
            config: Configuration dict forwarded unchanged to the parent
                initialiser.

        Raises:
            Exception: If ``self.urls`` is not a list or is an empty list.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        # isinstance() is the idiomatic type check; an empty list is falsy.
        if not isinstance(self.urls, list) or not self.urls:
            raise Exception("CLI Feeder did not receive any URL to process")

    def __iter__(self) -> Metadata:
        """Yield one ``Metadata`` object per URL in ``self.urls``.

        This is a generator: each item is the result of
        ``Metadata().set_url(url)``. After the last URL has been consumed a
        success message with the total URL count is logged.
        """
        for url in self.urls:
            logger.debug(f"Processing {url}")
            yield Metadata().set_url(url)
            # NOTE(review): the "folder" context value is set only after the
            # consumer resumes the generator, i.e. after the yielded item has
            # been handed over. Order kept as-is; confirm this is intended.
            ArchivingContext.set("folder", "cli")

        logger.success(f"Processed {len(self.urls)} URL(s)")
|
||||
Reference in New Issue
Block a user