Set up feeder manifests (not merged by source yet)

This commit is contained in:
erinhmclark
2025-01-23 09:16:42 +00:00
parent c517d35bdf
commit 79684f8348
82 changed files with 721 additions and 730 deletions

View File

@@ -0,0 +1,24 @@
{
    # Manifest describing the CLI feeder module: its display name, module
    # type, dependencies, configuration options and a markdown description.
    "name": "CLI Feeder",
    "type": ["feeder"],
    "requires_setup": False,
    "external_dependencies": {
        "python": ["loguru"],
    },
    "configs": {
        "urls": {
            "default": None,
            "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
            # Turn the comma-separated CLI value into a list of unique URLs.
            # dict.fromkeys de-duplicates while keeping the order in which the
            # URLs were given (a set would return them in arbitrary order).
            # Surrounding whitespace is stripped and empty entries (e.g. from a
            # trailing comma) are dropped so they are not treated as URLs.
            # cur_val is accepted for the cli_set signature but is not used.
            "cli_set": lambda cli_val, cur_val: [
                url
                for url in dict.fromkeys(part.strip() for part in cli_val.split(","))
                if url
            ],
        },
    },
    "description": """
Processes URLs to archive passed via the command line and feeds them into the archiving pipeline.
### Features
- Takes a single URL or a list of URLs provided via the command line.
- Converts each URL into a `Metadata` object and yields it for processing.
- Ensures URLs are processed only if they are explicitly provided.
""",
}

View File

@@ -0,0 +1,32 @@
from typing import Iterator

from loguru import logger

from auto_archiver.feeders import Feeder
from auto_archiver.core import Metadata, ArchivingContext
class CLIFeeder(Feeder):
    """Feeder that yields one ``Metadata`` item per URL held in ``self.urls``."""

    name = "cli_feeder"

    def __init__(self, config: dict) -> None:
        """Initialise the feeder and validate that URLs were supplied.

        Args:
            config: configuration dictionary forwarded to the base class.

        Raises:
            Exception: if ``self.urls`` is not a list or is empty.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        # presumably the base initialiser populates self.urls from config;
        # verify against Feeder/Step.
        if not isinstance(self.urls, list) or not self.urls:
            raise Exception("CLI Feeder did not receive any URL to process")

    def __iter__(self) -> Iterator[Metadata]:
        """Yield a ``Metadata`` object for each URL, then log a summary.

        Yields:
            A new ``Metadata`` with its URL set, one per entry in ``self.urls``.
        """
        for url in self.urls:
            logger.debug(f"Processing {url}")
            yield Metadata().set_url(url)
            # NOTE(review): this runs only once the consumer asks for the next
            # item, i.e. after the yielded URL has been handed over — confirm
            # that setting the folder after the yield is intended.
            ArchivingContext.set("folder", "cli")

        logger.success(f"Processed {len(self.urls)} URL(s)")