Ruff format with defaults.

This commit is contained in:
erinhmclark
2025-03-10 18:44:54 +00:00
parent cbb0414e5f
commit 85abe1837a
155 changed files with 2539 additions and 1908 deletions

View File

@@ -1 +1 @@
from .csv_feeder import CSVFeeder
from .csv_feeder import CSVFeeder

View File

@@ -2,26 +2,23 @@
"name": "CSV Feeder",
"type": ["feeder"],
"requires_setup": False,
"dependencies": {
"python": ["loguru"],
"bin": [""]
},
'requires_setup': True,
'entry_point': "csv_feeder::CSVFeeder",
"dependencies": {"python": ["loguru"], "bin": [""]},
"requires_setup": True,
"entry_point": "csv_feeder::CSVFeeder",
"configs": {
"files": {
"default": None,
"help": "Path to the input file(s) to read the URLs from, comma separated. \
"files": {
"default": None,
"help": "Path to the input file(s) to read the URLs from, comma separated. \
Input files should be formatted with one URL per line",
"required": True,
"type": "valid_file",
"nargs": "+",
},
"column": {
"default": None,
"help": "Column number or name to read the URLs from, 0-indexed",
}
"required": True,
"type": "valid_file",
"nargs": "+",
},
"column": {
"default": None,
"help": "Column number or name to read the URLs from, 0-indexed",
},
},
"description": """
Reads URLs from CSV files and feeds them into the archiving process.
@@ -33,5 +30,5 @@
### Setup
- Input files should be formatted with one URL per line, with or without a header row.
- If you have a header row, you can specify the column number or name to read URLs from using the 'column' config option.
"""
""",
}

View File

@@ -5,11 +5,10 @@ from auto_archiver.core import Feeder
from auto_archiver.core import Metadata
from auto_archiver.utils import url_or_none
class CSVFeeder(Feeder):
column = None
def __iter__(self) -> Metadata:
for file in self.files:
with open(file, "r") as f:
@@ -20,9 +19,11 @@ class CSVFeeder(Feeder):
try:
url_column = first_row.index(url_column)
except ValueError:
logger.error(f"Column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?")
logger.error(
f"Column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?"
)
return
elif not(url_or_none(first_row[url_column])):
elif not (url_or_none(first_row[url_column])):
# it's a header row, but we've been given a column number already
logger.debug(f"Skipping header row: {first_row}")
else:
@@ -35,4 +36,4 @@ class CSVFeeder(Feeder):
continue
url = row[url_column]
logger.debug(f"Processing {url}")
yield Metadata().set_url(url)
yield Metadata().set_url(url)