From 0453d95f569639baa490f6159d739e066bd82002 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 24 Jan 2025 13:24:54 +0000 Subject: [PATCH] fix config parsing in manifests --- src/auto_archiver/modules/api_db/__manifest__.py | 2 +- src/auto_archiver/modules/cli_feeder/__manifest__.py | 2 +- src/auto_archiver/modules/csv_feeder/__manifest__.py | 2 +- src/auto_archiver/modules/csv_feeder/csv_feeder.py | 2 +- src/auto_archiver/modules/gsheet_db/__manifest__.py | 4 ++-- src/auto_archiver/modules/gsheet_feeder/__manifest__.py | 5 +++-- .../modules/telethon_extractor/__manifest__.py | 2 +- .../modules/timestamping_enricher/__manifest__.py | 2 +- .../modules/twitter_api_extractor/__manifest__.py | 2 +- src/auto_archiver/utils/misc.py | 9 ++++++++- 10 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/auto_archiver/modules/api_db/__manifest__.py b/src/auto_archiver/modules/api_db/__manifest__.py index c422b49..4c85541 100644 --- a/src/auto_archiver/modules/api_db/__manifest__.py +++ b/src/auto_archiver/modules/api_db/__manifest__.py @@ -16,7 +16,7 @@ "allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"}, "store_results": {"default": True, "help": "when set, will send the results to the API database."}, "tags": {"default": [], "help": "what tags to add to the archived URL", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", } }, "description": """ diff --git a/src/auto_archiver/modules/cli_feeder/__manifest__.py b/src/auto_archiver/modules/cli_feeder/__manifest__.py index 2e2c53e..6f62cd2 100644 --- a/src/auto_archiver/modules/cli_feeder/__manifest__.py +++ b/src/auto_archiver/modules/cli_feeder/__manifest__.py @@ -9,7 +9,7 @@ "urls": { "default": None, "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml", - "type": lambda val: 
set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, }, "description": """ diff --git a/src/auto_archiver/modules/csv_feeder/__manifest__.py b/src/auto_archiver/modules/csv_feeder/__manifest__.py index fb644ec..7e84a43 100644 --- a/src/auto_archiver/modules/csv_feeder/__manifest__.py +++ b/src/auto_archiver/modules/csv_feeder/__manifest__.py @@ -11,7 +11,7 @@ "default": None, "help": "Path to the input file(s) to read the URLs from, comma separated. \ Input files should be formatted with one URL per line", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, "column": { "default": None, diff --git a/src/auto_archiver/modules/csv_feeder/csv_feeder.py b/src/auto_archiver/modules/csv_feeder/csv_feeder.py index a830791..91a2b97 100644 --- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py +++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py @@ -17,7 +17,7 @@ class CSVFeeder(Feeder): "default": None, "help": "Path to the input file(s) to read the URLs from, comma separated. 
\ Input files should be formatted with one URL per line", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, "column": { "default": None, diff --git a/src/auto_archiver/modules/gsheet_db/__manifest__.py b/src/auto_archiver/modules/gsheet_db/__manifest__.py index df7fb6a..2f4f9b4 100644 --- a/src/auto_archiver/modules/gsheet_db/__manifest__.py +++ b/src/auto_archiver/modules/gsheet_db/__manifest__.py @@ -9,12 +9,12 @@ "allow_worksheets": { "default": set(), "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, "block_worksheets": { "default": set(), "help": "(CSV) explicitly block some worksheets from being processed", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, "use_sheet_names_in_stored_paths": { "default": True, diff --git a/src/auto_archiver/modules/gsheet_feeder/__manifest__.py b/src/auto_archiver/modules/gsheet_feeder/__manifest__.py index c6790ca..cb58035 100644 --- a/src/auto_archiver/modules/gsheet_feeder/__manifest__.py +++ b/src/auto_archiver/modules/gsheet_feeder/__manifest__.py @@ -1,6 +1,7 @@ { "name": "Google Sheets Procesor", "type": ["feeder"], + "entry_point": "gsheet_feeder::GsheetsFeeder", "requires_setup": True, "external_dependencies": { "python": ["loguru", "gspread", "python-slugify"], @@ -9,12 +10,12 @@ "allow_worksheets": { "default": set(), "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", }, "block_worksheets": { "default": set(), "help": "(CSV) explicitly block some worksheets from being processed", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", 
}, "use_sheet_names_in_stored_paths": { "default": True, diff --git a/src/auto_archiver/modules/telethon_extractor/__manifest__.py b/src/auto_archiver/modules/telethon_extractor/__manifest__.py index bb49882..5d71fdd 100644 --- a/src/auto_archiver/modules/telethon_extractor/__manifest__.py +++ b/src/auto_archiver/modules/telethon_extractor/__manifest__.py @@ -19,7 +19,7 @@ import json "channel_invites": { "default": {}, "help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup", - "type": lambda x: json.loads(x), + "type": "auto_archiver.utils.json_loader", } }, "description": """ diff --git a/src/auto_archiver/modules/timestamping_enricher/__manifest__.py b/src/auto_archiver/modules/timestamping_enricher/__manifest__.py index b49b61b..904fde6 100644 --- a/src/auto_archiver/modules/timestamping_enricher/__manifest__.py +++ b/src/auto_archiver/modules/timestamping_enricher/__manifest__.py @@ -21,7 +21,7 @@ "http://tss.accv.es:8318/tsa" ], "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.", - "type": lambda val: set(val.split(",")), + "type": "auto_archiver.utils.parse_csv_to_set", } }, "description": """ diff --git a/src/auto_archiver/modules/twitter_api_extractor/__manifest__.py b/src/auto_archiver/modules/twitter_api_extractor/__manifest__.py index 0a314b5..239a0bb 100644 --- a/src/auto_archiver/modules/twitter_api_extractor/__manifest__.py +++ b/src/auto_archiver/modules/twitter_api_extractor/__manifest__.py @@ -12,7 +12,7 @@ "configs": { "bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"}, 
"bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line", - "type": lambda val: set(val.split(",")),}, + "type": "auto_archiver.utils.parse_csv_to_set",}, "consumer_key": {"default": None, "help": "twitter API consumer_key"}, "consumer_secret": {"default": None, "help": "twitter API consumer_secret"}, "access_token": {"default": None, "help": "twitter API access_token"}, diff --git a/src/auto_archiver/utils/misc.py b/src/auto_archiver/utils/misc.py index e312fc6..ad16401 100644 --- a/src/auto_archiver/utils/misc.py +++ b/src/auto_archiver/utils/misc.py @@ -53,4 +53,11 @@ def update_nested_dict(dictionary, update_dict): def random_str(length: int = 32) -> str: assert length <= 32, "length must be less than 32 as UUID4 is used" - return str(uuid.uuid4()).replace("-", "")[:length] \ No newline at end of file + return str(uuid.uuid4()).replace("-", "")[:length] + + +def parse_csv_to_set(cli_val, cur_val): + return set(cli_val.split(",")) + +def json_loader(cli_val): + return json.loads(cli_val)