mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
fix config parsing in manifests
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
"allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
|
||||
"store_results": {"default": True, "help": "when set, will send the results to the API database."},
|
||||
"tags": {"default": [], "help": "what tags to add to the archived URL",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
}
|
||||
},
|
||||
"description": """
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"urls": {
|
||||
"default": None,
|
||||
"help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
},
|
||||
"description": """
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"default": None,
|
||||
"help": "Path to the input file(s) to read the URLs from, comma separated. \
|
||||
Input files should be formatted with one URL per line",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"column": {
|
||||
"default": None,
|
||||
|
||||
@@ -17,7 +17,7 @@ class CSVFeeder(Feeder):
|
||||
"default": None,
|
||||
"help": "Path to the input file(s) to read the URLs from, comma separated. \
|
||||
Input files should be formatted with one URL per line",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"column": {
|
||||
"default": None,
|
||||
|
||||
@@ -9,12 +9,12 @@
|
||||
"allow_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"block_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) explicitly block some worksheets from being processed",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"use_sheet_names_in_stored_paths": {
|
||||
"default": True,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"name": "Google Sheets Procesor",
|
||||
"type": ["feeder"],
|
||||
"entry_point": "gsheet_feeder::GsheetsFeeder",
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"python": ["loguru", "gspread", "python-slugify"],
|
||||
@@ -9,12 +10,12 @@
|
||||
"allow_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"block_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) explicitly block some worksheets from being processed",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
},
|
||||
"use_sheet_names_in_stored_paths": {
|
||||
"default": True,
|
||||
|
||||
@@ -19,7 +19,7 @@ import json
|
||||
"channel_invites": {
|
||||
"default": {},
|
||||
"help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
|
||||
"type": lambda x: json.loads(x),
|
||||
"type": "auto_archiver.utils.json_loader",
|
||||
}
|
||||
},
|
||||
"description": """
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
"http://tss.accv.es:8318/tsa"
|
||||
],
|
||||
"help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.",
|
||||
"type": lambda val: set(val.split(",")),
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",
|
||||
}
|
||||
},
|
||||
"description": """
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"configs": {
|
||||
"bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"},
|
||||
"bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line",
|
||||
"type": lambda val: set(val.split(",")),},
|
||||
"type": "auto_archiver.utils.parse_csv_to_set",},
|
||||
"consumer_key": {"default": None, "help": "twitter API consumer_key"},
|
||||
"consumer_secret": {"default": None, "help": "twitter API consumer_secret"},
|
||||
"access_token": {"default": None, "help": "twitter API access_token"},
|
||||
|
||||
@@ -53,4 +53,11 @@ def update_nested_dict(dictionary, update_dict):
|
||||
|
||||
def random_str(length: int = 32) -> str:
    """Return a random hexadecimal string of ``length`` characters.

    The value is a prefix of a freshly generated UUID4 rendered without
    dashes. That rendering has exactly 32 hex digits, which is why
    ``length`` is capped at 32.

    Args:
        length: number of characters to return; must not exceed 32.

    Returns:
        The first ``length`` hex digits of a new UUID4.

    Raises:
        AssertionError: if ``length`` is greater than 32.
    """
    # 32 itself is allowed, so the message says "at most" rather than "less than".
    assert length <= 32, "length must be at most 32 as UUID4 is used"
    # UUID.hex is the 32-digit form, i.e. str(uuid) with the dashes removed.
    return uuid.uuid4().hex[:length]
|
||||
|
||||
|
||||
def parse_csv_to_set(cli_val, cur_val=None):
    """Split a comma-separated string into a set of its items.

    Surrounding whitespace is removed from each item and empty items are
    dropped, so ``"a, b,"`` yields ``{"a", "b"}`` and ``""`` yields an
    empty set instead of ``{""}``.

    Args:
        cli_val: the comma-separated string to parse.
        cur_val: accepted for compatibility with two-argument callers and
            ignored; it defaults to ``None`` so the function can also be
            called with the string alone.

    Returns:
        A set with the distinct, non-empty items of ``cli_val``.
    """
    trimmed = (item.strip() for item in cli_val.split(","))
    return {item for item in trimmed if item}
|
||||
|
||||
def json_loader(cli_val):
    """Decode ``cli_val`` as a JSON document and return the resulting object.

    Any error raised by ``json.loads`` for malformed input propagates to
    the caller unchanged.
    """
    decoded = json.loads(cli_val)
    return decoded
|
||||
|
||||
Reference in New Issue
Block a user