mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 05:08:28 +03:00
Implementing ruff suggestions.
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
"type": ["feeder"],
|
||||
"entry_point": "cli_feeder::CLIFeeder",
|
||||
"requires_setup": False,
|
||||
"description": "Feeds URLs to orchestrator from the command line",
|
||||
"configs": {
|
||||
"urls": {
|
||||
"default": None,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"name": "CSV Feeder",
|
||||
"type": ["feeder"],
|
||||
"requires_setup": False,
|
||||
"dependencies": {"python": ["loguru"], "bin": [""]},
|
||||
"requires_setup": True,
|
||||
"entry_point": "csv_feeder::CSVFeeder",
|
||||
|
||||
@@ -12,7 +12,9 @@
|
||||
"default": None,
|
||||
"help": "the id of the sheet to archive (alternative to 'sheet' config)",
|
||||
},
|
||||
"header": {"default": 1, "type": "int", "help": "index of the header row (starts at 1)", "type": "int"},
|
||||
"header": {"default": 1,
|
||||
"help": "index of the header row (starts at 1)",
|
||||
"type": "int"},
|
||||
"service_account": {
|
||||
"default": "secrets/service_account.json",
|
||||
"help": "service account JSON file path. Learn how to create one: https://gspread.readthedocs.io/en/latest/oauth2.html",
|
||||
@@ -51,19 +53,6 @@
|
||||
"help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
|
||||
"type": "bool",
|
||||
},
|
||||
"allow_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
|
||||
},
|
||||
"block_worksheets": {
|
||||
"default": set(),
|
||||
"help": "(CSV) explicitly block some worksheets from being processed",
|
||||
},
|
||||
"use_sheet_names_in_stored_paths": {
|
||||
"default": True,
|
||||
"type": "bool",
|
||||
"help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
|
||||
},
|
||||
},
|
||||
"description": """
|
||||
GsheetsFeederDatabase
|
||||
|
||||
@@ -68,7 +68,7 @@ class GWorksheet:
|
||||
|
||||
if fresh:
|
||||
return self.wks.cell(row, col_index + 1).value
|
||||
if type(row) == int:
|
||||
if isinstance(row, int):
|
||||
row = self.get_row(row)
|
||||
|
||||
if col_index >= len(row):
|
||||
@@ -84,7 +84,7 @@ class GWorksheet:
|
||||
if when_empty_use_default and val.strip() == "":
|
||||
return default
|
||||
return val
|
||||
except:
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
def set_cell(self, row: int, col: str, val):
|
||||
|
||||
@@ -74,9 +74,9 @@ class InstagramAPIExtractor(Extractor):
|
||||
# repeats 3 times to remove nested empty values
|
||||
if not self.minimize_json_output:
|
||||
return d
|
||||
if type(d) == list:
|
||||
if isinstance(d, list):
|
||||
return [self.cleanup_dict(v) for v in d]
|
||||
if type(d) != dict:
|
||||
if not isinstance(d, dict):
|
||||
return d
|
||||
return {
|
||||
k: clean_v
|
||||
@@ -220,7 +220,7 @@ class InstagramAPIExtractor(Extractor):
|
||||
post_count = 0
|
||||
while end_cursor != "":
|
||||
posts = self.call_api("v1/user/medias/chunk", {"user_id": user_id, "end_cursor": end_cursor})
|
||||
if not len(posts) or not type(posts) == list or len(posts) != 2:
|
||||
if not posts or not isinstance(posts, list) or len(posts) != 2:
|
||||
break
|
||||
posts, end_cursor = posts[0], posts[1]
|
||||
logger.info(f"parsing {len(posts)} posts, next {end_cursor=}")
|
||||
@@ -243,7 +243,7 @@ class InstagramAPIExtractor(Extractor):
|
||||
pbar = tqdm(desc="downloading tagged posts")
|
||||
|
||||
tagged_count = 0
|
||||
while next_page_id != None:
|
||||
while next_page_id is not None:
|
||||
resp = self.call_api("v2/user/tag/medias", {"user_id": user_id, "page_id": next_page_id})
|
||||
posts = resp.get("response", {}).get("items", [])
|
||||
if not len(posts):
|
||||
|
||||
@@ -61,7 +61,7 @@ class TelegramExtractor(Extractor):
|
||||
else:
|
||||
duration = float(duration)
|
||||
m_video.set("duration", duration)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
result.add_media(m_video)
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ class TwitterApiExtractor(Extractor):
|
||||
r = requests.get(url, timeout=30)
|
||||
logger.debug(f"Expanded url {url} to {r.url}")
|
||||
url = r.url
|
||||
except:
|
||||
except Exception:
|
||||
logger.error(f"Failed to expand url {url}")
|
||||
return url
|
||||
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles).",
|
||||
},
|
||||
"docker_commands": {"default": None, "help": "if a custom docker invocation is needed"},
|
||||
"timeout": {"default": 120, "type": "int", "help": "timeout for WACZ generation in seconds", "type": "int"},
|
||||
"timeout": {"default": 120,
|
||||
"help": "timeout for WACZ generation in seconds",
|
||||
"type": "int"},
|
||||
"extract_media": {
|
||||
"default": False,
|
||||
"type": "bool",
|
||||
|
||||
@@ -88,7 +88,7 @@ class WhisperEnricher(Enricher):
|
||||
while not all_completed and (time.time() - start_time) <= self.timeout:
|
||||
all_completed = True
|
||||
for job_id in job_results:
|
||||
if job_results[job_id] != False:
|
||||
if job_results[job_id] is not False:
|
||||
continue
|
||||
all_completed = False # at least one not ready
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user