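Implementing ruff suggestions: replace `type(x) == T` checks with `isinstance`, bare `except:` with `except Exception:`, `!= None` / `!= False` comparisons with `is not`, and remove duplicated keys in config dictionaries.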

This commit is contained in:
erinhmclark
2025-03-10 21:45:30 +00:00
parent ca44a40b88
commit e7fa88f1c7
16 changed files with 36 additions and 48 deletions

View File

@@ -3,7 +3,6 @@
"type": ["feeder"],
"entry_point": "cli_feeder::CLIFeeder",
"requires_setup": False,
"description": "Feeds URLs to orchestrator from the command line",
"configs": {
"urls": {
"default": None,

View File

@@ -1,7 +1,6 @@
{
"name": "CSV Feeder",
"type": ["feeder"],
"requires_setup": False,
"dependencies": {"python": ["loguru"], "bin": [""]},
"requires_setup": True,
"entry_point": "csv_feeder::CSVFeeder",

View File

@@ -12,7 +12,9 @@
"default": None,
"help": "the id of the sheet to archive (alternative to 'sheet' config)",
},
"header": {"default": 1, "type": "int", "help": "index of the header row (starts at 1)", "type": "int"},
"header": {"default": 1,
"help": "index of the header row (starts at 1)",
"type": "int"},
"service_account": {
"default": "secrets/service_account.json",
"help": "service account JSON file path. Learn how to create one: https://gspread.readthedocs.io/en/latest/oauth2.html",
@@ -51,19 +53,6 @@
"help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
"type": "bool",
},
"allow_worksheets": {
"default": set(),
"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
},
"block_worksheets": {
"default": set(),
"help": "(CSV) explicitly block some worksheets from being processed",
},
"use_sheet_names_in_stored_paths": {
"default": True,
"type": "bool",
"help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
},
},
"description": """
GsheetsFeederDatabase

View File

@@ -68,7 +68,7 @@ class GWorksheet:
if fresh:
return self.wks.cell(row, col_index + 1).value
if type(row) == int:
if isinstance(row, int):
row = self.get_row(row)
if col_index >= len(row):
@@ -84,7 +84,7 @@ class GWorksheet:
if when_empty_use_default and val.strip() == "":
return default
return val
except:
except Exception:
return default
def set_cell(self, row: int, col: str, val):

View File

@@ -74,9 +74,9 @@ class InstagramAPIExtractor(Extractor):
# repeats 3 times to remove nested empty values
if not self.minimize_json_output:
return d
if type(d) == list:
if isinstance(d, list):
return [self.cleanup_dict(v) for v in d]
if type(d) != dict:
if not isinstance(d, dict):
return d
return {
k: clean_v
@@ -220,7 +220,7 @@ class InstagramAPIExtractor(Extractor):
post_count = 0
while end_cursor != "":
posts = self.call_api("v1/user/medias/chunk", {"user_id": user_id, "end_cursor": end_cursor})
if not len(posts) or not type(posts) == list or len(posts) != 2:
if not posts or not isinstance(posts, list) or len(posts) != 2:
break
posts, end_cursor = posts[0], posts[1]
logger.info(f"parsing {len(posts)} posts, next {end_cursor=}")
@@ -243,7 +243,7 @@ class InstagramAPIExtractor(Extractor):
pbar = tqdm(desc="downloading tagged posts")
tagged_count = 0
while next_page_id != None:
while next_page_id is not None:
resp = self.call_api("v2/user/tag/medias", {"user_id": user_id, "page_id": next_page_id})
posts = resp.get("response", {}).get("items", [])
if not len(posts):

View File

@@ -61,7 +61,7 @@ class TelegramExtractor(Extractor):
else:
duration = float(duration)
m_video.set("duration", duration)
except:
except Exception:
pass
result.add_media(m_video)

View File

@@ -46,7 +46,7 @@ class TwitterApiExtractor(Extractor):
r = requests.get(url, timeout=30)
logger.debug(f"Expanded url {url} to {r.url}")
url = r.url
except:
except Exception:
logger.error(f"Failed to expand url {url}")
return url

View File

@@ -14,7 +14,9 @@
"help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles).",
},
"docker_commands": {"default": None, "help": "if a custom docker invocation is needed"},
"timeout": {"default": 120, "type": "int", "help": "timeout for WACZ generation in seconds", "type": "int"},
"timeout": {"default": 120,
"help": "timeout for WACZ generation in seconds",
"type": "int"},
"extract_media": {
"default": False,
"type": "bool",

View File

@@ -88,7 +88,7 @@ class WhisperEnricher(Enricher):
while not all_completed and (time.time() - start_time) <= self.timeout:
all_completed = True
for job_id in job_results:
if job_results[job_id] != False:
if job_results[job_id] is not False:
continue
all_completed = False # at least one not ready
try: