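Implement ruff suggestions: use isinstance() for type checks, replace bare `except:` with `except Exception:`, compare to None/False by identity, and remove repeated keys from module manifests.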

2026-06-12 05:08:28 +03:00 · 2025-03-10 21:45:30 +00:00
parent ca44a40b88
commit e7fa88f1c7
16 changed files with 36 additions and 48 deletions
--- a/src/auto_archiver/modules/cli_feeder/manifest.py
+++ b/src/auto_archiver/modules/cli_feeder/manifest.py
@@ -3,7 +3,6 @@
    "type": ["feeder"],
    "entry_point": "cli_feeder::CLIFeeder",
    "requires_setup": False,
-    "description": "Feeds URLs to orchestrator from the command line",
    "configs": {
        "urls": {
            "default": None,
--- a/src/auto_archiver/modules/csv_feeder/manifest.py
+++ b/src/auto_archiver/modules/csv_feeder/manifest.py
@@ -1,7 +1,6 @@
 {
    "name": "CSV Feeder",
    "type": ["feeder"],
-    "requires_setup": False,
    "dependencies": {"python": ["loguru"], "bin": [""]},
    "requires_setup": True,
    "entry_point": "csv_feeder::CSVFeeder",
--- a/src/auto_archiver/modules/gsheet_feeder_db/manifest.py
+++ b/src/auto_archiver/modules/gsheet_feeder_db/manifest.py
@@ -12,7 +12,9 @@
            "default": None,
            "help": "the id of the sheet to archive (alternative to 'sheet' config)",
        },
-        "header": {"default": 1, "type": "int", "help": "index of the header row (starts at 1)", "type": "int"},
+        "header": {"default": 1,
+                   "help": "index of the header row (starts at 1)",
+                   "type": "int"},
        "service_account": {
            "default": "secrets/service_account.json",
            "help": "service account JSON file path. Learn how to create one: https://gspread.readthedocs.io/en/latest/oauth2.html",
@@ -51,19 +53,6 @@
            "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
            "type": "bool",
        },
-        "allow_worksheets": {
-            "default": set(),
-            "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
-        },
-        "block_worksheets": {
-            "default": set(),
-            "help": "(CSV) explicitly block some worksheets from being processed",
-        },
-        "use_sheet_names_in_stored_paths": {
-            "default": True,
-            "type": "bool",
-            "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
-        },
    },
    "description": """
    GsheetsFeederDatabase
--- a/src/auto_archiver/modules/gsheet_feeder_db/gworksheet.py
+++ b/src/auto_archiver/modules/gsheet_feeder_db/gworksheet.py
@@ -68,7 +68,7 @@ class GWorksheet:

        if fresh:
            return self.wks.cell(row, col_index + 1).value
-        if type(row) == int:
+        if isinstance(row, int):
            row = self.get_row(row)

        if col_index >= len(row):
@@ -84,7 +84,7 @@ class GWorksheet:
            if when_empty_use_default and val.strip() == "":
                return default
            return val
-        except:
+        except Exception:
            return default

    def set_cell(self, row: int, col: str, val):
--- a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
+++ b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
@@ -74,9 +74,9 @@ class InstagramAPIExtractor(Extractor):
        # repeats 3 times to remove nested empty values
        if not self.minimize_json_output:
            return d
-        if type(d) == list:
+        if isinstance(d, list):
            return [self.cleanup_dict(v) for v in d]
-        if type(d) != dict:
+        if not isinstance(d, dict):
            return d
        return {
            k: clean_v
@@ -220,7 +220,7 @@ class InstagramAPIExtractor(Extractor):
        post_count = 0
        while end_cursor != "":
            posts = self.call_api("v1/user/medias/chunk", {"user_id": user_id, "end_cursor": end_cursor})
-            if not len(posts) or not type(posts) == list or len(posts) != 2:
+            if not posts or not isinstance(posts, list) or len(posts) != 2:
                break
            posts, end_cursor = posts[0], posts[1]
            logger.info(f"parsing {len(posts)} posts, next {end_cursor=}")
@@ -243,7 +243,7 @@ class InstagramAPIExtractor(Extractor):
        pbar = tqdm(desc="downloading tagged posts")

        tagged_count = 0
-        while next_page_id != None:
+        while next_page_id is not None:
            resp = self.call_api("v2/user/tag/medias", {"user_id": user_id, "page_id": next_page_id})
            posts = resp.get("response", {}).get("items", [])
            if not len(posts):
--- a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
+++ b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
@@ -61,7 +61,7 @@ class TelegramExtractor(Extractor):
                else:
                    duration = float(duration)
                m_video.set("duration", duration)
-            except:
+            except Exception:
                pass
            result.add_media(m_video)

--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@@ -46,7 +46,7 @@ class TwitterApiExtractor(Extractor):
                r = requests.get(url, timeout=30)
                logger.debug(f"Expanded url {url} to {r.url}")
                url = r.url
-            except:
+            except Exception:
                logger.error(f"Failed to expand url {url}")
        return url

--- a/src/auto_archiver/modules/wacz_extractor_enricher/manifest.py
+++ b/src/auto_archiver/modules/wacz_extractor_enricher/manifest.py
@@ -14,7 +14,9 @@
            "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles).",
        },
        "docker_commands": {"default": None, "help": "if a custom docker invocation is needed"},
-        "timeout": {"default": 120, "type": "int", "help": "timeout for WACZ generation in seconds", "type": "int"},
+        "timeout": {"default": 120,
+                    "help": "timeout for WACZ generation in seconds",
+                    "type": "int"},
        "extract_media": {
            "default": False,
            "type": "bool",
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -88,7 +88,7 @@ class WhisperEnricher(Enricher):
        while not all_completed and (time.time() - start_time) <= self.timeout:
            all_completed = True
            for job_id in job_results:
-                if job_results[job_id] != False:
+                if job_results[job_id] is not False:
                    continue
                all_completed = False  # at least one not ready
                try: