From 527438826c65cf5340b1d3560e1f001b77017324 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 30 Jan 2025 13:04:51 +0000 Subject: [PATCH] Fix manifests for required configs. --- .../modules/api_db/__manifest__.py | 1 - .../modules/atlos_feeder/__manifest__.py | 1 - .../modules/gdrive_storage/__manifest__.py | 3 +- .../instagram_api_extractor/__manifest__.py | 3 +- .../instagram_extractor/__manifest__.py | 4 +- .../modules/vk_extractor/__manifest__.py | 6 +- .../modules/wayback_enricher/__init__.py | 1 - .../modules/wayback_enricher/__manifest__.py | 30 ---------- .../wayback_extractor_enricher/__init__.py | 1 + .../__manifest__.py | 56 +++++++++++++++++++ .../wayback_extractor_enricher.py} | 0 11 files changed, 62 insertions(+), 44 deletions(-) delete mode 100644 src/auto_archiver/modules/wayback_enricher/__init__.py delete mode 100644 src/auto_archiver/modules/wayback_enricher/__manifest__.py create mode 100644 src/auto_archiver/modules/wayback_extractor_enricher/__init__.py create mode 100644 src/auto_archiver/modules/wayback_extractor_enricher/__manifest__.py rename src/auto_archiver/modules/{wayback_enricher/wayback_enricher.py => wayback_extractor_enricher/wayback_extractor_enricher.py} (100%) diff --git a/src/auto_archiver/modules/api_db/__manifest__.py b/src/auto_archiver/modules/api_db/__manifest__.py index 3874496..698c2e4 100644 --- a/src/auto_archiver/modules/api_db/__manifest__.py +++ b/src/auto_archiver/modules/api_db/__manifest__.py @@ -8,7 +8,6 @@ }, "configs": { "api_endpoint": { - "default": None, "required": True, "help": "API endpoint where calls are made to", }, diff --git a/src/auto_archiver/modules/atlos_feeder/__manifest__.py b/src/auto_archiver/modules/atlos_feeder/__manifest__.py index 5ae3540..d59f420 100644 --- a/src/auto_archiver/modules/atlos_feeder/__manifest__.py +++ b/src/auto_archiver/modules/atlos_feeder/__manifest__.py @@ -7,7 +7,6 @@ }, "configs": { "api_token": { - "default": None, "type": "str", "required": True, "help": 
"An Atlos API token. For more information, see https://docs.atlos.org/technical/api/", diff --git a/src/auto_archiver/modules/gdrive_storage/__manifest__.py b/src/auto_archiver/modules/gdrive_storage/__manifest__.py index 2ca7e27..632e52b 100644 --- a/src/auto_archiver/modules/gdrive_storage/__manifest__.py +++ b/src/auto_archiver/modules/gdrive_storage/__manifest__.py @@ -22,8 +22,7 @@ "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", "choices": ["random", "static"], }, - "root_folder_id": {"default": None, - # "required": True, + "root_folder_id": {"required": True, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"}, "oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."}, diff --git a/src/auto_archiver/modules/instagram_api_extractor/__manifest__.py b/src/auto_archiver/modules/instagram_api_extractor/__manifest__.py index a958a99..2d8f1d9 100644 --- a/src/auto_archiver/modules/instagram_api_extractor/__manifest__.py +++ b/src/auto_archiver/modules/instagram_api_extractor/__manifest__.py @@ -12,8 +12,7 @@ "configs": { "access_token": {"default": None, "help": "a valid instagrapi-api token"}, - "api_endpoint": {"default": None, - # "required": True, + "api_endpoint": {"required": True, "help": "API endpoint to use"}, "full_profile": { "default": False, diff --git a/src/auto_archiver/modules/instagram_extractor/__manifest__.py b/src/auto_archiver/modules/instagram_extractor/__manifest__.py index d8e4a9b..05cae19 100644 --- a/src/auto_archiver/modules/instagram_extractor/__manifest__.py +++ b/src/auto_archiver/modules/instagram_extractor/__manifest__.py @@ -9,11 +9,9 @@ }, 
"requires_setup": True, "configs": { - "username": {"default": None, - "required": True, + "username": {"required": True, "help": "a valid Instagram username"}, "password": { - "default": None, "required": True, "help": "the corresponding Instagram account password", }, diff --git a/src/auto_archiver/modules/vk_extractor/__manifest__.py b/src/auto_archiver/modules/vk_extractor/__manifest__.py index 033fe50..61e454e 100644 --- a/src/auto_archiver/modules/vk_extractor/__manifest__.py +++ b/src/auto_archiver/modules/vk_extractor/__manifest__.py @@ -7,11 +7,9 @@ "python": ["loguru", "vk_url_scraper"], }, "configs": { - "username": {"default": None, - "required": True, + "username": {"required": True, "help": "valid VKontakte username"}, - "password": {"default": None, - "required": True, + "password": {"required": True, "help": "valid VKontakte password"}, "session_file": { "default": "secrets/vk_config.v2.json", diff --git a/src/auto_archiver/modules/wayback_enricher/__init__.py b/src/auto_archiver/modules/wayback_enricher/__init__.py deleted file mode 100644 index 9782831..0000000 --- a/src/auto_archiver/modules/wayback_enricher/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .wayback_enricher import WaybackExtractorEnricher \ No newline at end of file diff --git a/src/auto_archiver/modules/wayback_enricher/__manifest__.py b/src/auto_archiver/modules/wayback_enricher/__manifest__.py deleted file mode 100644 index 5d1fe25..0000000 --- a/src/auto_archiver/modules/wayback_enricher/__manifest__.py +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Wayback Machine Enricher", - "type": ["enricher", "archiver"], - "requires_setup": True, - "dependencies": { - "python": ["loguru", "requests"], - }, - "entry_point": "wayback_enricher::WaybackExtractorEnricher", - "configs": { - "timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."}, - 
"if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"}, - "key": {"default": None, "required": True, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"}, - "secret": {"default": None, "required": True, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"}, - "proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"}, - "proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"}, - }, - "description": """ - Submits the current URL to the Wayback Machine for archiving and returns either a job ID or the completed archive URL. - - ### Features - - Archives URLs using the Internet Archive's Wayback Machine API. - - Supports conditional archiving based on the existence of prior archives within a specified time range. - - Provides proxies for HTTP and HTTPS requests. - - Fetches and confirms the archive URL or provides a job ID for later status checks. - - ### Notes - - Requires a valid Wayback Machine API key and secret. - - Handles rate-limiting by Wayback Machine and retries status checks with exponential backoff. 
- """ -} diff --git a/src/auto_archiver/modules/wayback_extractor_enricher/__init__.py b/src/auto_archiver/modules/wayback_extractor_enricher/__init__.py new file mode 100644 index 0000000..b69332d --- /dev/null +++ b/src/auto_archiver/modules/wayback_extractor_enricher/__init__.py @@ -0,0 +1 @@ +from .wayback_extractor_enricher import WaybackExtractorEnricher \ No newline at end of file diff --git a/src/auto_archiver/modules/wayback_extractor_enricher/__manifest__.py b/src/auto_archiver/modules/wayback_extractor_enricher/__manifest__.py new file mode 100644 index 0000000..baecc14 --- /dev/null +++ b/src/auto_archiver/modules/wayback_extractor_enricher/__manifest__.py @@ -0,0 +1,56 @@ +{ + "name": "Wayback Machine Enricher", + "type": ["enricher", "archiver"], + "entry_point": "wayback_extractor_enricher::WaybackExtractorEnricher", + "requires_setup": True, + "dependencies": { + "python": ["loguru", "requests"], + }, + "configs": { + "timeout": { + "default": 15, + "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually.", + }, + "if_not_archived_within": { + "default": None, + "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA", + }, + "key": { + "required": True, + "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php", + }, + "secret": { + "required": True, + "help": "wayback API secret. 
to get credentials visit https://archive.org/account/s3.php", + }, + "proxy_http": { + "default": None, + "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port", + }, + "proxy_https": { + "default": None, + "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port", + }, + }, + "description": """ + Submits the current URL to the Wayback Machine for archiving and returns either a job ID or the completed archive URL. + + ### Features + - Archives URLs using the Internet Archive's Wayback Machine API. + - Supports conditional archiving based on the existence of prior archives within a specified time range. + - Provides proxies for HTTP and HTTPS requests. + - Fetches and confirms the archive URL or provides a job ID for later status checks. + + ### Notes + - Requires a valid Wayback Machine API key and secret. + - Handles rate-limiting by Wayback Machine and retries status checks with exponential backoff. + + ### Steps to Get a Wayback API Key: + - Sign up for an account at [Internet Archive](https://archive.org/account/signup). + - Log in to your account. + - Navigate to your [account settings](https://archive.org/account). + - or: https://archive.org/developers/tutorial-get-ia-credentials.html + - Under Wayback Machine API Keys, generate a new key. + - Note down your API key and secret, as they will be required for authentication. + """, +} diff --git a/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py b/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py similarity index 100% rename from src/auto_archiver/modules/wayback_enricher/wayback_enricher.py rename to src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py