|
|
|
@@ -16,6 +16,7 @@ class AAApiDb(Database):
|
|
|
|
# without this STEP.__init__ is not called
|
|
|
|
# without this STEP.__init__ is not called
|
|
|
|
super().__init__(config)
|
|
|
|
super().__init__(config)
|
|
|
|
self.allow_rearchive = bool(self.allow_rearchive)
|
|
|
|
self.allow_rearchive = bool(self.allow_rearchive)
|
|
|
|
|
|
|
|
self.store_results = bool(self.store_results)
|
|
|
|
self.assert_valid_string("api_endpoint")
|
|
|
|
self.assert_valid_string("api_endpoint")
|
|
|
|
self.assert_valid_string("api_secret")
|
|
|
|
self.assert_valid_string("api_secret")
|
|
|
|
|
|
|
|
|
|
|
|
@@ -29,21 +30,22 @@ class AAApiDb(Database):
|
|
|
|
"author_id": {"default": None, "help": "which email to assign as author"},
|
|
|
|
"author_id": {"default": None, "help": "which email to assign as author"},
|
|
|
|
"group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
|
|
|
|
"group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
|
|
|
|
"allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
|
|
|
|
"allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
|
|
|
|
|
|
|
|
"store_results": {"default": True, "help": "when set, will send the results to the API database."},
|
|
|
|
"tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
|
|
|
|
"tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
def fetch(self, item: Metadata) -> Union[Metadata, bool]:
    """Query the API database for previous archives of this item's URL.

    Returns None without querying when ``self.allow_rearchive`` is falsy.
    When the API answers 200 with at least one entry, returns the result of
    ``Metadata.choose_most_complete`` over the entries' stored results.
    Returns False when nothing was found or the API answered with a
    non-200 status (which is also logged as an error).
    """
    # NOTE(review): the "allow_rearchive" help text says the database is
    # queried when the option is False, yet this guard skips the query in
    # exactly that case -- confirm which of the two is intended.
    if not self.allow_rearchive:
        return

    # Join with "/" explicitly: os.path.join uses the OS path separator and
    # would put a backslash into the URL on Windows.
    search_url = f"{self.api_endpoint.rstrip('/')}/tasks/search-url"
    params = {"url": item.get_url(), "limit": 15}
    headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
    response = requests.get(search_url, params=params, headers=headers)

    if response.status_code != 200:
        # Log the raw body: an error response is not guaranteed to be JSON,
        # and decoding it here would raise and hide the actual failure.
        logger.error(f"AA API FAIL ({response.status_code}): {response.text}")
        return False

    # Decode the payload once and reuse it below.
    results = response.json()
    if results:
        logger.success(f"API returned {len(results)} previously archived instance(s)")
        fetched_metadata = [Metadata.from_dict(r["result"]) for r in results]
        return Metadata.choose_most_complete(fetched_metadata)
    return False
|
|
|
|
@@ -51,6 +53,7 @@ class AAApiDb(Database):
|
|
|
|
|
|
|
|
|
|
|
|
def done(self, item: Metadata, cached: bool=False) -> None:
|
|
|
|
def done(self, item: Metadata, cached: bool=False) -> None:
|
|
|
|
"""archival result ready - should be saved to DB"""
|
|
|
|
"""archival result ready - should be saved to DB"""
|
|
|
|
|
|
|
|
if not self.store_results: return
|
|
|
|
if cached:
|
|
|
|
if cached:
|
|
|
|
logger.debug(f"skipping saving archive of {item.get_url()} to the AA API because it was cached")
|
|
|
|
logger.debug(f"skipping saving archive of {item.get_url()} to the AA API because it was cached")
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|