new wayback error action

This commit is contained in:
msramalho
2022-06-08 18:25:58 +02:00
parent bd5146ac3e
commit c8a02cb93a

View File

@@ -40,10 +40,7 @@ class WaybackArchiver(Archiver):
return ArchiveResult(status="Internet archive failed", screenshot=screenshot)
if 'job_id' not in r.json() and 'message' in r.json():
if "please try again" in str(r.json()).lower():
return self.signal_retry_in(screenshot=screenshot)
logger.warning(f"Internet archive failed json \n {r.json()}")
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}", screenshot=screenshot)
return self.custom_retry(r.json(), screenshot=screenshot)
job_id = r.json()['job_id']
logger.debug(f"GETting status for {job_id=} on {url=}")
@@ -66,10 +63,7 @@ class WaybackArchiver(Archiver):
status_json = status_r.json()
if status_json['status'] != 'success':
logger.info(f'please try again" in str(status_json).lower(): {("please try again" in str(status_json).lower())}')
if "please try again" in str(status_json).lower():
return self.signal_retry_in(screenshot=screenshot)
return ArchiveResult(status='Internet Archive failed: ' + str(status_json), screenshot=screenshot)
return self.custom_retry(status_json, screenshot=screenshot)
archive_url = f"https://web.archive.org/web/{status_json['timestamp']}/{status_json['original_url']}"
return self.if_archived_return_with_screenshot(archive_url)
@@ -87,3 +81,11 @@ class WaybackArchiver(Archiver):
screenshot = screenshot or self.get_screenshot(url)
self.seen_urls[url] = ArchiveResult(status=status, cdn_url=archive_url, title=title, screenshot=screenshot)
return self.seen_urls[url]
def custom_retry(self, json_data, **kwargs):
logger.warning(f"Internet archive failed json \n {json_data}")
if "please try again" in str(json_data).lower():
return self.signal_retry_in(**kwargs)
if "this host has been already captured" in str(json_data).lower():
return self.signal_retry_in(**kwargs, min_seconds=86400, max_seconds=129600) # 24h to 36h later
return ArchiveResult(status=f"Internet archive failed: {json_data}", **kwargs)