mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
Adds better debug logging for Wayback failures (#161)
This commit is contained in:
committed by
GitHub
parent
928518cda7
commit
ef471f41e1
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
import time, requests
|
||||
|
||||
@@ -70,11 +71,16 @@ class WaybackArchiverEnricher(Enricher, Archiver):
|
||||
return False
|
||||
|
||||
# check job status
|
||||
job_id = r.json().get('job_id')
|
||||
if not job_id:
|
||||
logger.error(f"Wayback failed with {r.json()}")
|
||||
try:
|
||||
job_id = r.json().get('job_id')
|
||||
if not job_id:
|
||||
logger.error(f"Wayback failed with {r.json()}")
|
||||
return False
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
logger.error(f"Expected a JSON with job_id from Wayback and got {r.text}")
|
||||
return False
|
||||
|
||||
|
||||
# wait at most `timeout` seconds for the job to complete; otherwise only enrich with the job_id information
|
||||
start_time = time.time()
|
||||
wayback_url = False
|
||||
@@ -92,6 +98,9 @@ class WaybackArchiverEnricher(Enricher, Archiver):
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
|
||||
break
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
logger.error(f"Expected a JSON from Wayback and got {r.text} for {url=}")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"error fetching status for {url=} due to: {e}")
|
||||
if not wayback_url:
|
||||
|
||||
Reference in New Issue
Block a user