Adds better debug logging for Wayback failures (#161)

This commit is contained in:
Miguel Sozinho Ramalho
2025-01-06 16:49:11 +00:00
committed by GitHub
parent 928518cda7
commit ef471f41e1

View File

@@ -1,3 +1,4 @@
import json
from loguru import logger
import time, requests
@@ -70,11 +71,16 @@ class WaybackArchiverEnricher(Enricher, Archiver):
return False
# check job status
job_id = r.json().get('job_id')
if not job_id:
logger.error(f"Wayback failed with {r.json()}")
try:
job_id = r.json().get('job_id')
if not job_id:
logger.error(f"Wayback failed with {r.json()}")
return False
except json.decoder.JSONDecodeError as e:
logger.error(f"Expected a JSON with job_id from Wayback and got {r.text}")
return False
# waits at most timeout seconds until job is completed, otherwise only enriches the job_id information
start_time = time.time()
wayback_url = False
@@ -92,6 +98,9 @@ class WaybackArchiverEnricher(Enricher, Archiver):
except requests.exceptions.RequestException as e:
logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
break
except json.decoder.JSONDecodeError as e:
logger.error(f"Expected a JSON from Wayback and got {r.text} for {url=}")
break
except Exception as e:
logger.warning(f"error fetching status for {url=} due to: {e}")
if not wayback_url: