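bug fixes: keep the screenshot on Wayback failure/retry results, let signal_retry_in forward extra kwargs to ArchiveResult, and cast the Telethon media id to str when building the download path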

This commit is contained in:
msramalho
2022-06-08 18:17:25 +02:00
parent 9e871b3bbc
commit bd5146ac3e
4 changed files with 15 additions and 11 deletions

View File

@@ -229,14 +229,14 @@ class Archiver(ABC):
return (key_thumb, thumb_index_cdn_url)
def signal_retry_in(self, min_seconds=1800, max_seconds=7200, **kwargs):
    """
    Build an ArchiveResult whose status asks for a retry at a random later time.

    The retry moment is the current POSIX timestamp plus a whole number of
    seconds drawn with randrange(min_seconds, max_seconds), so the delay is
    at least min_seconds and strictly below max_seconds.

    :param min_seconds: smallest delay, in seconds, before the retry
    :param max_seconds: exclusive upper bound of the delay, in seconds
    :param kwargs: extra keyword arguments forwarded unchanged to
        ArchiveResult (callers in this file pass screenshot=...)
    :return: ArchiveResult with status 'retrying at <integer timestamp>'
    """
    now = datetime.datetime.now().timestamp()
    # truncate to an int so the status string carries a plain integer timestamp
    retry_at = int(now + randrange(min_seconds, max_seconds))
    logger.debug(f"signaling {retry_at=}")
    return ArchiveResult(status=f'retrying at {retry_at}', **kwargs)
def is_retry(status):
    """Tell whether `status` contains a match for Archiver.retry_regex."""
    found = re.search(Archiver.retry_regex, status)
    return found is not None

View File

@@ -80,7 +80,7 @@ class TelethonArchiver(Archiver):
message = post.message
for mp in media_posts:
if len(mp.message) > len(message): message = mp.message
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', mp.id)
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', str(mp.id))
filename = self.client.download_media(mp.media, filename_dest)
key = filename.split(Storage.TMP_FOLDER)[1]
self.storage.upload(filename, key)

View File

@@ -27,6 +27,7 @@ class WaybackArchiver(Archiver):
if req.status_code == 200:
return self.if_archived_return_with_screenshot(url, archive_url, req=req, status='already archived')
screenshot = self.get_screenshot(url)
logger.debug(f"POSTing {url=} to web.archive.org")
ia_headers = {
"Accept": "application/json",
@@ -36,11 +37,13 @@ class WaybackArchiver(Archiver):
if r.status_code != 200:
logger.warning(f"Internet archive failed with status of {r.status_code}")
return ArchiveResult(status="Internet archive failed")
return ArchiveResult(status="Internet archive failed", screenshot=screenshot)
if 'job_id' not in r.json() and 'message' in r.json():
if "please try again" in str(r.json()).lower():
return self.signal_retry_in(screenshot=screenshot)
logger.warning(f"Internet archive failed json \n {r.json()}")
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}")
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}", screenshot=screenshot)
job_id = r.json()['job_id']
logger.debug(f"GETting status for {job_id=} on {url=}")
@@ -59,18 +62,19 @@ class WaybackArchiver(Archiver):
retries += 1
if status_r.status_code != 200:
return ArchiveResult(status="Internet archive failed")
return ArchiveResult(status="Internet archive failed", screenshot=screenshot)
status_json = status_r.json()
if status_json['status'] != 'success':
logger.info(f'please try again" in str(status_json).lower(): {("please try again" in str(status_json).lower())}')
if "please try again" in str(status_json).lower():
return self.signal_retry_in()
return ArchiveResult(status='Internet Archive failed: ' + str(status_json))
return self.signal_retry_in(screenshot=screenshot)
return ArchiveResult(status='Internet Archive failed: ' + str(status_json), screenshot=screenshot)
archive_url = f"https://web.archive.org/web/{status_json['timestamp']}/{status_json['original_url']}"
return self.if_archived_return_with_screenshot(archive_url)
def if_archived_return_with_screenshot(self, url, archive_url, req=None, status='success'):
def if_archived_return_with_screenshot(self, url, archive_url, screenshot=None, req=None, status='success'):
try:
if req is None:
req = requests.get(archive_url)
@@ -80,6 +84,6 @@ class WaybackArchiver(Archiver):
title = 'Could not get title'
except:
title = "Could not get title"
screenshot = self.get_screenshot(url)
screenshot = screenshot or self.get_screenshot(url)
self.seen_urls[url] = ArchiveResult(status=status, cdn_url=archive_url, title=title, screenshot=screenshot)
return self.seen_urls[url]

View File

@@ -220,9 +220,9 @@ class Config:
"selenium_config": asdict(self.selenium_config),
"selenium_webdriver": self.webdriver != None,
"s3_config": hasattr(self, "s3_config"),
"s3_private": getattr_or(getattr(self, "s3_config", {}), "private", None),
"gd_config": hasattr(self, "gd_config"),
"local_config": hasattr(self, "local_config"),
"s3_private": getattr_or(getattr(self, "s3_config", {}), "private", None),
"wayback_config": self.wayback_config != None,
"telegram_config": self.telegram_config != None,
"gsheets_client": self.gsheets_client != None,