fix telethon exception

This commit is contained in:
msramalho
2022-06-27 14:36:58 +02:00
parent 34536e7f14
commit 4b423dfc34
2 changed files with 29 additions and 11 deletions

View File

@@ -93,16 +93,19 @@ class Archiver(ABC):
return mime.split("/")[0]
return ""
# eg images in a tweet save to cloud storage
def download_from_url(self, url, to_filename):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
d = requests.get(url, headers=headers)
with open(to_filename, 'wb') as f:
f.write(d.content)
def generate_media_page(self, urls, url, object):
"""
For a list of media urls, fetch them, upload them
and call self.generate_media_page_html with them
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
thumbnail = None
uploaded_media = []
@@ -110,11 +113,7 @@ class Archiver(ABC):
key = self._get_key_from_url(media_url, ".jpg")
filename = os.path.join(Storage.TMP_FOLDER, key)
d = requests.get(media_url, headers=headers)
with open(filename, 'wb') as f:
f.write(d.content)
self.download_from_url(media_url, filename)
self.storage.upload(filename, key)
hash = self.get_hash(filename)
cdn_url = self.storage.get_cdn_url(key)

View File

@@ -82,19 +82,38 @@ class TelethonArchiver(Archiver):
cdn_url = self.storage.get_cdn_url(key)
return ArchiveResult(status='already archived', cdn_url=cdn_url, title=post.message, timestamp=post.date, screenshot=screenshot)
key_thumb, thumb_index = None, None
group_id = post.grouped_id if post.grouped_id is not None else post.id
uploaded_media = []
message = post.message
for i, mp in enumerate(media_posts):
for mp in media_posts:
if len(mp.message) > len(message): message = mp.message
# media can also be in entities
if mp.entities:
other_media_urls = [e.url for e in mp.entities if hasattr(e, "url") and e.url and self._guess_file_type(e.url) in ["video", "image"]]
logger.debug(f"Got {len(other_media_urls)} other medial urls from {mp.id=}: {other_media_urls}")
for om_url in other_media_urls:
filename = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}_{self._get_key_from_url(om_url)}')
self.download_from_url(om_url, filename)
key = filename.split(Storage.TMP_FOLDER)[1]
self.storage.upload(filename, key)
hash = self.get_hash(filename)
cdn_url = self.storage.get_cdn_url(key)
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', str(mp.id))
filename = self.client.download_media(mp.media, filename_dest)
if not filename:
logger.debug(f"Empty media found, skipping {str(mp)=}")
continue
key = filename.split(Storage.TMP_FOLDER)[1]
self.storage.upload(filename, key)
hash = self.get_hash(filename)
cdn_url = self.storage.get_cdn_url(key)
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
if i == 0:
if key_thumb is None:
key_thumb, thumb_index = self.get_thumbnails(filename, key)
os.remove(filename)