mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 12:48:28 +03:00
fix telethon exception
This commit is contained in:
@@ -93,16 +93,19 @@ class Archiver(ABC):
|
||||
return mime.split("/")[0]
|
||||
return ""
|
||||
|
||||
# e.g. images in a tweet are saved to cloud storage
|
||||
def download_from_url(self, url, to_filename, timeout=30):
    """
    Download the resource at `url` and write its bytes to `to_filename`.

    The request is sent with a desktop-browser User-Agent header. The
    response body is streamed to disk chunk by chunk instead of being
    held in memory as a whole, and the connection is released as soon
    as the download finishes or fails.

    `timeout` is handed to requests.get: it bounds how long to wait for
    the connection and for each read from the server, not the total
    download time. Pass None to wait indefinitely.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # NOTE(review): the HTTP status is not checked, so the body of an
    # error response is written to disk like any other - confirm callers
    # want that before adding raise_for_status().
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        with open(to_filename, 'wb') as f:
            # iter_content still undoes gzip/deflate content encoding, so
            # the bytes written match what `response.content` would give.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
|
||||
|
||||
def generate_media_page(self, urls, url, object):
|
||||
"""
|
||||
For a list of media urls, fetch them, upload them
|
||||
and call self.generate_media_page_html with them
|
||||
"""
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
|
||||
}
|
||||
|
||||
thumbnail = None
|
||||
uploaded_media = []
|
||||
@@ -110,11 +113,7 @@ class Archiver(ABC):
|
||||
key = self._get_key_from_url(media_url, ".jpg")
|
||||
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
|
||||
d = requests.get(media_url, headers=headers)
|
||||
with open(filename, 'wb') as f:
|
||||
f.write(d.content)
|
||||
|
||||
self.download_from_url(media_url, filename)
|
||||
self.storage.upload(filename, key)
|
||||
hash = self.get_hash(filename)
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
|
||||
@@ -82,19 +82,38 @@ class TelethonArchiver(Archiver):
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
return ArchiveResult(status='already archived', cdn_url=cdn_url, title=post.message, timestamp=post.date, screenshot=screenshot)
|
||||
|
||||
key_thumb, thumb_index = None, None
|
||||
group_id = post.grouped_id if post.grouped_id is not None else post.id
|
||||
uploaded_media = []
|
||||
message = post.message
|
||||
for i, mp in enumerate(media_posts):
|
||||
for mp in media_posts:
|
||||
if len(mp.message) > len(message): message = mp.message
|
||||
|
||||
# media can also be in entities
|
||||
if mp.entities:
|
||||
other_media_urls = [e.url for e in mp.entities if hasattr(e, "url") and e.url and self._guess_file_type(e.url) in ["video", "image"]]
|
||||
logger.debug(f"Got {len(other_media_urls)} other medial urls from {mp.id=}: {other_media_urls}")
|
||||
for om_url in other_media_urls:
|
||||
filename = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}_{self._get_key_from_url(om_url)}')
|
||||
self.download_from_url(om_url, filename)
|
||||
key = filename.split(Storage.TMP_FOLDER)[1]
|
||||
self.storage.upload(filename, key)
|
||||
hash = self.get_hash(filename)
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
|
||||
|
||||
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', str(mp.id))
|
||||
filename = self.client.download_media(mp.media, filename_dest)
|
||||
if not filename:
|
||||
logger.debug(f"Empty media found, skipping {str(mp)=}")
|
||||
continue
|
||||
|
||||
key = filename.split(Storage.TMP_FOLDER)[1]
|
||||
self.storage.upload(filename, key)
|
||||
hash = self.get_hash(filename)
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
|
||||
if i == 0:
|
||||
if key_thumb is None:
|
||||
key_thumb, thumb_index = self.get_thumbnails(filename, key)
|
||||
os.remove(filename)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user