diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py
index 687e762..7ab5a9c 100644
--- a/archivers/base_archiver.py
+++ b/archivers/base_archiver.py
@@ -8,6 +8,8 @@ from urllib.parse import urlparse
import hashlib
import time
import requests
+from loguru import logger
+from selenium.common.exceptions import TimeoutException
from storages import Storage
from utils import mkdir_if_not_exists
@@ -46,7 +48,7 @@ class Archiver(ABC):
return self.get_key(urlparse(url).path.replace("/", "_") + ".html")
def generate_media_page_html(self, url, urls_info: dict, object, thumbnail=None):
- page = f'''
{url}
+ page = f'''{url}
Archived media from {self.name}
'''
@@ -98,6 +100,7 @@ class Archiver(ABC):
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
return self.generate_media_page_html(url, uploaded_media, object, thumbnail=thumbnail)
+
def get_key(self, filename):
"""
returns a key in the format "[archiverName]_[filename]" includes extension
@@ -125,8 +128,11 @@ class Archiver(ABC):
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
filename = 'tmp/' + key
- self.driver.get(url)
- time.sleep(6)
+ try:
+ self.driver.get(url)
+ time.sleep(6)
+ except TimeoutException:
+ logger.info("TimeoutException loading page for screenshot")
self.driver.save_screenshot(filename)
self.storage.upload(filename, key, extra_args={
@@ -172,7 +178,7 @@ class Archiver(ABC):
key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)]
- index_page = f'''{filename}
+ index_page = f'''{filename}
'''
for t in cdn_urls:
diff --git a/archivers/telethon_archiver.py b/archivers/telethon_archiver.py
index c332854..88bec58 100644
--- a/archivers/telethon_archiver.py
+++ b/archivers/telethon_archiver.py
@@ -2,7 +2,6 @@ import os
import re
import html
from dataclasses import dataclass
-from urllib.parse import urlparse
from loguru import logger
from storages import Storage
@@ -38,7 +37,7 @@ class TelethonArchiver(Archiver):
posts = self.client.get_messages(chat, ids=search_ids)
media = []
for post in posts:
- if post.grouped_id == original_post.grouped_id and post.media is not None:
+ if post is not None and post.grouped_id == original_post.grouped_id and post.media is not None:
media.append(post)
return media
@@ -76,7 +75,7 @@ class TelethonArchiver(Archiver):
uploaded_media = []
message = post.message
for mp in media_posts:
- if len(mp.message) > message: message = mp.message
+ if len(mp.message) > len(message): message = mp.message
filename = self.client.download_media(mp.media, f'tmp/{chat}_{group_id}/{mp.id}')
key = filename.split('tmp/')[1]
self.storage.upload(filename, key)
diff --git a/auto_archive.py b/auto_archive.py
index 1f00b2f..d3db9a2 100644
--- a/auto_archive.py
+++ b/auto_archive.py
@@ -78,8 +78,11 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
options = webdriver.FirefoxOptions()
options.headless = True
+ options.set_preference('network.protocol-handler.external.tg', False)
+
driver = webdriver.Firefox(options=options)
driver.set_window_size(1400, 2000)
+ driver.set_page_load_timeout(10)
# loop through worksheets to check
for ii, wks in enumerate(sh.worksheets()):
diff --git a/storages/s3_storage.py b/storages/s3_storage.py
index 53bb151..d7c9644 100644
--- a/storages/s3_storage.py
+++ b/storages/s3_storage.py
@@ -20,6 +20,7 @@ class S3Storage(Storage):
self.bucket = config.bucket
self.region = config.region
self.folder = config.folder
+ self.private = config.private
if len(self.folder) and self.folder[-1] != '/':
self.folder += '/'