Separate logging; limit Telegram archive file size

This commit is contained in:
Logan Williams
2022-04-14 10:43:27 +00:00
parent 214a4d7d19
commit 38e0104078
3 changed files with 12 additions and 3 deletions

5
app.py
View File

@@ -152,7 +152,6 @@ def init_db():
if __name__ == "__main__":
logger.remove()
logger.add(sys.stdout, level="DEBUG", catch=True)
logger.add("logs/cisticola.log", level="TRACE", rotation="100 MB")
parser = argparse.ArgumentParser(description="Cisticola command line tools")
parser.add_argument(
@@ -172,12 +171,16 @@ if __name__ == "__main__":
if args.command == "init-db":
init_db()
elif args.command == "sync-channels":
logger.add("logs/sync-channels.log", level="TRACE", rotation="100 MB")
sync_channels(args)
elif args.command == "scrape-channels":
logger.add("logs/scrape-channels.log", level="TRACE", rotation="100 MB")
scrape_channels(args)
elif args.command == "archive-media":
logger.add("logs/archive-media.log", level="TRACE", rotation="100 MB")
archive_media(args)
elif args.command == "channel-info":
logger.add("logs/channel-info.log", level="TRACE", rotation="100 MB")
scrape_channel_info(args)
else:
logger.error(f"Unrecognized command {args.command}")

View File

@@ -449,7 +449,8 @@ class ScraperController:
handled = False
for scraper in self.scrapers:
if scraper.__version__ == post.scraper:
# compare scraper name and major version (everything before the first '.')
if scraper.__version__.split('.')[0] == post.scraper.split('.')[0]:
handled = True
logger.debug(f"{scraper} is archiving media for ID {post.id}")
post = scraper.archive_files(post)

View File

@@ -18,7 +18,7 @@ MEDIA_TYPES = ['photo', 'video', 'document', 'webpage']
class TelegramTelethonScraper(Scraper):
"""An implementation of a Scraper for Telegram, using Telethon library"""
__version__ = "TelegramTelethonScraper 0.0.1"
__version__ = "TelegramTelethonScraper 0.0.2"
def get_username_from_url(self, url):
username = url.split('https://t.me/')[1]
@@ -62,6 +62,7 @@ class TelegramTelethonScraper(Scraper):
result.media_archived = datetime.now(timezone.utc)
else:
logger.warning("Downloaded blob was None")
result.archived_urls = {}
result.media_archived = datetime.now(timezone.utc)
return result
@@ -80,6 +81,10 @@ class TelegramTelethonScraper(Scraper):
return self.archive_post_media(post, client=client)
if type(post.media) == types.MessageMediaDocument:
if post.media.document.size/(1024*1024) > 50:
logger.info(f"Skipping archive of large {type(post.media)} with size {post.media.document.size/(1024*1024)} MB")
return None, None
logger.debug(f"Archiving {type(post.media)} with size {post.media.document.size/(1024*1024)} MB")
else:
logger.debug(f"Archiving {type(post.media)}")