Mirror of https://github.com/bellingcat/cisticola.git (synced 2026-06-08 03:18:34 +03:00)
Separate logging; limit Telegram archive file size
This commit is contained in:
5
app.py
5
app.py
@@ -152,7 +152,6 @@ def init_db():
|
||||
if __name__ == "__main__":
|
||||
logger.remove()
|
||||
logger.add(sys.stdout, level="DEBUG", catch=True)
|
||||
logger.add("logs/cisticola.log", level="TRACE", rotation="100 MB")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Cisticola command line tools")
|
||||
parser.add_argument(
|
||||
@@ -172,12 +171,16 @@ if __name__ == "__main__":
|
||||
if args.command == "init-db":
|
||||
init_db()
|
||||
elif args.command == "sync-channels":
|
||||
logger.add("logs/sync-channels.log", level="TRACE", rotation="100 MB")
|
||||
sync_channels(args)
|
||||
elif args.command == "scrape-channels":
|
||||
logger.add("logs/scrape-channels.log", level="TRACE", rotation="100 MB")
|
||||
scrape_channels(args)
|
||||
elif args.command == "archive-media":
|
||||
logger.add("logs/archive-media.log", level="TRACE", rotation="100 MB")
|
||||
archive_media(args)
|
||||
elif args.command == "channel-info":
|
||||
logger.add("logs/channel-info.log", level="TRACE", rotation="100 MB")
|
||||
scrape_channel_info(args)
|
||||
else:
|
||||
logger.error(f"Unrecognized command {args.command}")
|
||||
|
||||
@@ -449,7 +449,8 @@ class ScraperController:
|
||||
handled = False
|
||||
|
||||
for scraper in self.scrapers:
|
||||
- if scraper.__version__ == post.scraper:
|
||||
+ # compare major versions
|
||||
+ if scraper.__version__.split('.')[0] == post.scraper.split('.')[0]:
|
||||
handled = True
|
||||
logger.debug(f"{scraper} is archiving media for ID {post.id}")
|
||||
post = scraper.archive_files(post)
|
||||
|
||||
@@ -18,7 +18,7 @@ MEDIA_TYPES = ['photo', 'video', 'document', 'webpage']
|
||||
|
||||
class TelegramTelethonScraper(Scraper):
|
||||
"""An implementation of a Scraper for Telegram, using Telethon library"""
|
||||
- __version__ = "TelegramTelethonScraper 0.0.1"
|
||||
+ __version__ = "TelegramTelethonScraper 0.0.2"
|
||||
|
||||
def get_username_from_url(self, url):
|
||||
username = url.split('https://t.me/')[1]
|
||||
@@ -62,6 +62,7 @@ class TelegramTelethonScraper(Scraper):
|
||||
result.media_archived = datetime.now(timezone.utc)
|
||||
else:
|
||||
logger.warning("Downloaded blob was None")
|
||||
result.archived_urls = {}
|
||||
result.media_archived = datetime.now(timezone.utc)
|
||||
|
||||
return result
|
||||
@@ -80,6 +81,10 @@ class TelegramTelethonScraper(Scraper):
|
||||
return self.archive_post_media(post, client=client)
|
||||
|
||||
if type(post.media) == types.MessageMediaDocument:
|
||||
if post.media.document.size/(1024*1024) > 50:
|
||||
logger.info(f"Skipping archive of large {type(post.media)} with size {post.media.document.size/(1024*1024)} MB")
|
||||
return None, None
|
||||
|
||||
logger.debug(f"Archiving {type(post.media)} with size {post.media.document.size/(1024*1024)} MB")
|
||||
else:
|
||||
logger.debug(f"Archiving {type(post.media)}")
|
||||
|
||||
Reference in New Issue
Block a user