From 63633617d2daf348809317e549389fe135b9c24e Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sat, 2 Apr 2022 18:34:14 +0000 Subject: [PATCH 01/10] Configure with Telethon and VK only --- app.py | 17 +++++------------ cisticola/base.py | 6 +++--- cisticola/scraper/base.py | 7 +++---- cisticola/scraper/telegram_telethon.py | 4 ++-- cisticola/scraper/vkontakte.py | 8 ++++---- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/app.py b/app.py index 570bf71..9b87873 100644 --- a/app.py +++ b/app.py @@ -6,19 +6,12 @@ from sqlalchemy.orm import sessionmaker import os import time import sys -import telethon.errors.rpcerrorlist -from cisticola.base import Channel, RawChannelInfo, mapper_registry +from cisticola.base import Channel, mapper_registry from cisticola.scraper import ( ScraperController, - BitchuteScraper, - GabScraper, - GettrScraper, - OdyseeScraper, - RumbleScraper, - TelegramSnscrapeScraper, - TelegramTelethonScraper, - TwitterScraper) + VkontakteScraper, + TelegramTelethonScraper) def sync_channels(args): logger.info("Synchronizing channels") @@ -52,7 +45,7 @@ def sync_channels(args): if c['platform_id'] != '': platform_id = c['platform_id'] - channel = session.query(Channel).filter_by(platform_id=platform_id, platform=c['platform'], url=c['url']).first() + channel = session.query(Channel).filter_by(platform_id=str(platform_id), platform=c['platform'], url=c['url']).first() if not channel: channel = Channel(**c, source='researcher') @@ -85,7 +78,7 @@ def get_scraper_controller(): scrapers = [ TelegramTelethonScraper(), - TwitterScraper()] + VkontakteScraper()] controller.register_scrapers(scrapers) diff --git a/cisticola/base.py b/cisticola/base.py index 37c897b..b53ed35 100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -42,8 +42,8 @@ class ScraperResult: #: Dict in which the keys are the original media URLs from the post, and the corresponding values are the URLs of the archived media files. 
archived_urls: dict - #: Has the media in this post been archived? - media_archived: bool + #: What date was the media archived? (None if not archived) + media_archived: datetime @dataclass class Channel: @@ -252,7 +252,7 @@ raw_posts_table = Table('raw_posts', mapper_registry.metadata, Column('raw_posts', String), Column('date_archived', DateTime), Column('archived_urls', JSON), - Column('media_archived', Boolean)) + Column('media_archived', DateTime)) raw_channel_info_table = Table('raw_channel_info', mapper_registry.metadata, Column('id', Integer, primary_key=True), diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 0762c16..f246605 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -9,6 +9,7 @@ from loguru import logger import ffmpeg from sqlalchemy.orm import sessionmaker import yt_dlp +from sqlalchemy.sql.expression import func from cisticola.base import Channel, ScraperResult, mapper_registry from cisticola.utils import make_request @@ -397,11 +398,9 @@ class ScraperController: for post in posts: session.add(post) + session.commit() added += 1 - if added > 100: - break - session.commit() logger.info( f"{scraper} found {added} new posts from {channel}") @@ -418,7 +417,7 @@ class ScraperController: session = self.session() - posts = session.query(ScraperResult).where(ScraperResult.media_archived == False).all() + posts = session.query(ScraperResult).where(ScraperResult.media_archived == False).order_by(func.random()).all() logger.info(f"Found {len(posts)} posts without media. 
Archiving now") diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index c27f8c8..6c7eb16 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -62,7 +62,7 @@ class TelegramTelethonScraper(Scraper): else: logger.warning("Downloaded blob was None") - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def archive_post_media(self, post : types.Message, client : TelegramClient = None): @@ -146,7 +146,7 @@ class TelegramTelethonScraper(Scraper): date_archived=datetime.now(timezone.utc), raw_posts=json.dumps(post.to_dict(), default=str), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def get_profile(self, channel: Channel) -> RawChannelInfo: username = channel.screenname diff --git a/cisticola/scraper/vkontakte.py b/cisticola/scraper/vkontakte.py index f36ac12..7b735da 100644 --- a/cisticola/scraper/vkontakte.py +++ b/cisticola/scraper/vkontakte.py @@ -64,14 +64,14 @@ class VkontakteScraper(Scraper): yield ScraperResult( scraper=self.__version__, - platform="Vkontatke", + platform="VK", channel=channel.id, platform_id=post.url.split('/')[-1], date=datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), raw_posts=post.json(), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: @@ -84,12 +84,12 @@ class VkontakteScraper(Scraper): archived_url = self.archive_blob(media_blob, content_type, key) result.archived_urls[url] = archived_url - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def can_handle(self, channel): - if channel.platform == "Vkontakte" and channel.platform_id: + 
if channel.platform == "VK": return True def url_to_key(self, url: str, content_type: str) -> str: From 01bbabe0cb112cf8cc3fd4821271331f92f94a60 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sat, 2 Apr 2022 18:45:08 +0000 Subject: [PATCH 02/10] Fix issues with new datetime based 'media_archived' column --- cisticola/scraper/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index f246605..8fa212b 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -3,6 +3,7 @@ import os from io import BytesIO from urllib.parse import urlparse import tempfile +from datetime import datetime, timezone import boto3 from loguru import logger @@ -417,7 +418,7 @@ class ScraperController: session = self.session() - posts = session.query(ScraperResult).where(ScraperResult.media_archived == False).order_by(func.random()).all() + posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).all() logger.info(f"Found {len(posts)} posts without media. 
Archiving now") @@ -431,7 +432,7 @@ class ScraperController: post = scraper.archive_files(post) if post: - session.query(ScraperResult).where(ScraperResult.id == post.id).update({'archived_urls': post.archived_urls, 'media_archived': True}) + session.query(ScraperResult).where(ScraperResult.id == post.id).update({'archived_urls': post.archived_urls, 'media_archived': datetime.now(timezone.utc)}) session.commit() break From a82ec15f0ec28a74e6c09043d916e314152cfa30 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 12:02:27 +0200 Subject: [PATCH 03/10] Change archived_media to be timestamp for all scrapers --- app.py | 11 +++++++++-- cisticola/base.py | 4 ++-- cisticola/scraper/base.py | 5 ++++- cisticola/scraper/bitchute.py | 4 ++-- cisticola/scraper/gab.py | 4 ++-- cisticola/scraper/gettr.py | 4 ++-- cisticola/scraper/instagram.py | 8 ++++---- cisticola/scraper/odysee.py | 10 +++++----- cisticola/scraper/rumble.py | 6 +++--- cisticola/scraper/telegram_snscrape.py | 4 ++-- cisticola/scraper/telegram_telethon.py | 4 ++-- cisticola/scraper/twitter.py | 4 ++-- cisticola/scraper/vkontakte.py | 2 +- cisticola/scraper/youtube.py | 6 +++--- cisticola/transformer/bitchute.py | 4 ++-- cisticola/transformer/twitter.py | 2 +- 16 files changed, 46 insertions(+), 36 deletions(-) diff --git a/app.py b/app.py index 9b87873..d5ff286 100644 --- a/app.py +++ b/app.py @@ -11,7 +11,10 @@ from cisticola.base import Channel, mapper_registry from cisticola.scraper import ( ScraperController, VkontakteScraper, - TelegramTelethonScraper) + TelegramTelethonScraper, + GettrScraper, + OdyseeScraper, + RumbleScraper) def sync_channels(args): logger.info("Synchronizing channels") @@ -78,7 +81,11 @@ def get_scraper_controller(): scrapers = [ TelegramTelethonScraper(), - VkontakteScraper()] + VkontakteScraper(), + GettrScraper(), + OdyseeScraper(), + RumbleScraper() + ] controller.register_scrapers(scrapers) diff --git a/cisticola/base.py b/cisticola/base.py index b53ed35..bcc4618 
100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -34,7 +34,7 @@ class ScraperResult: date: datetime #: JSON dump of dict that contains all data scraped for the post. - raw_posts: str + raw_data: str #: Datetime (relative to UTC) that the scraped post was archived at. date_archived: datetime @@ -249,7 +249,7 @@ raw_posts_table = Table('raw_posts', mapper_registry.metadata, Column('channel', Integer, ForeignKey('channels.id')), Column('platform_id', String), Column('date', DateTime), - Column('raw_posts', String), + Column('raw_data', String), Column('date_archived', DateTime), Column('archived_urls', JSON), Column('media_archived', DateTime)) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 8fa212b..51c5f8e 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -257,7 +257,7 @@ class Scraper: archived_url = self.archive_blob(media_blob, content_type, key) result.archived_urls[url] = archived_url - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def can_handle(self, channel: Channel) -> bool: @@ -402,6 +402,9 @@ class ScraperController: session.commit() added += 1 + if added >= 200: + break + session.commit() logger.info( f"{scraper} found {added} new posts from {channel}") diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index 5afa4e1..fda24ff 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -62,9 +62,9 @@ class BitchuteScraper(Scraper): platform_id=post['id'], date=datetime.fromtimestamp(post['timestamp']), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(post), + raw_data=json.dumps(post), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def can_handle(self, channel): if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None: diff --git a/cisticola/scraper/gab.py 
b/cisticola/scraper/gab.py index ab1cdf3..2f25655 100644 --- a/cisticola/scraper/gab.py +++ b/cisticola/scraper/gab.py @@ -81,9 +81,9 @@ class GabScraper(Scraper): platform_id=post['id'], date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(post), + raw_data=json.dumps(post), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def can_handle(self, channel: Channel) -> bool: if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None: diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index f785771..6f28a75 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -59,9 +59,9 @@ class GettrScraper(Scraper): platform_id=post['_id'], date=datetime.fromtimestamp(post['cdate']/1000.), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(post), + raw_data=json.dumps(post), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def can_handle(self, channel): if channel.platform == "Gettr" and self.get_username_from_url(channel.url) is not None: diff --git a/cisticola/scraper/instagram.py b/cisticola/scraper/instagram.py index 4dbc205..1f25cbb 100644 --- a/cisticola/scraper/instagram.py +++ b/cisticola/scraper/instagram.py @@ -66,9 +66,9 @@ class InstagramScraper(Scraper): platform_id=post.mediaid, date=post.date_utc, date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(post._asdict(), default=str), + raw_data=json.dumps(post._asdict(), default=str), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) for comment in post.get_comments(): @@ -83,9 +83,9 @@ class InstagramScraper(Scraper): platform_id=post.mediaid, date=comment.created_at_utc, 
date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(comment_dict, default=str), + raw_data=json.dumps(comment_dict, default=str), archived_urls={}, - media_archived=True) + media_archived=datetime.now(timezone.utc)) def can_handle(self, channel): if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None: diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py index 0f7a3fe..25788fe 100644 --- a/cisticola/scraper/odysee.py +++ b/cisticola/scraper/odysee.py @@ -62,9 +62,9 @@ class OdyseeScraper(Scraper): platform_id=video.info['claim_id'], date=datetime.fromtimestamp(video.info['created']), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(video.info), + raw_data=json.dumps(video.info), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) for comment in all_comments: @@ -75,9 +75,9 @@ class OdyseeScraper(Scraper): platform_id=comment.info['claim_id'], date=datetime.fromtimestamp(comment.info['created']), date_archived=datetime.now(), - raw_posts=json.dumps(comment.info), + raw_data=json.dumps(comment.info), archived_urls={}, - media_archived=True) + media_archived=datetime.now(timezone.utc)) def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: @@ -91,7 +91,7 @@ class OdyseeScraper(Scraper): archived_url = self.archive_blob(media_blob, content_type, key) result.archived_urls[url] = archived_url - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def can_handle(self, channel): diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index 37f1b53..c75c947 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -41,9 +41,9 @@ class RumbleScraper(Scraper): platform_id=post['media_url'].split('/')[-2], date=post['datetime'].replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), 
- raw_posts=json.dumps(post, default = str), + raw_data=json.dumps(post, default = str), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def url_to_key(self, url: str, content_type: str) -> str: ext = '.' + content_type.split('/')[-1] @@ -57,7 +57,7 @@ class RumbleScraper(Scraper): archived_url = self.archive_blob(media_blob, content_type, key) result.archived_urls[url] = archived_url - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def can_handle(self, channel): diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py index 9b91203..6593917 100644 --- a/cisticola/scraper/telegram_snscrape.py +++ b/cisticola/scraper/telegram_snscrape.py @@ -50,9 +50,9 @@ class TelegramSnscrapeScraper(Scraper): platform_id=post.url, date=post.date, date_archived=datetime.now(timezone.utc), - raw_posts=post.json(), + raw_data=post.json(), archived_urls=archived_urls, - media_archived=archive_media + media_archived=datetime.now(timezone.utc) if archive_media else None ) def get_profile(self, channel: Channel) -> RawChannelInfo: diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index 6c7eb16..a2d0b7d 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -44,7 +44,7 @@ class TelegramTelethonScraper(Scraper): key = list(result.archived_urls.keys())[0] if result.archived_urls[key] is None: - raw = json.loads(result.raw_posts) + raw = json.loads(result.raw_data) message = client.get_messages(raw['peer_id']['channel_id'], ids=[raw['id']]) @@ -144,7 +144,7 @@ class TelegramTelethonScraper(Scraper): platform_id=post_url, date=post.date.replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(post.to_dict(), default=str), + raw_data=json.dumps(post.to_dict(), default=str), archived_urls=archived_urls, 
media_archived=datetime.now(timezone.utc) if archive_media else None) diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index a361252..b59aaf6 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -72,9 +72,9 @@ class TwitterScraper(Scraper): platform_id=tweet.id, date=tweet.date, date_archived=datetime.now(timezone.utc), - raw_posts=tweet.json(), + raw_data=tweet.json(), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def can_handle(self, channel): if channel.platform == "Twitter" and (channel.platform_id or channel.screenname): diff --git a/cisticola/scraper/vkontakte.py b/cisticola/scraper/vkontakte.py index 7b735da..cdce5b2 100644 --- a/cisticola/scraper/vkontakte.py +++ b/cisticola/scraper/vkontakte.py @@ -69,7 +69,7 @@ class VkontakteScraper(Scraper): platform_id=post.url.split('/')[-1], date=datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), - raw_posts=post.json(), + raw_data=post.json(), archived_urls=archived_urls, media_archived=datetime.now(timezone.utc) if archive_media else None) diff --git a/cisticola/scraper/youtube.py b/cisticola/scraper/youtube.py index 6b14d98..445b8f4 100644 --- a/cisticola/scraper/youtube.py +++ b/cisticola/scraper/youtube.py @@ -75,9 +75,9 @@ class YoutubeScraper(Scraper): platform_id=video_id, date=datetime.strptime(video['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc), date_archived=datetime.now(timezone.utc), - raw_posts=json.dumps(video, default = str), + raw_data=json.dumps(video, default = str), archived_urls=archived_urls, - media_archived=archive_media) + media_archived=datetime.now(timezone.utc) if archive_media else None) def can_handle(self, channel): if channel.platform == "Youtube" and channel.url: @@ -115,7 +115,7 @@ class YoutubeScraper(Scraper): archived_url = self.archive_blob(media_blob, content_type, key) 
result.archived_urls[url] = archived_url - result.media_archived = True + result.media_archived = datetime.now(timezone.utc) return result def get_profile(self, channel: Channel) -> RawChannelInfo: diff --git a/cisticola/transformer/bitchute.py b/cisticola/transformer/bitchute.py index 61b327d..d0c5fe0 100644 --- a/cisticola/transformer/bitchute.py +++ b/cisticola/transformer/bitchute.py @@ -20,7 +20,7 @@ class BitchuteTransformer(Transformer): return False def transform_media(self, data: ScraperResult, transformed: Post) -> Generator[Media, None, None]: - raw = json.loads(data.raw_posts) + raw = json.loads(data.raw_data) orig = raw['video_url'] new = data.archived_urls[orig] @@ -30,7 +30,7 @@ class BitchuteTransformer(Transformer): yield m def transform(self, data: ScraperResult) -> Post: - raw = json.loads(data.raw_posts) + raw = json.loads(data.raw_data) soup = BeautifulSoup(raw['body'], features = 'html.parser') content = soup.find_all('p')[-1].text diff --git a/cisticola/transformer/twitter.py b/cisticola/transformer/twitter.py index 8fa2e68..85ada05 100644 --- a/cisticola/transformer/twitter.py +++ b/cisticola/transformer/twitter.py @@ -47,7 +47,7 @@ class TwitterTransformer(Transformer): def transform(self, data: ScraperResult, insert: Callable) -> Generator[Union[Post, Channel, Media], None, None]: - raw = json.loads(data.raw_posts) + raw = json.loads(data.raw_data) transformed = Post( raw_id=data.id, From 57b908227158be1532e3c4d6e4c1dbcd94d87a7f Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 13:26:05 +0200 Subject: [PATCH 04/10] Remove Odysee scraper due to errors --- app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/app.py b/app.py index d5ff286..b950697 100644 --- a/app.py +++ b/app.py @@ -13,7 +13,6 @@ from cisticola.scraper import ( VkontakteScraper, TelegramTelethonScraper, GettrScraper, - OdyseeScraper, RumbleScraper) def sync_channels(args): @@ -83,7 +82,6 @@ def get_scraper_controller(): TelegramTelethonScraper(), 
VkontakteScraper(), GettrScraper(), - OdyseeScraper(), RumbleScraper() ] From 9c838aae393c24941cae55a85a08d348cdf3e4a9 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 13:29:10 +0200 Subject: [PATCH 05/10] Update media_archived column even when TG post has no media --- cisticola/scraper/telegram_telethon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index a2d0b7d..1c1ee8a 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -58,11 +58,11 @@ class TelegramTelethonScraper(Scraper): # TODO specify Content-Type archived_url = self.archive_blob(blob = blob, content_type = '', key = output_file_with_ext) result.archived_urls[key] = archived_url - return result + result.media_archived = datetime.now(timezone.utc) else: logger.warning("Downloaded blob was None") + result.media_archived = datetime.now(timezone.utc) - result.media_archived = datetime.now(timezone.utc) return result def archive_post_media(self, post : types.Message, client : TelegramClient = None): From ecae1aad050e37ace9eb5f96b78adac2d004f89e Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 14:12:23 +0200 Subject: [PATCH 06/10] Catch exceptions in archive_files so that archiver continues to run --- cisticola/scraper/base.py | 1 + cisticola/scraper/odysee.py | 1 + cisticola/scraper/rumble.py | 1 + cisticola/scraper/telegram_telethon.py | 1 + cisticola/scraper/vkontakte.py | 1 + cisticola/scraper/youtube.py | 1 + 6 files changed, 6 insertions(+) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 51c5f8e..e1eaa6c 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -236,6 +236,7 @@ class Scraper: return archived_url + @logger.catch def archive_files(self, result: ScraperResult) -> ScraperResult: """Archive files corresponding to ``archived_url`` dict keys, if the files have not 
previously been archived. diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py index 25788fe..22b8dae 100644 --- a/cisticola/scraper/odysee.py +++ b/cisticola/scraper/odysee.py @@ -79,6 +79,7 @@ class OdyseeScraper(Scraper): archived_urls={}, media_archived=datetime.now(timezone.utc)) + @logger.catch def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: if result.archived_urls[url] is None: diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index c75c947..c9ba6d8 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -50,6 +50,7 @@ class RumbleScraper(Scraper): key = urlparse(url).path.split('/')[-2] + ext return key + @logger.catch def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: if result.archived_urls[url] is None: diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index 1c1ee8a..426ab4e 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -26,6 +26,7 @@ class TelegramTelethonScraper(Scraper): username = username.split('s/')[1] return username + @logger.catch def archive_files(self, result: ScraperResult, client : TelegramClient = None) -> ScraperResult: if len(result.archived_urls.keys()) == 0: return result diff --git a/cisticola/scraper/vkontakte.py b/cisticola/scraper/vkontakte.py index cdce5b2..cf427ba 100644 --- a/cisticola/scraper/vkontakte.py +++ b/cisticola/scraper/vkontakte.py @@ -73,6 +73,7 @@ class VkontakteScraper(Scraper): archived_urls=archived_urls, media_archived=datetime.now(timezone.utc) if archive_media else None) + @logger.catch def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: if result.archived_urls[url] is None: diff --git a/cisticola/scraper/youtube.py b/cisticola/scraper/youtube.py index 445b8f4..2e8d9af 100644 --- a/cisticola/scraper/youtube.py +++ 
b/cisticola/scraper/youtube.py @@ -83,6 +83,7 @@ class YoutubeScraper(Scraper): if channel.platform == "Youtube" and channel.url: return True + @logger.catch def archive_files(self, result: ScraperResult) -> ScraperResult: for url in result.archived_urls: if result.archived_urls[url] is None: From 96db66257219814c306b83b1db76a3f15b69dcd7 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 14:16:03 +0200 Subject: [PATCH 07/10] Don't add a timestamp to media that failed to archive --- cisticola/scraper/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index e1eaa6c..a71bd67 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -436,7 +436,7 @@ class ScraperController: post = scraper.archive_files(post) if post: - session.query(ScraperResult).where(ScraperResult.id == post.id).update({'archived_urls': post.archived_urls, 'media_archived': datetime.now(timezone.utc)}) + session.query(ScraperResult).where(ScraperResult.id == post.id).update({'archived_urls': post.archived_urls, 'media_archived': post.media_archived}) session.commit() break From 0140b09ee8094c58078148f0c784de21ec7725a0 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 15:29:24 +0200 Subject: [PATCH 08/10] Release Telethon, VK, and Gettr as 0.0.1; specify unreleased 0.0.0 otherwise --- cisticola/scraper/bitchute.py | 2 +- cisticola/scraper/gab.py | 2 +- cisticola/scraper/instagram.py | 2 +- cisticola/scraper/odysee.py | 2 +- cisticola/scraper/rumble.py | 2 +- cisticola/scraper/telegram_snscrape.py | 2 +- cisticola/scraper/twitter.py | 2 +- cisticola/scraper/youtube.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index fda24ff..643d013 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -16,7 +16,7 @@ from cisticola.scraper.base import Scraper class 
BitchuteScraper(Scraper): """An implementation of a Scraper for Bitchute, using classes from the 4cat library""" - __version__ = "BitchuteScraper 0.0.1" + __version__ = "BitchuteScraper 0.0.0" def get_username_from_url(self, url): username = url.split('bitchute.com/channel/')[-1].strip('/') diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py index 2f25655..4a0fb51 100644 --- a/cisticola/scraper/gab.py +++ b/cisticola/scraper/gab.py @@ -11,7 +11,7 @@ from cisticola.scraper.base import Scraper class GabScraper(Scraper): """An implementation of a Scraper for Gab, using gabber library""" - __version__ = "GabScraper 0.0.2" + __version__ = "GabScraper 0.0.0" def get_username_from_url(self, url): username = url.split('https://gab.com/')[-1] diff --git a/cisticola/scraper/instagram.py b/cisticola/scraper/instagram.py index 1f25cbb..3eab56c 100644 --- a/cisticola/scraper/instagram.py +++ b/cisticola/scraper/instagram.py @@ -19,7 +19,7 @@ CONTENT_TYPES = { class InstagramScraper(Scraper): """An implementation of a Scraper for Instagram, using instaloader library""" - __version__ = "InstagramScraper 0.0.1" + __version__ = "InstagramScraper 0.0.0" def get_username_from_url(self, url): username = url.split(BASE_URL)[1].strip('/') diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py index 22b8dae..0823e8e 100644 --- a/cisticola/scraper/odysee.py +++ b/cisticola/scraper/odysee.py @@ -13,7 +13,7 @@ from cisticola.scraper.base import Scraper class OdyseeScraper(Scraper): """An implementation of a Scraper for Odysee, using polyphemus library""" - __version__ = "OdyseeScraper 0.0.1" + __version__ = "OdyseeScraper 0.0.0" def __init__(self): super().__init__() diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index c9ba6d8..8bba169 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -13,7 +13,7 @@ BASE_URL = 'https://rumble.com' class RumbleScraper(Scraper): """An implementation of a Scraper for Rumble, using 
custom functions""" - __version__ = "RumbleScraper 0.0.1" + __version__ = "RumbleScraper 0.0.0" @logger.catch def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py index 6593917..4dd3f44 100644 --- a/cisticola/scraper/telegram_snscrape.py +++ b/cisticola/scraper/telegram_snscrape.py @@ -9,7 +9,7 @@ from cisticola.scraper.base import Scraper class TelegramSnscrapeScraper(Scraper): """An implementation of a Scraper for Telegram, using snscrape library""" - __version__ = "TelegramSnscrapeScraper 0.0.1" + __version__ = "TelegramSnscrapeScraper 0.0.0" def can_handle(self, channel): if channel.platform == "Telegram" and channel.public and not channel.chat: diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index b59aaf6..ebbdb95 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -10,7 +10,7 @@ from cisticola.scraper.base import Scraper, ChannelDoesNotExistError class TwitterScraper(Scraper): """An implementation of a Scraper for Twitter, using snscrape library""" - __version__ = "TwitterScraper 0.0.1" + __version__ = "TwitterScraper 0.0.0" @logger.catch def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: diff --git a/cisticola/scraper/youtube.py b/cisticola/scraper/youtube.py index 2e8d9af..3ef6fa2 100644 --- a/cisticola/scraper/youtube.py +++ b/cisticola/scraper/youtube.py @@ -13,7 +13,7 @@ from cisticola.scraper import Scraper class YoutubeScraper(Scraper): """An implementation of a Scraper for Youtube, using youtube-dl""" - __version__ = "YoutubeScraper 0.0.1" + __version__ = "YoutubeScraper 0.0.0" @logger.catch def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: From 
4c580519ddcf14bcf14cdcbfe840d591c671ecff Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 15:59:39 +0200 Subject: [PATCH 09/10] Remove Rumble scraper --- app.py | 99 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/app.py b/app.py index b950697..160dd25 100644 --- a/app.py +++ b/app.py @@ -13,14 +13,15 @@ from cisticola.scraper import ( VkontakteScraper, TelegramTelethonScraper, GettrScraper, - RumbleScraper) +) + def sync_channels(args): logger.info("Synchronizing channels") session = get_db_session() - gc = gspread.service_account(filename='service_account.json') + gc = gspread.service_account(filename="service_account.json") # Open a sheet from a spreadsheet in one go wks = gc.open_by_url(args.gsheet).worksheet("channels") @@ -29,33 +30,44 @@ def sync_channels(args): for c in channels: # only adding channels, so skip everything with an ID - if c['id'] == '': - del c['id'] - del c['followers'] + if c["id"] == "": + del c["id"] + del c["followers"] + + if c["public"] == "": + c["public"] = False + if c["chat"] == "": + c["chat"] = False - if c['public'] == '': c['public'] = False - if c['chat'] == '': c['chat'] = False - for k in c.keys(): - if c[k] == 'TRUE' or c[k] == 'yes': c[k] = True - if c[k] == 'FALSE' or c[k] == 'no': c[k] = False + if c[k] == "TRUE" or c[k] == "yes": + c[k] = True + if c[k] == "FALSE" or c[k] == "no": + c[k] = False - if c[k] == '': c[k] = None + if c[k] == "": + c[k] = None - # check to see if this already exists, + # check to see if this already exists, platform_id = None - if c['platform_id'] != '': - platform_id = c['platform_id'] + if c["platform_id"] != "": + platform_id = c["platform_id"] - channel = session.query(Channel).filter_by(platform_id=str(platform_id), platform=c['platform'], url=c['url']).first() + channel = ( + session.query(Channel) + .filter_by( + platform_id=str(platform_id), platform=c["platform"], url=c["url"] + ) + .first() 
+ ) if not channel: - channel = Channel(**c, source='researcher') + channel = Channel(**c, source="researcher") logger.debug(f"{channel} does not exist, adding") session.add(channel) session.flush() session.commit() - + wks.update_cell(row, 1, channel.id) time.sleep(1) @@ -63,37 +75,36 @@ def sync_channels(args): session.commit() + def get_db_session(): - engine = create_engine(os.environ['DB']) - + engine = create_engine(os.environ["DB"]) + session_generator = sessionmaker() session_generator.configure(bind=engine) session = session_generator() return session + def get_scraper_controller(): - engine = create_engine(os.environ['DB']) + engine = create_engine(os.environ["DB"]) controller = ScraperController() controller.connect_to_db(engine) - scrapers = [ - TelegramTelethonScraper(), - VkontakteScraper(), - GettrScraper(), - RumbleScraper() - ] + scrapers = [TelegramTelethonScraper(), VkontakteScraper(), GettrScraper()] controller.register_scrapers(scrapers) return controller + def scrape_channels(args): logger.info(f"Scraping channels, media: {args.media}") controller = get_scraper_controller() - controller.scrape_all_channels(archive_media = args.media) + controller.scrape_all_channels(archive_media=args.media) + def scrape_channel_info(args): logger.info(f"Scraping channel info") @@ -101,38 +112,48 @@ def scrape_channel_info(args): controller = get_scraper_controller() controller.scrape_all_channel_info() + def archive_media(args): logger.info(f"Archiving unarchived media") controller = get_scraper_controller() controller.archive_unarchived_media() + def init_db(): - engine = create_engine(os.environ['DB']) + engine = create_engine(os.environ["DB"]) mapper_registry.metadata.create_all(bind=engine) -if __name__ == '__main__': + +if __name__ == "__main__": logger.remove() logger.add(sys.stdout, level="DEBUG", catch=True) logger.add("./test.log", level="TRACE") - parser = argparse.ArgumentParser(description = 'Cisticola command line tools') - 
parser.add_argument('command', type=str, help='Command to run: "sync-channels", "scrape-channels", or "archive-media"') - parser.add_argument('--gsheet', type=str, help='[sync-channels] URL of Google Sheet to synchronize') - parser.add_argument('--media', action='store_true', help='[scrape-channels] Add this flag to media') + parser = argparse.ArgumentParser(description="Cisticola command line tools") + parser.add_argument( + "command", + type=str, + help='Command to run: "sync-channels", "scrape-channels", or "archive-media"', + ) + parser.add_argument( + "--gsheet", type=str, help="[sync-channels] URL of Google Sheet to synchronize" + ) + parser.add_argument( + "--media", action="store_true", help="[scrape-channels] Add this flag to media" + ) args = parser.parse_args() - - if args.command == 'init-db': + if args.command == "init-db": init_db() - elif args.command == 'sync-channels': + elif args.command == "sync-channels": sync_channels(args) - elif args.command == 'scrape-channels': + elif args.command == "scrape-channels": scrape_channels(args) - elif args.command == 'archive-media': + elif args.command == "archive-media": archive_media(args) - elif args.command == 'channel-info': + elif args.command == "channel-info": scrape_channel_info(args) else: logger.error(f"Unrecognized command {args.command}") From fccbad7a93ff6d0fe81f4594c707f97daa2be9b2 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Sun, 3 Apr 2022 16:32:00 +0000 Subject: [PATCH 10/10] Remove 200 post limit; add log rotation --- Pipfile | 28 ++- Pipfile.lock | 488 ++++++++++---------------------------- app.py | 2 +- cisticola/scraper/base.py | 5 +- 4 files changed, 145 insertions(+), 378 deletions(-) diff --git a/Pipfile b/Pipfile index 34fa842..5e4e56b 100644 --- a/Pipfile +++ b/Pipfile @@ -4,25 +4,21 @@ verify_ssl = true name = "pypi" [packages] -sqlalchemy = "*" +SQLAlchemy = "*" loguru = "*" gogettr = "*" requests = "*" bs4 = "*" dateparser = "*" boto3 = "*" -snscrape = {git = 
"https://github.com/bellingcat/snscrape.git"} ffmpeg-python = "*" -polyphemus = {git = "https://github.com/bellingcat/polyphemus.git"} -yt-dlp = "*" -telethon = "*" +yt-dlp = "*" +Telethon = "*" pytesseract = "*" -pyexiftool = {git = "https://github.com/smarnach/pyexiftool.git"} instaloader = "*" gspread = "*" cryptg = "*" -gabber = {git = "https://github.com/stanfordio/gabber.git"} -psycopg2-binary = "*" +psycopg2 = "*" tqdm = "*" ratelimit = "*" pytz = "*" @@ -33,11 +29,23 @@ pytest-cov = "*" pytest-html = "*" pytest-metadata = "*" black = "*" -sphinx = "*" -sphinx_rtd_theme = "*" +Sphinx = "*" +sphinx-rtd-theme = "*" [requires] python_version = "3.9" [pipenv] allow_prereleases = true + +[packages.polyphemus] +git = "https://github.com/bellingcat/polyphemus.git" + +[packages.PyExifTool] +git = "https://github.com/smarnach/pyexiftool.git" + +[packages.gabber] +git = "https://github.com/stanfordio/gabber.git" + +[packages.snscrape] +git = "https://github.com/bellingcat/snscrape" diff --git a/Pipfile.lock b/Pipfile.lock index 4629c08..662c53e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "89ac092ac8c8321f199f199da0c0867803a44b080538a43e1a57ae7713683616" + "sha256": "bd884a30c799fc7b881926bd6a894fb36cc6710f1221c84c0e6be34b8836fa7d" }, "pipfile-spec": 6, "requires": { @@ -21,24 +21,22 @@ "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf", "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891" ], - "markers": "python_version >= '3.1'", "version": "==4.10.0" }, "boto3": { "hashes": [ - "sha256:35f68b60652bff50e7bc926238443cb578f29f120908bb945e5640e90c6dd53e", - "sha256:7f3f93ee97215862ccd1a216f37deb7d64055c71f826b821805904df7b84ee6a" + "sha256:05f4438607e560624caadb073c6c63181eda9bf74f8a9e4581e7b43d641cc683", + "sha256:8e7a6d70cefa4e125466e0c467b489649cb72e3632997844bc3c382f948c46e4" ], "index": "pypi", - "version": "==1.21.31" + "version": "==1.21.32" }, "botocore": { "hashes": 
[ - "sha256:3bb21e3ee5e4de3ed76bb99b4496a46e9b5c82e7b7fdb62702f11dda1b57b769", - "sha256:424fd94bef86a11f5340dc15eb50602dedec2ecc01c3a25c4fea23a2c8195500" + "sha256:4f08eaaa93ee03f14de760031dd060cda3bd6aab734d194a916dbb8f7e5c7085", + "sha256:5c2dab84f21b2a8c00bdab2150149be0ca0c8e8dd0b38712fa3562af5cfe53a2" ], - "markers": "python_version >= '3.6'", - "version": "==1.24.31" + "version": "==1.24.32" }, "brotli": { "hashes": [ @@ -120,7 +118,6 @@ "sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6", "sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4" ], - "markers": "python_version ~= '3.7'", "version": "==5.0.0" }, "certifi": { @@ -198,7 +195,6 @@ "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e", "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72" ], - "markers": "python_version >= '3.7'", "version": "==8.1.2" }, "cryptg": { @@ -257,24 +253,14 @@ "index": "pypi", "version": "==0.2.0" }, - "filelock": { - "hashes": [ - "sha256:9cd540a9352e432c7246a48fe4e8712b10acb1df2ad1f30e8c070b82ae1fed85", - "sha256:f8314284bfffbdcfa0ff3d7992b023d4c628ced6feb957351d4c48d059f56bc0" - ], - "markers": "python_version >= '3.7'", - "version": "==3.6.0" - }, "future": { "hashes": [ "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.18.2" }, "gabber": { - "git": "https://github.com/stanfordio/gabber.git", - "ref": "d80c44c488ad4e087ba4c8f033802fe2071843bd" + "git": "https://github.com/stanfordio/gabber.git" }, "gogettr": { "hashes": [ @@ -289,7 +275,6 @@ "sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab", "sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==2.6.2" }, "google-auth-oauthlib": { @@ -297,68 +282,68 @@ 
"sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0", "sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8" ], - "markers": "python_version >= '3.6'", "version": "==0.5.1" }, "greenlet": { "hashes": [ - "sha256:004aed447382d80a56ecc354a6d807f305e6c808714ce6ccbca4839c94fae81d", - "sha256:068d68fad6bd623e29a2d36e74538c9b9d6dc6464931cd27d93da6cfc6a7f242", - "sha256:06fd4075754009c9817c6b4e1dc0af4616de52757b6ca973a81c3c1aadc28257", - "sha256:1004cb542451814b12a4f38e835a47734e2b2c683acbf463d5ae76282a3974cf", - "sha256:10c358633a8b27bfc32d27114ef2ca2ddc9f1f89f1643d1157b85e1fdd695315", - "sha256:115bc25fefbdc692c4483e9ddb9011ccd0251590ed59dbfff0f4eb7050bf99c4", - "sha256:1d987a2579336792f73ae6b106c2f087e32afc8573fbf9566f123ac6d8cfb72f", - "sha256:2128d727fd1e8afba8e68feb2cdcf88c90163b69ddc9707722a3e491c5280720", - "sha256:230132c241fe284f93f2e7b3969e9b22bbd76ef98cf93e382c945d378907f5a4", - "sha256:23558f7bd08a663386c032ab8d302d613d2d02ae0c9758ad410bab6035b58d3d", - "sha256:255d520d3e4a5f16883b182e1a94219fe455ab4f50aaaf534bfd6d64ee728397", - "sha256:2a6bc19a728f6f643cfc89b876159a1a25a8f7d8700c013d48a73691f80b4550", - "sha256:379bed346ef8ba0a0e698b3c5975a44d15dd4a5bbff40bbd7fd548b445d5550b", - "sha256:3b12d0866759db93b0a893b4e50a7d7d1681519d2346c26695bb8bb2c652230e", - "sha256:40d491944f69e350e1e8b25f6ca49459824ede1678ec0cd4b5541f41edc06614", - "sha256:471484c7b9d7b7867263051aa81cdeed6e06b455e629a7f05eb91a6cb8bd0836", - "sha256:488c557080557bc01aabb3e1bda7225c68455b853733a8652857ac0d810dad1b", - "sha256:49c2e76e7aa81ba889b3c183e2341af3cc6161ee38852085110ae49d5b5d9a40", - "sha256:52d13ec90236e5935ed6da044e78faa1371d5116cc43fe6d7ca8994dd619ef96", - "sha256:57898c69a253d81f487787bdd538629fabd671fab8a9e31b041ca30965fd9556", - "sha256:5d577eef5beb5730ef01ab39983eb852a97c359b7a546809adf70c409f4b2ecc", - "sha256:6a41987c1474c9158a0c0c96611530a8f299bc547d35bee8add981b8b2534f74", - 
"sha256:6ae67b7df8db3626af8e042e9c6949cfa27d1a3bbbfdff29e45b72bb6673a650", - "sha256:6c42c27e9d12e8a481aff469ffe8dd4ce0484c354a418470960f760f6ae41e7c", - "sha256:6c4a90c9f6128b4d0905a89930bd325e0491574e5cb453f606bb7094a3197587", - "sha256:6e64518e5833ac2d9359b6d9bd4df2c0cf441a0f3a4eca9e735fbea99009fa70", - "sha256:6fd3a270c23c5b42d86a9c7c6b0229f23ee4a7a4cabdaaa1693ad7a0982d13cb", - "sha256:70db73351e0fcf11a76288c47a0469d9a330bcb2e7618c5eb57432b8caa82403", - "sha256:771f401692046845626cbdf1dd0f04e999413ede0ee9ad39033fe30b5fa2e845", - "sha256:7935026ec61b967cbc6b746c0ca75c1651ea118d7fee4d259cff9e6866153374", - "sha256:7b76b1cac9baac1980210e29145800954e7b42e91ef69c4d695de1cab87ce41f", - "sha256:7e3f37c11b6699b1a1e0fcc0e88829dba4f2866546381b05ab8b3f4db645a823", - "sha256:8370fa65ad421484894f559055f951843754153b72b9bca2ebdc5288efe2e3f0", - "sha256:8ae9c443d44a4e23252632e4d7775f419f992d0df3eff923e23775f5cc551d39", - "sha256:8b31d85f2781e44f1ffaaf7ea07f484e7d42317c677c355fa77b4a1a4bea7394", - "sha256:8b450336b27f3b375cadc474c6704838eaa8dd3ca312aac3bb69d92264a8e638", - "sha256:9ce84357388a76d886febff4e50e321c212ffd3248b590960b2da6e02404a5c9", - "sha256:a23e986fb0ba8e7407286add41fa0d4207be44e3dce1b04789f4757800eca1cf", - "sha256:a81610ee00d0da9cd2c8679479b7791149365b6dfb3971b01b22ee29b04787ce", - "sha256:b4e40444975e5ab0ed3004369209c39a28e084951daaeee4919f164b6b849b14", - "sha256:b66600de16702b9dfa74bea34524b55183a2183e5fd92f20fe6c2fcae550a64c", - "sha256:ba6ee18694d3673796b7a31b7d21254e87e9e43ca5be56f323fd396111255315", - "sha256:bd03837da28293baa39bdfc3cada69e2f8807f423ae06168aa28d2b32c63a6b6", - "sha256:bd2192070f88c0778ae1d68a0980fdece3473498c1db37f3794e3454f91e3ecf", - "sha256:c1f6f1a3cc013012cd1da913c40b13e6d721046a8c8a0ea0cde94069645a75db", - "sha256:ce10a8e7e067bde3c1fbf494d2b8859db510206030b0b67bc3af90b0eb1887b9", - "sha256:d31386d208303a5a6cf0819ef9f6db6680bab9e4ca8e48adb3d4b26ead89beb7", - "sha256:d83b3af53b201970973c5574b39df226746194063bb248a53fd12b470ac34319", 
- "sha256:df9657b212c054ac6d803290d7c4bcd7790af0b725984fce1eeb0a1e3f2d9798", - "sha256:e576e5fd3f129e6b3595dc734ac7f2b8c548f19ef07781194bc538dc9c0cdbbc", - "sha256:e7400358558094c1bcedc75f3b3c4f400c53130b44833848890a99968dee6a64", - "sha256:eb6a385f8577d30e4cb43dd555fb134ddaae1edeb84205e09dabec332bf49fd0", - "sha256:f27f0875e0873f6bf5df09a456bfcac0667824cabac4cad30b43f36e0382ffe7", - "sha256:fcd4a6d04995f1d66bc78b503e4e59ae72fd32aaec4f661657fe5ae5c1aa4ce3" + "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3", + "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711", + "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd", + "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073", + "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708", + "sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67", + "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23", + "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1", + "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08", + "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd", + "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2", + "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa", + "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8", + "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40", + "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab", + "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6", + "sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc", + "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b", + "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e", + 
"sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963", + "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3", + "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d", + "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d", + "sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe", + "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28", + "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3", + "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e", + "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c", + "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d", + "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0", + "sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497", + "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee", + "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713", + "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58", + "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a", + "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06", + "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88", + "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965", + "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f", + "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4", + "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5", + "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c", + "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a", + "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1", + "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43", 
+ "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627", + "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b", + "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168", + "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d", + "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5", + "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478", + "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf", + "sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce", + "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c", + "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b" ], - "markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))", - "version": "==2.0.0a2" + "markers": "python_version >= '3' and (platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32'))))))", + "version": "==1.1.2" }, "gspread": { "hashes": [ @@ -388,124 +373,28 @@ "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e", "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04" ], - "markers": "python_version >= '3.7'", "version": "==1.0.0" }, "loguru": { "hashes": [ - "sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319", - "sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c" + "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c", + "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3" ], "index": "pypi", - "version": 
"==0.5.3" - }, - "lxml": { - "hashes": [ - "sha256:078306d19a33920004addeb5f4630781aaeabb6a8d01398045fcde085091a169", - "sha256:0c1978ff1fd81ed9dcbba4f91cf09faf1f8082c9d72eb122e92294716c605428", - "sha256:1010042bfcac2b2dc6098260a2ed022968dbdfaf285fc65a3acf8e4eb1ffd1bc", - "sha256:1d650812b52d98679ed6c6b3b55cbb8fe5a5460a0aef29aeb08dc0b44577df85", - "sha256:20b8a746a026017acf07da39fdb10aa80ad9877046c9182442bf80c84a1c4696", - "sha256:2403a6d6fb61c285969b71f4a3527873fe93fd0abe0832d858a17fe68c8fa507", - "sha256:24f5c5ae618395ed871b3d8ebfcbb36e3f1091fd847bf54c4de623f9107942f3", - "sha256:28d1af847786f68bec57961f31221125c29d6f52d9187c01cd34dc14e2b29430", - "sha256:31499847fc5f73ee17dbe1b8e24c6dafc4e8d5b48803d17d22988976b0171f03", - "sha256:31ba2cbc64516dcdd6c24418daa7abff989ddf3ba6d3ea6f6ce6f2ed6e754ec9", - "sha256:330bff92c26d4aee79c5bc4d9967858bdbe73fdbdbacb5daf623a03a914fe05b", - "sha256:5045ee1ccd45a89c4daec1160217d363fcd23811e26734688007c26f28c9e9e7", - "sha256:52cbf2ff155b19dc4d4100f7442f6a697938bf4493f8d3b0c51d45568d5666b5", - "sha256:530f278849031b0eb12f46cca0e5db01cfe5177ab13bd6878c6e739319bae654", - "sha256:545bd39c9481f2e3f2727c78c169425efbfb3fbba6e7db4f46a80ebb249819ca", - "sha256:5804e04feb4e61babf3911c2a974a5b86f66ee227cc5006230b00ac6d285b3a9", - "sha256:5a58d0b12f5053e270510bf12f753a76aaf3d74c453c00942ed7d2c804ca845c", - "sha256:5f148b0c6133fb928503cfcdfdba395010f997aa44bcf6474fcdd0c5398d9b63", - "sha256:5f7d7d9afc7b293147e2d506a4596641d60181a35279ef3aa5778d0d9d9123fe", - "sha256:60d2f60bd5a2a979df28ab309352cdcf8181bda0cca4529769a945f09aba06f9", - "sha256:6259b511b0f2527e6d55ad87acc1c07b3cbffc3d5e050d7e7bcfa151b8202df9", - "sha256:6268e27873a3d191849204d00d03f65c0e343b3bcb518a6eaae05677c95621d1", - "sha256:627e79894770783c129cc5e89b947e52aa26e8e0557c7e205368a809da4b7939", - "sha256:62f93eac69ec0f4be98d1b96f4d6b964855b8255c345c17ff12c20b93f247b68", - "sha256:6d6483b1229470e1d8835e52e0ff3c6973b9b97b24cd1c116dca90b57a2cc613", - 
"sha256:6f7b82934c08e28a2d537d870293236b1000d94d0b4583825ab9649aef7ddf63", - "sha256:6fe4ef4402df0250b75ba876c3795510d782def5c1e63890bde02d622570d39e", - "sha256:719544565c2937c21a6f76d520e6e52b726d132815adb3447ccffbe9f44203c4", - "sha256:730766072fd5dcb219dd2b95c4c49752a54f00157f322bc6d71f7d2a31fecd79", - "sha256:74eb65ec61e3c7c019d7169387d1b6ffcfea1b9ec5894d116a9a903636e4a0b1", - "sha256:7993232bd4044392c47779a3c7e8889fea6883be46281d45a81451acfd704d7e", - "sha256:80bbaddf2baab7e6de4bc47405e34948e694a9efe0861c61cdc23aa774fcb141", - "sha256:86545e351e879d0b72b620db6a3b96346921fa87b3d366d6c074e5a9a0b8dadb", - "sha256:891dc8f522d7059ff0024cd3ae79fd224752676447f9c678f2a5c14b84d9a939", - "sha256:8a31f24e2a0b6317f33aafbb2f0895c0bce772980ae60c2c640d82caac49628a", - "sha256:8b99ec73073b37f9ebe8caf399001848fced9c08064effdbfc4da2b5a8d07b93", - "sha256:986b7a96228c9b4942ec420eff37556c5777bfba6758edcb95421e4a614b57f9", - "sha256:a1547ff4b8a833511eeaceacbcd17b043214fcdb385148f9c1bc5556ca9623e2", - "sha256:a2bfc7e2a0601b475477c954bf167dee6d0f55cb167e3f3e7cefad906e7759f6", - "sha256:a3c5f1a719aa11866ffc530d54ad965063a8cbbecae6515acbd5f0fae8f48eaa", - "sha256:a9f1c3489736ff8e1c7652e9dc39f80cff820f23624f23d9eab6e122ac99b150", - "sha256:aa0cf4922da7a3c905d000b35065df6184c0dc1d866dd3b86fd961905bbad2ea", - "sha256:ad4332a532e2d5acb231a2e5d33f943750091ee435daffca3fec0a53224e7e33", - "sha256:b2582b238e1658c4061ebe1b4df53c435190d22457642377fd0cb30685cdfb76", - "sha256:b6fc2e2fb6f532cf48b5fed57567ef286addcef38c28874458a41b7837a57807", - "sha256:b92d40121dcbd74831b690a75533da703750f7041b4bf951befc657c37e5695a", - "sha256:bbab6faf6568484707acc052f4dfc3802bdb0cafe079383fbaa23f1cdae9ecd4", - "sha256:c0b88ed1ae66777a798dc54f627e32d3b81c8009967c63993c450ee4cbcbec15", - "sha256:ce13d6291a5f47c1c8dbd375baa78551053bc6b5e5c0e9bb8e39c0a8359fd52f", - "sha256:db3535733f59e5605a88a706824dfcb9bd06725e709ecb017e165fc1d6e7d429", - "sha256:dd10383f1d6b7edf247d0960a3db274c07e96cf3a3fc7c41c8448f93eac3fb1c", 
- "sha256:e01f9531ba5420838c801c21c1b0f45dbc9607cb22ea2cf132844453bec863a5", - "sha256:e11527dc23d5ef44d76fef11213215c34f36af1608074561fcc561d983aeb870", - "sha256:e1ab2fac607842ac36864e358c42feb0960ae62c34aa4caaf12ada0a1fb5d99b", - "sha256:e1fd7d2fe11f1cb63d3336d147c852f6d07de0d0020d704c6031b46a30b02ca8", - "sha256:e9f84ed9f4d50b74fbc77298ee5c870f67cb7e91dcdc1a6915cb1ff6a317476c", - "sha256:ec4b4e75fc68da9dc0ed73dcdb431c25c57775383fec325d23a770a64e7ebc87", - "sha256:f10ce66fcdeb3543df51d423ede7e238be98412232fca5daec3e54bcd16b8da0", - "sha256:f63f62fc60e6228a4ca9abae28228f35e1bd3ce675013d1dfb828688d50c6e23", - "sha256:fa56bb08b3dd8eac3a8c5b7d075c94e74f755fd9d8a04543ae8d37b1612dd170", - "sha256:fa9b7c450be85bfc6cd39f6df8c5b8cbd76b5d6fc1f69efec80203f9894b885f" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==4.8.0" + "version": "==0.6.0" }, "mutagen": { "hashes": [ "sha256:6397602efb3c2d7baebd2166ed85731ae1c1d475abca22090b7141ff5034b3e1", "sha256:9c9f243fcec7f410f138cb12c21c84c64fde4195481a30c9bfb05b5f003adfed" ], - "markers": "python_version >= '3.5' and python_version < '4'", "version": "==1.45.1" }, - "numpy": { - "hashes": [ - "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676", - "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4", - "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce", - "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123", - "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1", - "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e", - "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5", - "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d", - "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a", - "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab", - 
"sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75", - "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168", - "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4", - "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f", - "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18", - "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62", - "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe", - "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430", - "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802", - "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa" - ], - "markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'", - "version": "==1.22.3" - }, "oauthlib": { "hashes": [ "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2", "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe" ], - "markers": "python_version >= '3.6'", "version": "==3.2.0" }, "packaging": { @@ -513,139 +402,67 @@ "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" ], - "markers": "python_version >= '3.6'", "version": "==21.3" }, - "pandas": { - "hashes": [ - "sha256:0259cd11e7e6125aaea3af823b80444f3adad6149ff4c97fef760093598b3e34", - "sha256:04dd15d9db538470900c851498e532ef28d4e56bfe72c9523acb32042de43dfb", - "sha256:0b1a13f647e4209ed7dbb5da3497891d0045da9785327530ab696417ef478f84", - "sha256:19f7c632436b1b4f84615c3b127bbd7bc603db95e3d4332ed259dc815c9aaa26", - "sha256:1b384516dbb4e6aae30e3464c2e77c563da5980440fbdfbd0968e3942f8f9d70", - "sha256:1d85d5f6be66dfd6d1d8d13b9535e342a2214260f1852654b19fa4d7b8d1218b", - "sha256:2e5a7a1e0ecaac652326af627a3eca84886da9e667d68286866d4e33f6547caf", - 
"sha256:3129a35d9dad1d80c234dd78f8f03141b914395d23f97cf92a366dcd19f8f8bf", - "sha256:358b0bc98a5ff067132d23bf7a2242ee95db9ea5b7bbc401cf79205f11502fd3", - "sha256:3dfb32ed50122fe8c5e7f2b8d97387edd742cc78f9ec36f007ee126cd3720907", - "sha256:4e1176f45981c8ccc8161bc036916c004ca51037a7ed73f2d2a9857e6dbe654f", - "sha256:508c99debccd15790d526ce6b1624b97a5e1e4ca5b871319fb0ebfd46b8f4dad", - "sha256:6105af6533f8b63a43ea9f08a2ede04e8f43e49daef0209ab0d30352bcf08bee", - "sha256:6d6ad1da00c7cc7d8dd1559a6ba59ba3973be6b15722d49738b2be0977eb8a0c", - "sha256:7ea47ba1d6f359680130bd29af497333be6110de8f4c35b9211eec5a5a9630fa", - "sha256:8db93ec98ac7cb5f8ac1420c10f5e3c43533153f253fe7fb6d891cf5aa2b80d2", - "sha256:96e9ece5759f9b47ae43794b6359bbc54805d76e573b161ae770c1ea59393106", - "sha256:bbb15ad79050e8b8d39ec40dd96a30cd09b886a2ae8848d0df1abba4d5502a67", - "sha256:c614001129b2a5add5e3677c3a213a9e6fd376204cb8d17c04e84ff7dfc02a73", - "sha256:e6a7bbbb7950063bfc942f8794bc3e31697c020a14f1cd8905fc1d28ec674a01", - "sha256:f02e85e6d832be37d7f16cf6ac8bb26b519ace3e5f3235564a91c7f658ab2a43" - ], - "markers": "python_version >= '3.8'", - "version": "==1.4.1" - }, "pillow": { "hashes": [ - "sha256:011233e0c42a4a7836498e98c1acf5e744c96a67dd5032a6f666cc1fb97eab97", - "sha256:0f29d831e2151e0b7b39981756d201f7108d3d215896212ffe2e992d06bfe049", - "sha256:12875d118f21cf35604176872447cdb57b07126750a33748bac15e77f90f1f9c", - "sha256:14d4b1341ac07ae07eb2cc682f459bec932a380c3b122f5540432d8977e64eae", - "sha256:1c3c33ac69cf059bbb9d1a71eeaba76781b450bc307e2291f8a4764d779a6b28", - "sha256:1d19397351f73a88904ad1aee421e800fe4bbcd1aeee6435fb62d0a05ccd1030", - "sha256:253e8a302a96df6927310a9d44e6103055e8fb96a6822f8b7f514bb7ef77de56", - "sha256:2632d0f846b7c7600edf53c48f8f9f1e13e62f66a6dbc15191029d950bfed976", - "sha256:335ace1a22325395c4ea88e00ba3dc89ca029bd66bd5a3c382d53e44f0ccd77e", - "sha256:413ce0bbf9fc6278b2d63309dfeefe452835e1c78398efb431bab0672fe9274e", - 
"sha256:5100b45a4638e3c00e4d2320d3193bdabb2d75e79793af7c3eb139e4f569f16f", - "sha256:514ceac913076feefbeaf89771fd6febde78b0c4c1b23aaeab082c41c694e81b", - "sha256:528a2a692c65dd5cafc130de286030af251d2ee0483a5bf50c9348aefe834e8a", - "sha256:6295f6763749b89c994fcb6d8a7f7ce03c3992e695f89f00b741b4580b199b7e", - "sha256:6c8bc8238a7dfdaf7a75f5ec5a663f4173f8c367e5a39f87e720495e1eed75fa", - "sha256:718856856ba31f14f13ba885ff13874be7fefc53984d2832458f12c38205f7f7", - "sha256:7f7609a718b177bf171ac93cea9fd2ddc0e03e84d8fa4e887bdfc39671d46b00", - "sha256:80ca33961ced9c63358056bd08403ff866512038883e74f3a4bf88ad3eb66838", - "sha256:80fe64a6deb6fcfdf7b8386f2cf216d329be6f2781f7d90304351811fb591360", - "sha256:81c4b81611e3a3cb30e59b0cf05b888c675f97e3adb2c8672c3154047980726b", - "sha256:855c583f268edde09474b081e3ddcd5cf3b20c12f26e0d434e1386cc5d318e7a", - "sha256:9bfdb82cdfeccec50aad441afc332faf8606dfa5e8efd18a6692b5d6e79f00fd", - "sha256:a5d24e1d674dd9d72c66ad3ea9131322819ff86250b30dc5821cbafcfa0b96b4", - "sha256:a9f44cd7e162ac6191491d7249cceb02b8116b0f7e847ee33f739d7cb1ea1f70", - "sha256:b5b3f092fe345c03bca1e0b687dfbb39364b21ebb8ba90e3fa707374b7915204", - "sha256:b9618823bd237c0d2575283f2939655f54d51b4527ec3972907a927acbcc5bfc", - "sha256:cef9c85ccbe9bee00909758936ea841ef12035296c748aaceee535969e27d31b", - "sha256:d21237d0cd37acded35154e29aec853e945950321dd2ffd1a7d86fe686814669", - "sha256:d3c5c79ab7dfce6d88f1ba639b77e77a17ea33a01b07b99840d6ed08031cb2a7", - "sha256:d9d7942b624b04b895cb95af03a23407f17646815495ce4547f0e60e0b06f58e", - "sha256:db6d9fac65bd08cea7f3540b899977c6dee9edad959fa4eaf305940d9cbd861c", - "sha256:ede5af4a2702444a832a800b8eb7f0a7a1c0eed55b644642e049c98d589e5092", - "sha256:effb7749713d5317478bb3acb3f81d9d7c7f86726d41c1facca068a04cf5bb4c", - "sha256:f154d173286a5d1863637a7dcd8c3437bb557520b01bddb0be0258dcb72696b5", - "sha256:f25ed6e28ddf50de7e7ea99d7a976d6a9c415f03adcaac9c41ff6ff41b6d86ac" + "sha256:01ce45deec9df310cbbee11104bae1a2a43308dd9c317f99235b6d3080ddd66e", 
+ "sha256:0c51cb9edac8a5abd069fd0758ac0a8bfe52c261ee0e330f363548aca6893595", + "sha256:17869489de2fce6c36690a0c721bd3db176194af5f39249c1ac56d0bb0fcc512", + "sha256:21dee8466b42912335151d24c1665fcf44dc2ee47e021d233a40c3ca5adae59c", + "sha256:25023a6209a4d7c42154073144608c9a71d3512b648a2f5d4465182cb93d3477", + "sha256:255c9d69754a4c90b0ee484967fc8818c7ff8311c6dddcc43a4340e10cd1636a", + "sha256:35be4a9f65441d9982240e6966c1eaa1c654c4e5e931eaf580130409e31804d4", + "sha256:3f42364485bfdab19c1373b5cd62f7c5ab7cc052e19644862ec8f15bb8af289e", + "sha256:3fddcdb619ba04491e8f771636583a7cc5a5051cd193ff1aa1ee8616d2a692c5", + "sha256:463acf531f5d0925ca55904fa668bb3461c3ef6bc779e1d6d8a488092bdee378", + "sha256:4fe29a070de394e449fd88ebe1624d1e2d7ddeed4c12e0b31624561b58948d9a", + "sha256:55dd1cf09a1fd7c7b78425967aacae9b0d70125f7d3ab973fadc7b5abc3de652", + "sha256:5a3ecc026ea0e14d0ad7cd990ea7f48bfcb3eb4271034657dc9d06933c6629a7", + "sha256:5cfca31ab4c13552a0f354c87fbd7f162a4fafd25e6b521bba93a57fe6a3700a", + "sha256:66822d01e82506a19407d1afc104c3fcea3b81d5eb11485e593ad6b8492f995a", + "sha256:69e5ddc609230d4408277af135c5b5c8fe7a54b2bdb8ad7c5100b86b3aab04c6", + "sha256:6b6d4050b208c8ff886fd3db6690bf04f9a48749d78b41b7a5bf24c236ab0165", + "sha256:7a053bd4d65a3294b153bdd7724dce864a1d548416a5ef61f6d03bf149205160", + "sha256:82283af99c1c3a5ba1da44c67296d5aad19f11c535b551a5ae55328a317ce331", + "sha256:8782189c796eff29dbb37dd87afa4ad4d40fc90b2742704f94812851b725964b", + "sha256:8d79c6f468215d1a8415aa53d9868a6b40c4682165b8cb62a221b1baa47db458", + "sha256:97bda660702a856c2c9e12ec26fc6d187631ddfd896ff685814ab21ef0597033", + "sha256:a325ac71914c5c043fa50441b36606e64a10cd262de12f7a179620f579752ff8", + "sha256:a336a4f74baf67e26f3acc4d61c913e378e931817cd1e2ef4dfb79d3e051b481", + "sha256:a598d8830f6ef5501002ae85c7dbfcd9c27cc4efc02a1989369303ba85573e58", + "sha256:a5eaf3b42df2bcda61c53a742ee2c6e63f777d0e085bbc6b2ab7ed57deb13db7", + 
"sha256:aea7ce61328e15943d7b9eaca87e81f7c62ff90f669116f857262e9da4057ba3", + "sha256:af79d3fde1fc2e33561166d62e3b63f0cc3e47b5a3a2e5fea40d4917754734ea", + "sha256:c24f718f9dd73bb2b31a6201e6db5ea4a61fdd1d1c200f43ee585fc6dcd21b34", + "sha256:c5b0ff59785d93b3437c3703e3c64c178aabada51dea2a7f2c5eccf1bcf565a3", + "sha256:c7110ec1701b0bf8df569a7592a196c9d07c764a0a74f65471ea56816f10e2c8", + "sha256:c870193cce4b76713a2b29be5d8327c8ccbe0d4a49bc22968aa1e680930f5581", + "sha256:c9efef876c21788366ea1f50ecb39d5d6f65febe25ad1d4c0b8dff98843ac244", + "sha256:de344bcf6e2463bb25179d74d6e7989e375f906bcec8cb86edb8b12acbc7dfef", + "sha256:eb1b89b11256b5b6cad5e7593f9061ac4624f7651f7a8eb4dfa37caa1dfaa4d0", + "sha256:ed742214068efa95e9844c2d9129e209ed63f61baa4d54dbf4cf8b5e2d30ccf2", + "sha256:f401ed2bbb155e1ade150ccc63db1a4f6c1909d3d378f7d1235a44e90d75fb97", + "sha256:fb89397013cf302f282f0fc998bb7abf11d49dcff72c8ecb320f76ea6e2c5717" ], - "markers": "python_version >= '3.7'", - "version": "==9.0.1" + "version": "==9.1.0" }, "polyphemus": { - "git": "https://github.com/bellingcat/polyphemus.git", - "ref": "00a5123a3768a55ffe29f2c803a4181895f17890" + "git": "https://github.com/bellingcat/polyphemus.git" }, - "psycopg2-binary": { + "psycopg2": { "hashes": [ - "sha256:01310cf4cf26db9aea5158c217caa92d291f0500051a6469ac52166e1a16f5b7", - "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76", - "sha256:090f3348c0ab2cceb6dfbe6bf721ef61262ddf518cd6cc6ecc7d334996d64efa", - "sha256:0a29729145aaaf1ad8bafe663131890e2111f13416b60e460dae0a96af5905c9", - "sha256:0c9d5450c566c80c396b7402895c4369a410cab5a82707b11aee1e624da7d004", - "sha256:10bb90fb4d523a2aa67773d4ff2b833ec00857f5912bafcfd5f5414e45280fb1", - "sha256:12b11322ea00ad8db8c46f18b7dfc47ae215e4df55b46c67a94b4effbaec7094", - "sha256:152f09f57417b831418304c7f30d727dc83a12761627bb826951692cc6491e57", - "sha256:15803fa813ea05bef089fa78835118b5434204f3a17cb9f1e5dbfd0b9deea5af", - 
"sha256:15c4e4cfa45f5a60599d9cec5f46cd7b1b29d86a6390ec23e8eebaae84e64554", - "sha256:183a517a3a63503f70f808b58bfbf962f23d73b6dccddae5aa56152ef2bcb232", - "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c", - "sha256:1f6b813106a3abdf7b03640d36e24669234120c72e91d5cbaeb87c5f7c36c65b", - "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834", - "sha256:2d872e3c9d5d075a2e104540965a1cf898b52274a5923936e5bfddb58c59c7c2", - "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71", - "sha256:3303f8807f342641851578ee7ed1f3efc9802d00a6f83c101d21c608cb864460", - "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e", - "sha256:3a79d622f5206d695d7824cbf609a4f5b88ea6d6dab5f7c147fc6d333a8787e4", - "sha256:404224e5fef3b193f892abdbf8961ce20e0b6642886cfe1fe1923f41aaa75c9d", - "sha256:46f0e0a6b5fa5851bbd9ab1bc805eef362d3a230fbdfbc209f4a236d0a7a990d", - "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9", - "sha256:526ea0378246d9b080148f2d6681229f4b5964543c170dd10bf4faaab6e0d27f", - "sha256:53293533fcbb94c202b7c800a12c873cfe24599656b341f56e71dd2b557be063", - "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478", - "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092", - "sha256:63638d875be8c2784cfc952c9ac34e2b50e43f9f0a0660b65e2a87d656b3116c", - "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce", - "sha256:68641a34023d306be959101b345732360fc2ea4938982309b786f7be1b43a4a1", - "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65", - "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e", - "sha256:7af0dd86ddb2f8af5da57a976d27cd2cd15510518d582b478fbb2292428710b4", - "sha256:7b1e9b80afca7b7a386ef087db614faebbf8839b7f4db5eb107d0f1a53225029", - "sha256:874a52ecab70af13e899f7847b3e074eeb16ebac5615665db33bce8a1009cf33", - "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39", 
- "sha256:8b344adbb9a862de0c635f4f0425b7958bf5a4b927c8594e6e8d261775796d53", - "sha256:8fc53f9af09426a61db9ba357865c77f26076d48669f2e1bb24d85a22fb52307", - "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42", - "sha256:93cd1967a18aa0edd4b95b1dfd554cf15af657cb606280996d393dadc88c3c35", - "sha256:99485cab9ba0fa9b84f1f9e1fef106f44a46ef6afdeec8885e0b88d0772b49e8", - "sha256:9d29409b625a143649d03d0fd7b57e4b92e0ecad9726ba682244b73be91d2fdb", - "sha256:a29b3ca4ec9defec6d42bf5feb36bb5817ba3c0230dd83b4edf4bf02684cd0ae", - "sha256:a9e1f75f96ea388fbcef36c70640c4efbe4650658f3d6a2967b4cc70e907352e", - "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f", - "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba", - "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24", - "sha256:b1c8068513f5b158cf7e29c43a77eb34b407db29aca749d3eb9293ee0d3103ca", - "sha256:bda845b664bb6c91446ca9609fc69f7db6c334ec5e4adc87571c34e4f47b7ddb", - "sha256:c381bda330ddf2fccbafab789d83ebc6c53db126e4383e73794c74eedce855ef", - "sha256:c3ae8e75eb7160851e59adc77b3a19a976e50622e44fd4fd47b8b18208189d42", - "sha256:d1c1b569ecafe3a69380a94e6ae09a4789bbb23666f3d3a08d06bbd2451f5ef1", - "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667", - "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272", - "sha256:e3699852e22aa68c10de06524a3721ade969abf382da95884e6a10ff798f9281", - "sha256:e847774f8ffd5b398a75bc1c18fbb56564cda3d629fe68fd81971fece2d3c67e", - "sha256:ffb7a888a047696e7f8240d649b43fb3644f14f0ee229077e7f6b9f9081635bd" + "sha256:06f32425949bd5fe8f625c49f17ebb9784e1e4fe928b7cce72edc36fb68e4c0c", + "sha256:0762c27d018edbcb2d34d51596e4346c983bd27c330218c56c4dc25ef7e819bf", + "sha256:083707a696e5e1c330af2508d8fab36f9700b26621ccbcb538abe22e15485362", + "sha256:34b33e0162cfcaad151f249c2649fd1030010c16f4bbc40a604c1cb77173dcf7", + 
"sha256:4295093a6ae3434d33ec6baab4ca5512a5082cc43c0505293087b8a46d108461", + "sha256:8cf3878353cc04b053822896bc4922b194792df9df2f1ad8da01fb3043602126", + "sha256:8e841d1bf3434da985cc5ef13e6f75c8981ced601fd70cc6bf33351b91562981", + "sha256:9572e08b50aed176ef6d66f15a21d823bb6f6d23152d35e8451d7d2d18fdac56", + "sha256:a81e3866f99382dfe8c15a151f1ca5fde5815fde879348fe5a9884a7c092a305", + "sha256:cb10d44e6694d763fa1078a26f7f6137d69f555a78ec85dc2ef716c37447e4b2", + "sha256:d3ca6421b942f60c008f81a3541e8faf6865a28d5a9b48544b0ee4f40cac7fca" ], "index": "pypi", "version": "==2.9.3" @@ -729,29 +546,18 @@ "sha256:f75009715dcf4a3d680c2338ab19dac5498f8121173a929872950f4fb3a48fbf", "sha256:f8524b8bc89470cec7ac51734907818d3620fb1637f8f8b542d650ebec42a126" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==3.14.1" }, "pyexiftool": { - "git": "https://github.com/smarnach/pyexiftool.git", - "ref": "3db3764895e687d75b42d3ae4e554ca8664a7f6f" + "git": "https://github.com/smarnach/pyexiftool.git" }, "pyparsing": { "hashes": [ "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea", "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484" ], - "markers": "python_version >= '3.6'", "version": "==3.0.7" }, - "pysocks": { - "hashes": [ - "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", - "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", - "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" - ], - "version": "==1.7.1" - }, "pytesseract": { "hashes": [ "sha256:7e2bafc7f48d1bb71443ce4633a56f5e21925a98f220a36c336297edcd1956d0", @@ -765,7 +571,6 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "pytz": { @@ -781,7 +586,6 @@ 
"sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==0.1.0.post0" }, "ratelimit": { @@ -868,13 +672,9 @@ "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d", "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4" ], - "markers": "python_version >= '3.6'", "version": "==2022.3.2" }, "requests": { - "extras": [ - "socks" - ], "hashes": [ "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" @@ -887,7 +687,6 @@ "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5", "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.3.1" }, "rsa": { @@ -895,7 +694,6 @@ "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17", "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb" ], - "markers": "python_version >= '3.6' and python_version < '4'", "version": "==4.8" }, "s3transfer": { @@ -903,7 +701,6 @@ "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" ], - "markers": "python_version >= '3.6'", "version": "==0.5.2" }, "six": { @@ -911,19 +708,16 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "snscrape": { - "git": "https://github.com/bellingcat/snscrape.git", - "ref": "d32c9add8a3691c81c9091dc1a7d079e9871379f" + "git": 
"https://github.com/bellingcat/snscrape" }, "soupsieve": { "hashes": [ "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb", "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9" ], - "markers": "python_version >= '3.6'", "version": "==2.3.1" }, "sqlalchemy": { @@ -994,18 +788,16 @@ }, "tzlocal": { "hashes": [ - "sha256:0f28015ac68a5c067210400a9197fc5d36ba9bc3f8eaf1da3cbd59acdfed9e09", - "sha256:28ba8d9fcb6c9a782d6e0078b4f6627af1ea26aeaa32b4eab5324abc7df4149f" + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" ], - "markers": "python_version >= '3.6'", - "version": "==4.1" + "version": "==4.2" }, "urllib3": { "hashes": [ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.26.9" }, "websockets": { @@ -1059,7 +851,6 @@ "sha256:f8296b8408ec6853b26771599990721a26403e62b9de7e50ac0a056772ac0b5e", "sha256:fa35c5d1830d0fb7b810324e9eeab9aa92e8f273f11fdbdc0741dcded6d72b9f" ], - "markers": "python_version >= '3.7'", "version": "==10.2" }, "yt-dlp": { @@ -1084,7 +875,6 @@ "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==21.4.0" }, "babel": { @@ -1092,7 +882,6 @@ "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9", "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.9.1" }, "black": { @@ -1144,13 +933,9 @@ 
"sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e", "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72" ], - "markers": "python_version >= '3.7'", "version": "==8.1.2" }, "coverage": { - "extras": [ - "toml" - ], "hashes": [ "sha256:03e2a7826086b91ef345ff18742ee9fc47a6839ccd517061ef8fa1976e652ce9", "sha256:07e6db90cd9686c767dcc593dff16c8c09f9814f5e9c51034066cad3373b914d", @@ -1194,7 +979,6 @@ "sha256:f9987b0354b06d4df0f4d3e0ec1ae76d7ce7cbca9a2f98c25041eb79eec766f1", "sha256:fd9e830e9d8d89b20ab1e5af09b32d33e1a08ef4c4e14411e559556fd788e6b2" ], - "markers": "python_version >= '3.7'", "version": "==6.3.2" }, "docutils": { @@ -1202,7 +986,6 @@ "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125", "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==0.17.1" }, "idna": { @@ -1218,7 +1001,6 @@ "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c", "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.3.0" }, "importlib-metadata": { @@ -1241,7 +1023,6 @@ "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119", "sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9" ], - "markers": "python_version >= '3.7'", "version": "==3.1.1" }, "markupsafe": { @@ -1287,7 +1068,6 @@ "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" ], - "markers": "python_version >= '3.7'", "version": "==2.1.1" }, "mypy-extensions": { @@ -1302,7 +1082,6 @@ "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" ], - "markers": "python_version >= 
'3.6'", "version": "==21.3" }, "pathspec": { @@ -1317,7 +1096,6 @@ "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d", "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227" ], - "markers": "python_version >= '3.7'", "version": "==2.5.1" }, "pluggy": { @@ -1325,7 +1103,6 @@ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" ], - "markers": "python_version >= '3.6'", "version": "==1.0.0" }, "py": { @@ -1333,7 +1110,6 @@ "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==1.11.0" }, "pygments": { @@ -1341,7 +1117,6 @@ "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a" ], - "markers": "python_version >= '3.5'", "version": "==2.11.2" }, "pyparsing": { @@ -1349,7 +1124,6 @@ "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea", "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484" ], - "markers": "python_version >= '3.6'", "version": "==3.0.7" }, "pytest": { @@ -1393,9 +1167,6 @@ "version": "==2022.1" }, "requests": { - "extras": [ - "socks" - ], "hashes": [ "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" @@ -1431,7 +1202,6 @@ "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" ], - "markers": "python_version >= '3.5'", "version": "==1.0.2" }, "sphinxcontrib-devhelp": { @@ -1439,7 +1209,6 @@ "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", 
"sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" ], - "markers": "python_version >= '3.5'", "version": "==1.0.2" }, "sphinxcontrib-htmlhelp": { @@ -1447,7 +1216,6 @@ "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07", "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2" ], - "markers": "python_version >= '3.6'", "version": "==2.0.0" }, "sphinxcontrib-jsmath": { @@ -1455,7 +1223,6 @@ "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" ], - "markers": "python_version >= '3.5'", "version": "==1.0.1" }, "sphinxcontrib-qthelp": { @@ -1463,7 +1230,6 @@ "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" ], - "markers": "python_version >= '3.5'", "version": "==1.0.3" }, "sphinxcontrib-serializinghtml": { @@ -1471,7 +1237,6 @@ "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952" ], - "markers": "python_version >= '3.5'", "version": "==1.1.5" }, "tomli": { @@ -1479,7 +1244,6 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_full_version < '3.11.0'", "version": "==2.0.1" }, "typing-extensions": { @@ -1495,7 +1259,6 @@ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.26.9" }, "zipp": { @@ -1503,7 +1266,6 @@ "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d", "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375" ], - "markers": 
"python_version >= '3.7'", "version": "==3.7.0" } } diff --git a/app.py b/app.py index 160dd25..c8db5cc 100644 --- a/app.py +++ b/app.py @@ -128,7 +128,7 @@ def init_db(): if __name__ == "__main__": logger.remove() logger.add(sys.stdout, level="DEBUG", catch=True) - logger.add("./test.log", level="TRACE") + logger.add("logs/cisticola.log", level="TRACE", rotation="100 MB") parser = argparse.ArgumentParser(description="Cisticola command line tools") parser.add_argument( diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index a71bd67..5c8e4be 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -403,9 +403,6 @@ class ScraperController: session.commit() added += 1 - if added >= 200: - break - session.commit() logger.info( f"{scraper} found {added} new posts from {channel}") @@ -507,4 +504,4 @@ class ScraperController: self.connect_to_db(self.engine) class ChannelDoesNotExistError(Exception): - """The specified channel does not exist or has been deleted.""" \ No newline at end of file + """The specified channel does not exist or has been deleted."""