From 209152ea69d4bead3dc3dd3b1a62638983fecc76 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Tue, 12 Apr 2022 18:13:52 +0200 Subject: [PATCH 1/2] Synchronize channels that have changed info --- app.py | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index f48a804..f03d3ff 100644 --- a/app.py +++ b/app.py @@ -32,24 +32,25 @@ def sync_channels(args): row = 2 for c in channels: - # only adding channels, so skip everything with an ID - if c["id"] == "": + if c["public"] == "": + c["public"] = False + if c["chat"] == "": + c["chat"] = False + + for k in c.keys(): + if c[k] == "TRUE" or c[k] == "yes": + c[k] = True + if c[k] == "FALSE" or c[k] == "no": + c[k] = False + + if c[k] == "": + c[k] = None + + del c["followers"] + + # add new channel + if c["id"] == "" or c["id"] is None: del c["id"] - del c["followers"] - - if c["public"] == "": - c["public"] = False - if c["chat"] == "": - c["chat"] = False - - for k in c.keys(): - if c[k] == "TRUE" or c[k] == "yes": - c[k] = True - if c[k] == "FALSE" or c[k] == "no": - c[k] = False - - if c[k] == "": - c[k] = None # check to see if this already exists, platform_id = None @@ -73,6 +74,23 @@ def sync_channels(args): wks.update_cell(row, 1, channel.id) time.sleep(1) + else: + channel = session.query(Channel).filter_by(id=int(c["id"])).first() + + logger.info(f"Updating channel {channel}") + channel.name = c["name"] + channel.category = c["category"] + channel.platform = c["platform"] + channel.url = c["url"] + channel.screenname = c["screenname"] + channel.country = c["country"] + channel.influencer = c["influencer"] + channel.public = c["public"] + channel.chat = c["chat"] + channel.notes = c["notes"] + + session.flush() + session.commit() row += 1 From a0dbe7d92b8f1eb05a005cc88d6c5a6163fe4c06 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Wed, 13 Apr 2022 10:10:29 +0200 Subject: [PATCH 2/2] Catch errors in channel info --- cisticola/scraper/bitchute.py | 1 + cisticola/scraper/gab.py | 1 + cisticola/scraper/gettr.py | 1 + cisticola/scraper/instagram.py | 1 + cisticola/scraper/odysee.py | 1 + cisticola/scraper/rumble.py | 1 + cisticola/scraper/telegram_snscrape.py | 1 + cisticola/scraper/telegram_telethon.py | 1 + cisticola/scraper/twitter.py | 1 + cisticola/scraper/vkontakte.py | 1 + cisticola/scraper/youtube.py | 1 + 11 files changed, 11 insertions(+) diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index 1271dfb..c0cedc9 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -70,6 +70,7 @@ class BitchuteScraper(Scraper): if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None: return True + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: base_url = channel.url diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py index 4a0fb51..5602489 100644 --- a/cisticola/scraper/gab.py +++ b/cisticola/scraper/gab.py @@ -89,6 +89,7 @@ class GabScraper(Scraper): if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None: return True + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: client = Client( diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index 6f28a75..1fa0599 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -72,6 +72,7 @@ class GettrScraper(Scraper): key = urlparse(url).path.split('/')[-2] + ext return key + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: client = PublicClient() username = self.get_username_from_url(channel.url) diff --git a/cisticola/scraper/instagram.py b/cisticola/scraper/instagram.py index 3eab56c..3eca83e 100644 --- a/cisticola/scraper/instagram.py +++ b/cisticola/scraper/instagram.py @@ -91,6 +91,7 @@ class InstagramScraper(Scraper): if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None: return True + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: username = self.get_username_from_url(channel.url) diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py index 0823e8e..5bc2b6e 100644 --- a/cisticola/scraper/odysee.py +++ b/cisticola/scraper/odysee.py @@ -105,6 +105,7 @@ class OdyseeScraper(Scraper): return f'{key}.{ext}' + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: username = self.get_username_from_url(channel.url) diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index 4fba420..0c7177f 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -69,6 +69,7 @@ class RumbleScraper(Scraper): if channel.platform == "Rumble" and channel.url is not None: return True + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: profile = get_channel_profile(url = channel.url) diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py index 4dd3f44..c31f458 100644 --- a/cisticola/scraper/telegram_snscrape.py +++ b/cisticola/scraper/telegram_snscrape.py @@ -55,6 +55,7 @@ class TelegramSnscrapeScraper(Scraper): media_archived=datetime.now(timezone.utc) if archive_media else None ) + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: scr = snscrape.modules.telegram.TelegramChannelScraper( diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index 426ab4e..03ac0d6 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -149,6 +149,7 @@ class TelegramTelethonScraper(Scraper): archived_urls=archived_urls, media_archived=datetime.now(timezone.utc) if archive_media else None) + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: username = channel.screenname if username is None: diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index ebbdb95..3ccef23 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -97,6 +97,7 @@ class TwitterScraper(Scraper): key = parsed_url.path.split('/')[-1] + ext return key + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: scraper = TwitterUserScraper(channel.screenname) diff --git a/cisticola/scraper/vkontakte.py b/cisticola/scraper/vkontakte.py index cf427ba..3ef1648 100644 --- a/cisticola/scraper/vkontakte.py +++ b/cisticola/scraper/vkontakte.py @@ -103,6 +103,7 @@ class VkontakteScraper(Scraper): return key + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: username = self.get_username_from_url(channel.url) diff --git a/cisticola/scraper/youtube.py b/cisticola/scraper/youtube.py index 40a122d..f1d8455 100644 --- a/cisticola/scraper/youtube.py +++ b/cisticola/scraper/youtube.py @@ -138,6 +138,7 @@ class YoutubeScraper(Scraper): result.media_archived = datetime.now(timezone.utc) return result + @logger.catch def get_profile(self, channel: Channel) -> RawChannelInfo: ydl_opts = {