Merge pull request #51 from bellingcat/channel-info

Channel info
This commit is contained in:
Logan Williams
2022-04-13 10:11:21 +02:00
committed by GitHub
13 changed files with 14 additions and 1 deletion

2
.gitignore vendored
View File

@@ -9,9 +9,11 @@ docs/source/_*
*.db
.env
*.session
*.session-journal
service_account.json
.vscode/
*.log
*.lock
# Unit test / coverage reports
reports

View File

@@ -429,7 +429,7 @@ class ScraperController:
# This query is really slow (~2.5 minutes) because of the random shuffle. The shuffle lets multiple media archivers work
# simultaneously with a low risk of collision (at least while the number of unarchived items is very large).
-posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(10000).all()
+posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(4000).all()
logger.info(f"Found {len(posts)} posts without media. Archiving now")

View File

@@ -70,6 +70,7 @@ class BitchuteScraper(Scraper):
if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
return True
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
base_url = channel.url

View File

@@ -89,6 +89,7 @@ class GabScraper(Scraper):
if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
return True
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
client = Client(

View File

@@ -72,6 +72,7 @@ class GettrScraper(Scraper):
key = urlparse(url).path.split('/')[-2] + ext
return key
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
client = PublicClient()
username = self.get_username_from_url(channel.url)

View File

@@ -91,6 +91,7 @@ class InstagramScraper(Scraper):
if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None:
return True
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
username = self.get_username_from_url(channel.url)

View File

@@ -105,6 +105,7 @@ class OdyseeScraper(Scraper):
return f'{key}.{ext}'
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
username = self.get_username_from_url(channel.url)

View File

@@ -69,6 +69,7 @@ class RumbleScraper(Scraper):
if channel.platform == "Rumble" and channel.url is not None:
return True
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
profile = get_channel_profile(url = channel.url)

View File

@@ -55,6 +55,7 @@ class TelegramSnscrapeScraper(Scraper):
media_archived=datetime.now(timezone.utc) if archive_media else None
)
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
scr = snscrape.modules.telegram.TelegramChannelScraper(

View File

@@ -149,6 +149,7 @@ class TelegramTelethonScraper(Scraper):
archived_urls=archived_urls,
media_archived=datetime.now(timezone.utc) if archive_media else None)
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
username = channel.screenname
if username is None:

View File

@@ -97,6 +97,7 @@ class TwitterScraper(Scraper):
key = parsed_url.path.split('/')[-1] + ext
return key
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
scraper = TwitterUserScraper(channel.screenname)

View File

@@ -103,6 +103,7 @@ class VkontakteScraper(Scraper):
return key
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
username = self.get_username_from_url(channel.url)

View File

@@ -138,6 +138,7 @@ class YoutubeScraper(Scraper):
result.media_archived = datetime.now(timezone.utc)
return result
@logger.catch
def get_profile(self, channel: Channel) -> RawChannelInfo:
ydl_opts = {