mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 21:08:34 +03:00
2
.gitignore
vendored
2
.gitignore
vendored
@@ -9,9 +9,11 @@ docs/source/_*
|
||||
*.db
|
||||
.env
|
||||
*.session
|
||||
*.session-journal
|
||||
service_account.json
|
||||
.vscode/
|
||||
*.log
|
||||
*.lock
|
||||
|
||||
# Unit test / coverage reports
|
||||
reports
|
||||
|
||||
@@ -429,7 +429,7 @@ class ScraperController:
|
||||
|
||||
# this query is really slow (~2.5 minutes) because of the shuffle. shuffling is so that multiple media archivers could work
|
||||
# simultaneously with low risk of collision (at least while the number of unarchived items is very large)
|
||||
posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(10000).all()
|
||||
posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(4000).all()
|
||||
|
||||
logger.info(f"Found {len(posts)} posts without media. Archiving now")
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ class BitchuteScraper(Scraper):
|
||||
if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
base_url = channel.url
|
||||
|
||||
@@ -89,6 +89,7 @@ class GabScraper(Scraper):
|
||||
if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
client = Client(
|
||||
|
||||
@@ -72,6 +72,7 @@ class GettrScraper(Scraper):
|
||||
key = urlparse(url).path.split('/')[-2] + ext
|
||||
return key
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
client = PublicClient()
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
@@ -91,6 +91,7 @@ class InstagramScraper(Scraper):
|
||||
if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
@@ -105,6 +105,7 @@ class OdyseeScraper(Scraper):
|
||||
|
||||
return f'{key}.{ext}'
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
@@ -69,6 +69,7 @@ class RumbleScraper(Scraper):
|
||||
if channel.platform == "Rumble" and channel.url is not None:
|
||||
return True
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
profile = get_channel_profile(url = channel.url)
|
||||
|
||||
@@ -55,6 +55,7 @@ class TelegramSnscrapeScraper(Scraper):
|
||||
media_archived=datetime.now(timezone.utc) if archive_media else None
|
||||
)
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
scr = snscrape.modules.telegram.TelegramChannelScraper(
|
||||
|
||||
@@ -149,6 +149,7 @@ class TelegramTelethonScraper(Scraper):
|
||||
archived_urls=archived_urls,
|
||||
media_archived=datetime.now(timezone.utc) if archive_media else None)
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
username = channel.screenname
|
||||
if username is None:
|
||||
|
||||
@@ -97,6 +97,7 @@ class TwitterScraper(Scraper):
|
||||
key = parsed_url.path.split('/')[-1] + ext
|
||||
return key
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
scraper = TwitterUserScraper(channel.screenname)
|
||||
|
||||
@@ -103,6 +103,7 @@ class VkontakteScraper(Scraper):
|
||||
|
||||
return key
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
@@ -138,6 +138,7 @@ class YoutubeScraper(Scraper):
|
||||
result.media_archived = datetime.now(timezone.utc)
|
||||
return result
|
||||
|
||||
@logger.catch
|
||||
def get_profile(self, channel: Channel) -> RawChannelInfo:
|
||||
|
||||
ydl_opts = {
|
||||
|
||||
Reference in New Issue
Block a user