mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-13 05:48:33 +03:00
Use user id for TwitterScraper
This commit is contained in:
@@ -10,18 +10,8 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
|
|||||||
"""An implementation of a Scraper for Twitter, using snscrape library"""
|
"""An implementation of a Scraper for Twitter, using snscrape library"""
|
||||||
__version__ = "TwitterScraper 0.0.1"
|
__version__ = "TwitterScraper 0.0.1"
|
||||||
|
|
||||||
# TODO snscrape should be able to scrape from user ID alone, but there is
|
|
||||||
# currently a bug/other issue, so it is extracting the username from URL
|
|
||||||
def get_username_from_url(url):
|
|
||||||
username = url.split("twitter.com/")[1]
|
|
||||||
if len(username.split("/")) > 1:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return username
|
|
||||||
|
|
||||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]:
|
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]:
|
||||||
scraper = snscrape.modules.twitter.TwitterProfileScraper(
|
scraper = snscrape.modules.twitter.TwitterProfileScraper(channel.platform_id)
|
||||||
TwitterScraper.get_username_from_url(channel.url))
|
|
||||||
|
|
||||||
first = True
|
first = True
|
||||||
|
|
||||||
@@ -66,5 +56,5 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
|
|||||||
archived_urls=archived_urls)
|
archived_urls=archived_urls)
|
||||||
|
|
||||||
def can_handle(self, channel):
|
def can_handle(self, channel):
|
||||||
if channel.platform == "Twitter" and TwitterScraper.get_username_from_url(channel.url) is not None:
|
if channel.platform == "Twitter" and channel.platform_id:
|
||||||
return True
|
return True
|
||||||
|
|||||||
Reference in New Issue
Block a user