From 456d592792e3dbb2bda8a3b418bcb94985fd01b7 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Thu, 24 Feb 2022 20:24:03 +0100 Subject: [PATCH] Use user id for TwitterScraper --- cisticola/scraper/twitter.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index 41287ab..4793b49 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -10,18 +10,8 @@ class TwitterScraper(cisticola.scraper.base.Scraper): """An implementation of a Scraper for Twitter, using snscrape library""" __version__ = "TwitterScraper 0.0.1" - # TODO snscrape should be able to scrape from user ID alone, but there is - # currently a bug/other issue, so it is extracting the username from URL - def get_username_from_url(url): - username = url.split("twitter.com/")[1] - if len(username.split("/")) > 1: - return None - - return username - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]: - scraper = snscrape.modules.twitter.TwitterProfileScraper( - TwitterScraper.get_username_from_url(channel.url)) + scraper = snscrape.modules.twitter.TwitterProfileScraper(channel.platform_id) first = True @@ -66,5 +56,5 @@ class TwitterScraper(cisticola.scraper.base.Scraper): archived_urls=archived_urls) def can_handle(self, channel): - if channel.platform == "Twitter" and TwitterScraper.get_username_from_url(channel.url) is not None: + if channel.platform == "Twitter" and channel.platform_id: return True