diff --git a/README.md b/README.md index 0b0ae34..7a196bf 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The following services are currently supported: * Instagram: user profiles, hashtags, and locations * Reddit: users, subreddits, and searches (via Pushshift) * Telegram: channels -* Twitter: users, user profiles, hashtags, searches, threads, and lists (members as well as posts) +* Twitter: users, user profiles, hashtags, searches, threads, and list posts * VKontakte: user profiles * Weibo (Sina Weibo): user profiles diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 676b9a2..64fa99e 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -81,8 +81,7 @@ class DescriptionURL(typing.NamedTuple): indices: typing.Tuple[int, int] -class User(typing.NamedTuple, snscrape.base.Item, snscrape.base.Entity): - # This is both an Item and an Entity since it can be returned as TwitterUserScraper's entity as well as TwitterListMembersScraper's items. +class User(typing.NamedTuple, snscrape.base.Entity): # Most fields can be None if they're not known. username: str @@ -631,56 +630,3 @@ class TwitterListPostsScraper(TwitterSearchScraper): @classmethod def from_args(cls, args): return cls(args.list, retries = args.retries) - - -class TwitterListMembersScraper(TwitterOldDesignScraper): - name = 'twitter-list-members' - - def __init__(self, listName, **kwargs): - super().__init__(**kwargs) - self._user, self._list = listName.split('/') - - def get_items(self): - headers = {'User-Agent': f'Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'} - - baseUrl = f'https://twitter.com/{self._user}/lists/{self._list}/members' - r = self._get(baseUrl, headers = headers) - if r.status_code != 200: - logger.warning('List not found') - return - soup = bs4.BeautifulSoup(r.text, 'lxml') - container = soup.find('div', 'stream-container') - if not container: - raise snscrape.base.ScraperException('Unable to find container') - items = container.find_all('li', 'js-stream-item') - if not items: - logger.warning('Empty list') - return - for item in items: - yield User(username = item.find('div', 'account')['data-screen-name']) - - if not container.has_attr('data-min-position') or container['data-min-position'] == '': - return - maxPosition = container['data-min-position'] - while True: - r = self._get( - f'{baseUrl}/timeline?include_available_features=1&include_entities=1&max_position={maxPosition}&reset_error_state=false', - headers = headers, - responseOkCallback = self._check_json_callback - ) - obj = json.loads(r.text) - soup = bs4.BeautifulSoup(obj['items_html'], 'lxml') - items = soup.find_all('li', 'js-stream-item') - for item in items: - yield User(username = item.find('div', 'account')['data-screen-name']) - if not obj['has_more_items']: - break - maxPosition = obj['min_position'] - - @classmethod - def setup_parser(cls, subparser): - subparser.add_argument('list', help = 'A Twitter list, formatted as "username/listname"') - - @classmethod - def from_args(cls, args): - return cls(args.list, retries = args.retries)