From d91f971f51c1b7b51344ee1449a4fc9abe8c9bf8 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Tue, 21 Sep 2021 19:39:40 +0000
Subject: [PATCH] Refactor user label implementation and add support for bot accounts

Closes #281
---
 snscrape/modules/twitter.py | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index c27e17e..591909f 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -136,8 +136,7 @@ class User(snscrape.base.Entity):
 	linkTcourl: typing.Optional[str] = None
 	profileImageUrl: typing.Optional[str] = None
 	profileBannerUrl: typing.Optional[str] = None
-	label: typing.Optional[str] = None
-	labelUrl: typing.Optional[str] = None
+	label: typing.Optional['UserLabel'] = None
 
 	@property
 	def url(self):
@@ -147,6 +146,14 @@ class User(snscrape.base.Entity):
 		return self.url
 
 
+@dataclasses.dataclass
+class UserLabel:
+	description: str
+	url: typing.Optional[str] = None
+	badgeUrl: typing.Optional[str] = None
+	longDescription: typing.Optional[str] = None
+
+
 class ScrollDirection(enum.Enum):
 	TOP = enum.auto()
 	BOTTOM = enum.auto()
@@ -459,11 +466,21 @@ class TwitterAPIScraper(snscrape.base.Scraper):
 		kwargs['linkTcourl'] = user.get('url')
 		kwargs['profileImageUrl'] = user['profile_image_url_https']
 		kwargs['profileBannerUrl'] = user.get('profile_banner_url')
-		if 'ext' in user and 'label' in user['ext']['highlightedLabel']['r']['ok']:
-			kwargs['label'] = user['ext']['highlightedLabel']['r']['ok']['label']['description']
-			kwargs['labelUrl'] = user['ext']['highlightedLabel']['r']['ok']['label']['url']['url']
+		if 'ext' in user and (label := user['ext']['highlightedLabel']['r']['ok'].get('label')):
+			kwargs['label'] = self._user_label_to_user_label(label)
 		return User(**kwargs)
 
+	def _user_label_to_user_label(self, label):
+		labelKwargs = {}
+		labelKwargs['description'] = label['description']
+		if 'url' in label and 'url' in label['url']:
+			labelKwargs['url'] = label['url']['url']
+		if 'badge' in label and 'url' in label['badge']:
+			labelKwargs['badgeUrl'] = label['badge']['url']
+		if 'longDescription' in label and 'text' in label['longDescription']:
+			labelKwargs['longDescription'] = label['longDescription']['text']
+		return UserLabel(**labelKwargs)
+
 
 class TwitterSearchScraper(TwitterAPIScraper):
 	name = 'twitter-search'
@@ -563,6 +580,9 @@ class TwitterUserScraper(TwitterSearchScraper):
 		user = obj['data']['user']
 		rawDescription = user['legacy']['description']
 		description = self._render_text_with_urls(rawDescription, user['legacy']['entities']['description']['urls'])
+		label = None
+		if (labelO := user['affiliates_highlighted_label'].get('label')):
+			label = self._user_label_to_user_label(labelO)
 		return User(
 			username = user['legacy']['screen_name'],
 			id = user['rest_id'],
@@ -584,8 +604,7 @@ class TwitterUserScraper(TwitterSearchScraper):
 			linkTcourl = user['legacy'].get('url'),
 			profileImageUrl = user['legacy']['profile_image_url_https'],
 			profileBannerUrl = user['legacy'].get('profile_banner_url'),
-			label = user['affiliates_highlighted_label']['label']['description'] if user['affiliates_highlighted_label'] else None,
-			labelUrl = user['affiliates_highlighted_label']['label']['url']['url'] if user['affiliates_highlighted_label'] else None,
+			label = label,
 		)
 
 	def get_items(self):