From 25ee014e2970737e500e2964ab09584b11b19fb0 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Wed, 16 Feb 2022 02:59:21 +0000 Subject: [PATCH] Extract cards --- snscrape/modules/twitter.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 68b0633..ec5632f 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -68,6 +68,7 @@ class Tweet(snscrape.base.Item): place: typing.Optional['Place'] = None hashtags: typing.Optional[typing.List[str]] = None cashtags: typing.Optional[typing.List[str]] = None + card: typing.Optional['Card'] = None username = snscrape.base._DeprecatedProperty('username', lambda self: self.user.username, 'user.username') outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks) if self.outlinks else '', 'outlinks') @@ -131,6 +132,14 @@ class Place: countryCode: str +@dataclasses.dataclass +class Card: + title: str + description: str + url: str + thumbnailUrl: str + + @dataclasses.dataclass class TweetRef(snscrape.base.Item): '''A reference to a tweet for which no proper Tweet object could be produced from the data returned by Twitter''' @@ -496,7 +505,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): raise snscrape.base.ScraperException(f'Unable to handle entry {entryId!r}') yield self._tweet_to_tweet(tweet, obj) - def _make_tweet(self, tweet, user, retweetedTweet = None, quotedTweet = None): + def _make_tweet(self, tweet, user, retweetedTweet = None, quotedTweet = None, card = None): kwargs = {} kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str']) kwargs['content'] = tweet['full_text'] @@ -589,8 +598,27 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['hashtags'] = [o['text'] for o in tweet['entities']['hashtags']] if tweet['entities'].get('symbols'): kwargs['cashtags'] = [o['text'] for o in tweet['entities']['symbols']] + if card: + kwargs['card'] = card return Tweet(**kwargs) + def _make_card(self, card, apiType): + cardKwargs = {} + for key, kwarg in [('title', 'title'), ('description', 'description'), ('card_url', 'url'), ('thumbnail_image_original', 'thumbnailUrl')]: + if apiType is _TwitterAPIType.V2: + value = card['binding_values'].get(key) + elif apiType is _TwitterAPIType.GRAPHQL: + value = next((o['value'] for o in card['legacy']['binding_values'] if o['key'] == key), None) + if not value: + continue + if value['type'] == 'STRING': + cardKwargs[kwarg] = value['string_value'] + elif value['type'] == 'IMAGE': + cardKwargs[kwarg] = value['image_value']['url'] + else: + raise snscrape.base.ScraperError(f'Unknown card value type: {value["type"]!r}') + return Card(**cardKwargs) + def _tweet_to_tweet(self, tweet, obj): user = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']]) kwargs = {} @@ -598,6 +626,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['retweetedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['retweeted_status_id_str']], obj) if 'quoted_status_id_str' in tweet and tweet['quoted_status_id_str'] in obj['globalObjects']['tweets']: kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj) + if 'card' in tweet: + kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2) return self._make_tweet(tweet, user, **kwargs) def _graphql_timeline_tweet_item_result_to_tweet(self, result): @@ -624,6 +654,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str'])) else: kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id'])) + if 'card' in result: + kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL) return self._make_tweet(tweet, user, **kwargs) def _graphql_timeline_instructions_to_tweets(self, instructions, includeConversationThreads = False):