mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-13 04:48:28 +03:00
Add Tweet.{inReplyToTweetId,inReplyToUser}
This makes User.displayname optional because the replied-to user is not always present in the user mentions.
This commit is contained in:
@@ -42,6 +42,8 @@ class Tweet(snscrape.base.Item):
|
|||||||
media: typing.Optional[typing.List['Medium']] = None
|
media: typing.Optional[typing.List['Medium']] = None
|
||||||
retweetedTweet: typing.Optional['Tweet'] = None
|
retweetedTweet: typing.Optional['Tweet'] = None
|
||||||
quotedTweet: typing.Optional['Tweet'] = None
|
quotedTweet: typing.Optional['Tweet'] = None
|
||||||
|
inReplyToTweetId: typing.Optional[int] = None
|
||||||
|
inReplyToUser: typing.Optional['User'] = None
|
||||||
mentionedUsers: typing.Optional[typing.List['User']] = None
|
mentionedUsers: typing.Optional[typing.List['User']] = None
|
||||||
coordinates: typing.Optional['Coordinates'] = None
|
coordinates: typing.Optional['Coordinates'] = None
|
||||||
place: typing.Optional['Place'] = None
|
place: typing.Optional['Place'] = None
|
||||||
@@ -112,8 +114,8 @@ class User(snscrape.base.Entity):
|
|||||||
# Most fields can be None if they're not known.
|
# Most fields can be None if they're not known.
|
||||||
|
|
||||||
username: str
|
username: str
|
||||||
displayname: str
|
|
||||||
id: int
|
id: int
|
||||||
|
displayname: typing.Optional[str] = None
|
||||||
description: typing.Optional[str] = None # Description as it's displayed on the web interface with URLs replaced
|
description: typing.Optional[str] = None # Description as it's displayed on the web interface with URLs replaced
|
||||||
rawDescription: typing.Optional[str] = None # Raw description with the URL(s) intact
|
rawDescription: typing.Optional[str] = None # Raw description with the URL(s) intact
|
||||||
descriptionUrls: typing.Optional[typing.List[DescriptionURL]] = None
|
descriptionUrls: typing.Optional[typing.List[DescriptionURL]] = None
|
||||||
@@ -368,8 +370,19 @@ class TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
kwargs['retweetedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['retweeted_status_id_str']], obj) if 'retweeted_status_id_str' in tweet else None
|
kwargs['retweetedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['retweeted_status_id_str']], obj) if 'retweeted_status_id_str' in tweet else None
|
||||||
if 'quoted_status_id_str' in tweet and tweet['quoted_status_id_str'] in obj['globalObjects']['tweets']:
|
if 'quoted_status_id_str' in tweet and tweet['quoted_status_id_str'] in obj['globalObjects']['tweets']:
|
||||||
kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj)
|
kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj)
|
||||||
|
if (inReplyToTweetId := tweet.get('in_reply_to_status_id_str')):
|
||||||
|
kwargs['inReplyToTweetId'] = int(inReplyToTweetId)
|
||||||
|
inReplyToUserId = int(tweet['in_reply_to_user_id_str'])
|
||||||
|
if inReplyToUserId == kwargs['user'].id:
|
||||||
|
kwargs['inReplyToUser'] = kwargs['user']
|
||||||
|
elif tweet['entities'].get('user_mentions'):
|
||||||
|
for u in tweet['entities']['user_mentions']:
|
||||||
|
if u['id_str'] == tweet['in_reply_to_user_id_str']:
|
||||||
|
kwargs['inReplyToUser'] = User(username = u['screen_name'], id = u['id'] if 'id' in u else int(u['id_str']), displayname = u['name'])
|
||||||
|
if 'inReplyToUser' not in kwargs:
|
||||||
|
kwargs['inReplyToUser'] = User(username = tweet['in_reply_to_screen_name'], id = inReplyToUserId)
|
||||||
kwargs['mentionedUsers'] = [
|
kwargs['mentionedUsers'] = [
|
||||||
User(username = u['screen_name'], displayname = u['name'], id = u['id'] if 'id' in u else int(u['id_str'])) \
|
User(username = u['screen_name'], id = u['id'] if 'id' in u else int(u['id_str']), displayname = u['name']) \
|
||||||
for u in tweet['entities']['user_mentions']
|
for u in tweet['entities']['user_mentions']
|
||||||
] if 'user_mentions' in tweet['entities'] and tweet['entities']['user_mentions'] else None
|
] if 'user_mentions' in tweet['entities'] and tweet['entities']['user_mentions'] else None
|
||||||
|
|
||||||
@@ -405,8 +418,8 @@ class TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
def _user_to_user(self, user):
|
def _user_to_user(self, user):
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
kwargs['username'] = user['screen_name']
|
kwargs['username'] = user['screen_name']
|
||||||
kwargs['displayname'] = user['name']
|
|
||||||
kwargs['id'] = user['id'] if 'id' in user else int(user['id_str'])
|
kwargs['id'] = user['id'] if 'id' in user else int(user['id_str'])
|
||||||
|
kwargs['displayname'] = user['name']
|
||||||
kwargs['description'] = self._render_text_with_urls(user['description'], user['entities']['description'].get('urls'))
|
kwargs['description'] = self._render_text_with_urls(user['description'], user['entities']['description'].get('urls'))
|
||||||
kwargs['rawDescription'] = user['description']
|
kwargs['rawDescription'] = user['description']
|
||||||
kwargs['descriptionUrls'] = [{'text': x.get('display_url'), 'url': x['expanded_url'], 'tcourl': x['url'], 'indices': tuple(x['indices'])} for x in user['entities']['description'].get('urls', [])]
|
kwargs['descriptionUrls'] = [{'text': x.get('display_url'), 'url': x['expanded_url'], 'tcourl': x['url'], 'indices': tuple(x['indices'])} for x in user['entities']['description'].get('urls', [])]
|
||||||
@@ -521,8 +534,8 @@ class TwitterUserScraper(TwitterSearchScraper):
|
|||||||
description = self._render_text_with_urls(rawDescription, user['legacy']['entities']['description']['urls'])
|
description = self._render_text_with_urls(rawDescription, user['legacy']['entities']['description']['urls'])
|
||||||
return User(
|
return User(
|
||||||
username = user['legacy']['screen_name'],
|
username = user['legacy']['screen_name'],
|
||||||
displayname = user['legacy']['name'],
|
|
||||||
id = user['rest_id'],
|
id = user['rest_id'],
|
||||||
|
displayname = user['legacy']['name'],
|
||||||
description = description,
|
description = description,
|
||||||
rawDescription = rawDescription,
|
rawDescription = rawDescription,
|
||||||
descriptionUrls = [{'text': x.get('display_url'), 'url': x['expanded_url'], 'tcourl': x['url'], 'indices': tuple(x['indices'])} for x in user['legacy']['entities']['description']['urls']],
|
descriptionUrls = [{'text': x.get('display_url'), 'url': x['expanded_url'], 'tcourl': x['url'], 'indices': tuple(x['indices'])} for x in user['legacy']['entities']['description']['urls']],
|
||||||
|
|||||||
Reference in New Issue
Block a user