mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 20:38:29 +03:00
@@ -14,6 +14,7 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
import base64
|
||||||
import collections
|
import collections
|
||||||
import copy
|
import copy
|
||||||
import dataclasses
|
import dataclasses
|
||||||
@@ -515,6 +516,27 @@ class UserLabel:
|
|||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class UserRef:
|
class UserRef:
|
||||||
id: int
|
id: int
|
||||||
|
text: typing.Optional[str] = None
|
||||||
|
textLinks: typing.Optional[typing.List[TextLink]] = None
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f'https://twitter.com/i/user/{self.id}'
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class Community(snscrape.base.Entity):
|
||||||
|
id: int
|
||||||
|
name: str
|
||||||
|
description: str
|
||||||
|
created: datetime.datetime
|
||||||
|
admin: typing.Union[User, UserRef]
|
||||||
|
creator: typing.Union[User, UserRef]
|
||||||
|
membersFacepile: typing.List[typing.Union[User, UserRef]]
|
||||||
|
moderatorsCount: int
|
||||||
|
membersCount: int
|
||||||
|
rules: typing.List[str]
|
||||||
|
theme: str
|
||||||
|
bannerUrl: str
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -1428,6 +1450,14 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
yield self._graphql_timeline_tweet_item_result_to_tweet(entry['content']['itemContent']['tweet_results']['result'], tweetId = tweetId)
|
yield self._graphql_timeline_tweet_item_result_to_tweet(entry['content']['itemContent']['tweet_results']['result'], tweetId = tweetId)
|
||||||
else:
|
else:
|
||||||
_logger.warning('Got unrecognised timeline tweet item(s)')
|
_logger.warning('Got unrecognised timeline tweet item(s)')
|
||||||
|
elif entry['entryId'].startswith('homeConversation-'):
|
||||||
|
if entry['content']['entryType'] == 'TimelineTimelineModule':
|
||||||
|
for item in entry['content']['items']:
|
||||||
|
if not item['entryId'].startswith('homeConversation-') or '-tweet-' not in item['entryId']:
|
||||||
|
raise snscrape.base.ScraperException(f'Unexpected home conversation entry ID: {item["entryId"]!r}')
|
||||||
|
tweetId = int(item['entryId'].split('-tweet-', 1)[1])
|
||||||
|
if item['item']['itemContent']['itemType'] == 'TimelineTweet':
|
||||||
|
yield self._graphql_timeline_tweet_item_result_to_tweet(item['item']['itemContent']['tweet_results']['result'], tweetId = tweetId)
|
||||||
elif includeConversationThreads and entry['entryId'].startswith('conversationthread-'): #TODO show more cursor?
|
elif includeConversationThreads and entry['entryId'].startswith('conversationthread-'): #TODO show more cursor?
|
||||||
for item in entry['content']['items']:
|
for item in entry['content']['items']:
|
||||||
if item['entryId'].startswith(f'{entry["entryId"]}-tweet-'):
|
if item['entryId'].startswith(f'{entry["entryId"]}-tweet-'):
|
||||||
@@ -1497,6 +1527,30 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
labelKwargs['longDescription'] = label['longDescription']['text']
|
labelKwargs['longDescription'] = label['longDescription']['text']
|
||||||
return UserLabel(**labelKwargs)
|
return UserLabel(**labelKwargs)
|
||||||
|
|
||||||
|
def _graphql_user_results_to_user_ref(self, obj):
|
||||||
|
if 'id' not in obj:
|
||||||
|
return None
|
||||||
|
if isinstance(obj['id'], int):
|
||||||
|
userId = obj['id']
|
||||||
|
elif obj['id'].startswith('VXNlclJlc3VsdHM6'):
|
||||||
|
# UserResults:<userid> in base64
|
||||||
|
try:
|
||||||
|
userId = base64.b64decode(obj['id'])
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
assert userId.startswith(b'UserResults:')
|
||||||
|
userId = int(userId.split(b':', 1)[1])
|
||||||
|
kwargs = {}
|
||||||
|
if 'result' in obj and obj['result']['__typename'] == 'UserUnavailable' and 'unavailable_message' in obj['result']:
|
||||||
|
kwargs['text'] = obj['result']['unavailable_message']['text']
|
||||||
|
kwargs['textLinks'] = [TextLink(text = kwargs['text'][x['fromIndex']:x['toIndex']], url = x['ref']['url'], tcourl = None, indices = (x['fromIndex'], x['toIndex'])) for x in obj['result']['unavailable_message']['entities']]
|
||||||
|
return UserRef(id = userId, **kwargs)
|
||||||
|
|
||||||
|
def _graphql_user_results_to_user(self, results):
|
||||||
|
if 'result' not in results or results['result']['__typename'] == 'UserUnavailable':
|
||||||
|
return self._graphql_user_results_to_user_ref(results)
|
||||||
|
return self._user_to_user(results['result']['legacy'], id_ = int(results['result']['rest_id']))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _cli_construct(cls, argparseArgs, *args, **kwargs):
|
def _cli_construct(cls, argparseArgs, *args, **kwargs):
|
||||||
kwargs['guestTokenManager'] = _CLIGuestTokenManager()
|
kwargs['guestTokenManager'] = _CLIGuestTokenManager()
|
||||||
@@ -1880,6 +1934,102 @@ class TwitterListPostsScraper(TwitterSearchScraper):
|
|||||||
return cls._cli_construct(args, args.list)
|
return cls._cli_construct(args, args.list)
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterCommunityScraper(_TwitterAPIScraper):
|
||||||
|
name = 'twitter-community'
|
||||||
|
|
||||||
|
def __init__(self, communityId, **kwargs):
|
||||||
|
self._communityId = communityId
|
||||||
|
super().__init__(f'https://twitter.com/i/communities/{self._communityId}', **kwargs)
|
||||||
|
|
||||||
|
def _get_entity(self):
|
||||||
|
self._ensure_guest_token()
|
||||||
|
params = {
|
||||||
|
'variables': {
|
||||||
|
'communityId': str(self._communityId),
|
||||||
|
'withDmMuting': False,
|
||||||
|
'withSafetyModeUserFields': False,
|
||||||
|
'withSuperFollowsUserFields': True,
|
||||||
|
},
|
||||||
|
'features': {
|
||||||
|
'responsive_web_graphql_exclude_directive_enabled': False,
|
||||||
|
'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
|
||||||
|
'responsive_web_graphql_timeline_navigation_enabled': True,
|
||||||
|
'responsive_web_twitter_blue_verified_badge_is_enabled': True,
|
||||||
|
'verified_phone_label_enabled': False,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
obj = self._get_api_data('https://api.twitter.com/graphql/MO8cE7aTvaenXJX_teUGcA/CommunitiesFetchOneQuery', _TwitterAPIType.GRAPHQL, params = params)
|
||||||
|
if not obj['data'] or 'result' not in obj['data']['communityResults'] or obj['data']['communityResults']['result']['__typename'] == 'CommunityUnavailable':
|
||||||
|
_logger.warning('Empty response or unavailable community')
|
||||||
|
return None
|
||||||
|
community = obj['data']['communityResults']['result']
|
||||||
|
return Community(
|
||||||
|
id = int(community['id_str']),
|
||||||
|
name = community['name'],
|
||||||
|
description = community['description'],
|
||||||
|
created = datetime.datetime.fromtimestamp(community['created_at'] / 1000, tz = datetime.timezone.utc),
|
||||||
|
admin = self._graphql_user_results_to_user(community['admin_results']),
|
||||||
|
creator = self._graphql_user_results_to_user(community['creator_results']),
|
||||||
|
membersFacepile = [self._graphql_user_results_to_user(m) for m in community['members_facepile_results']],
|
||||||
|
moderatorsCount = community['moderator_count'],
|
||||||
|
membersCount = community['member_count'],
|
||||||
|
rules = [r['name'] for r in community['rules']],
|
||||||
|
theme = community.get('custom_theme', community['default_theme']),
|
||||||
|
bannerUrl = community.get('custom_banner_media', community['default_banner_media'])['media_info']['original_img_url'],
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_items(self):
|
||||||
|
paginationVariables = {
|
||||||
|
'count': 20,
|
||||||
|
'cursor': None,
|
||||||
|
'communityId': str(self._communityId),
|
||||||
|
'withCommunity': True,
|
||||||
|
'withSuperFollowsUserFields': True,
|
||||||
|
'withDownvotePerspective': False,
|
||||||
|
'withReactionsMetadata': False,
|
||||||
|
'withReactionsPerspective': False,
|
||||||
|
'withSuperFollowsTweetFields': True,
|
||||||
|
}
|
||||||
|
variables = paginationVariables.copy()
|
||||||
|
del variables['count'], variables['cursor']
|
||||||
|
features = {
|
||||||
|
'responsive_web_twitter_blue_verified_badge_is_enabled': True,
|
||||||
|
'responsive_web_graphql_exclude_directive_enabled': False,
|
||||||
|
'verified_phone_label_enabled': False,
|
||||||
|
'responsive_web_graphql_timeline_navigation_enabled': True,
|
||||||
|
'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
|
||||||
|
'tweetypie_unmention_optimization_enabled': True,
|
||||||
|
'vibe_api_enabled': True,
|
||||||
|
'responsive_web_edit_tweet_api_enabled': True,
|
||||||
|
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
|
||||||
|
'view_counts_everywhere_api_enabled': True,
|
||||||
|
'longform_notetweets_consumption_enabled': True,
|
||||||
|
'tweet_awards_web_tipping_enabled': False,
|
||||||
|
'freedom_of_speech_not_reach_fetch_enabled': False,
|
||||||
|
'standardized_nudges_misinfo': True,
|
||||||
|
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||||
|
'interactive_text_enabled': True,
|
||||||
|
'responsive_web_text_conversations_enabled': False,
|
||||||
|
'responsive_web_enhance_cards_enabled': False,
|
||||||
|
}
|
||||||
|
params = {'variables': variables, 'features': features}
|
||||||
|
paginationParams = {'variables': paginationVariables, 'features': features}
|
||||||
|
|
||||||
|
for obj in self._iter_api_data('https://api.twitter.com/graphql/Qvst9FkHq45wuqicCvMpVw/CommunityTweetsTimeline', _TwitterAPIType.GRAPHQL, params, paginationParams):
|
||||||
|
if obj['data']['communityResults']['result']['__typename'] == 'CommunityUnavailable':
|
||||||
|
_logger.warning('Community unavailable')
|
||||||
|
break
|
||||||
|
yield from self._graphql_timeline_instructions_to_tweets(obj['data']['communityResults']['result']['community_timeline']['timeline']['instructions'])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _cli_setup_parser(cls, subparser):
|
||||||
|
subparser.add_argument('communityId', type = int, help = 'A community ID')
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _cli_from_args(cls, args):
|
||||||
|
return cls._cli_construct(args, args.communityId)
|
||||||
|
|
||||||
|
|
||||||
class TwitterTrendsScraper(_TwitterAPIScraper):
|
class TwitterTrendsScraper(_TwitterAPIScraper):
|
||||||
name = 'twitter-trends'
|
name = 'twitter-trends'
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user