mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 11:58:28 +03:00
Update GraphQL API parameters
This commit is contained in:
@@ -15,6 +15,7 @@ __all__ = [
|
|||||||
|
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import copy
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
@@ -704,7 +705,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
def _get_api_data(self, endpoint, apiType, params):
|
def _get_api_data(self, endpoint, apiType, params):
|
||||||
self._ensure_guest_token()
|
self._ensure_guest_token()
|
||||||
if apiType is _TwitterAPIType.GRAPHQL:
|
if apiType is _TwitterAPIType.GRAPHQL:
|
||||||
params = urllib.parse.urlencode({'variables': json.dumps(params, separators = (',', ':'))}, quote_via = urllib.parse.quote)
|
params = urllib.parse.urlencode({k: json.dumps(v, separators = (',', ':')) for k, v in params.items()}, quote_via = urllib.parse.quote)
|
||||||
r = self._get(endpoint, params = params, headers = self._apiHeaders, responseOkCallback = self._check_api_response)
|
r = self._get(endpoint, params = params, headers = self._apiHeaders, responseOkCallback = self._check_api_response)
|
||||||
try:
|
try:
|
||||||
obj = r.json()
|
obj = r.json()
|
||||||
@@ -724,8 +725,11 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
if cursor is None:
|
if cursor is None:
|
||||||
reqParams = params
|
reqParams = params
|
||||||
else:
|
else:
|
||||||
reqParams = paginationParams.copy()
|
reqParams = copy.deepcopy(paginationParams)
|
||||||
reqParams['cursor'] = cursor
|
if apiType is _TwitterAPIType.V2:
|
||||||
|
reqParams['cursor'] = cursor
|
||||||
|
else:
|
||||||
|
reqParams['variables']['cursor'] = cursor
|
||||||
bottomCursorAndStop = None
|
bottomCursorAndStop = None
|
||||||
if direction is _ScrollDirection.TOP or direction is _ScrollDirection.BOTH:
|
if direction is _ScrollDirection.TOP or direction is _ScrollDirection.BOTH:
|
||||||
dir = 'top'
|
dir = 'top'
|
||||||
@@ -808,8 +812,11 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
if newCursor != cursor:
|
if newCursor != cursor:
|
||||||
emptyResponsesOnCursor = 0
|
emptyResponsesOnCursor = 0
|
||||||
cursor = newCursor
|
cursor = newCursor
|
||||||
reqParams = paginationParams.copy()
|
reqParams = copy.deepcopy(paginationParams)
|
||||||
reqParams['cursor'] = cursor
|
if apiType is _TwitterAPIType.V2:
|
||||||
|
reqParams['cursor'] = cursor
|
||||||
|
else:
|
||||||
|
reqParams['variables']['cursor'] = cursor
|
||||||
|
|
||||||
def _count_tweets(self, entries):
|
def _count_tweets(self, entries):
|
||||||
return sum(entry['entryId'].startswith('sq-I-t-') or entry['entryId'].startswith('tweet-') for entry in entries)
|
return sum(entry['entryId'].startswith('sq-I-t-') or entry['entryId'].startswith('tweet-') for entry in entries)
|
||||||
@@ -1522,7 +1529,7 @@ class TwitterUserScraper(TwitterSearchScraper):
|
|||||||
fieldName = 'userId'
|
fieldName = 'userId'
|
||||||
endpoint = 'https://twitter.com/i/api/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId'
|
endpoint = 'https://twitter.com/i/api/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId'
|
||||||
variables = {fieldName: str(self._user), 'withSafetyModeUserFields': True, 'withSuperFollowsUserFields': True}
|
variables = {fieldName: str(self._user), 'withSafetyModeUserFields': True, 'withSuperFollowsUserFields': True}
|
||||||
obj = self._get_api_data(endpoint, _TwitterAPIType.GRAPHQL, params = variables)
|
obj = self._get_api_data(endpoint, _TwitterAPIType.GRAPHQL, params = {'variables': variables})
|
||||||
if not obj['data'] or obj['data']['user']['result']['__typename'] == 'UserUnavailable':
|
if not obj['data'] or obj['data']['user']['result']['__typename'] == 'UserUnavailable':
|
||||||
return None
|
return None
|
||||||
user = obj['data']['user']['result']
|
user = obj['data']['user']['result']
|
||||||
@@ -1618,9 +1625,30 @@ class TwitterProfileScraper(TwitterUserScraper):
|
|||||||
}
|
}
|
||||||
variables = paginationVariables.copy()
|
variables = paginationVariables.copy()
|
||||||
del variables['cursor']
|
del variables['cursor']
|
||||||
|
features = {
|
||||||
|
'responsive_web_twitter_blue_verified_badge_is_enabled': True,
|
||||||
|
'verified_phone_label_enabled': False,
|
||||||
|
'responsive_web_graphql_timeline_navigation_enabled': True,
|
||||||
|
'view_counts_public_visibility_enabled': True,
|
||||||
|
'view_counts_everywhere_api_enabled': True,
|
||||||
|
'longform_notetweets_consumption_enabled': False,
|
||||||
|
'tweetypie_unmention_optimization_enabled': True,
|
||||||
|
'responsive_web_uc_gql_enabled': True,
|
||||||
|
'vibe_api_enabled': True,
|
||||||
|
'responsive_web_edit_tweet_api_enabled': True,
|
||||||
|
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
|
||||||
|
'standardized_nudges_misinfo': True,
|
||||||
|
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||||
|
'interactive_text_enabled': True,
|
||||||
|
'responsive_web_text_conversations_enabled': False,
|
||||||
|
'responsive_web_enhance_cards_enabled': False,
|
||||||
|
}
|
||||||
|
|
||||||
|
params = {'variables': variables, 'features': features}
|
||||||
|
paginationParams = {'variables': paginationVariables, 'features': features}
|
||||||
|
|
||||||
gotPinned = False
|
gotPinned = False
|
||||||
for obj in self._iter_api_data('https://twitter.com/i/api/graphql/BSKxQ9_IaCoVyIvQHQROIQ/UserTweetsAndReplies', _TwitterAPIType.GRAPHQL, variables, paginationVariables):
|
for obj in self._iter_api_data('https://twitter.com/i/api/graphql/W3HCLclD2VauuL6RcQm9MA/UserTweetsAndReplies', _TwitterAPIType.GRAPHQL, params, paginationParams):
|
||||||
instructions = obj['data']['user']['result']['timeline']['timeline']['instructions']
|
instructions = obj['data']['user']['result']['timeline']['timeline']['instructions']
|
||||||
if not gotPinned:
|
if not gotPinned:
|
||||||
for instruction in instructions:
|
for instruction in instructions:
|
||||||
@@ -1677,10 +1705,8 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
|||||||
'includePromotedContent': True,
|
'includePromotedContent': True,
|
||||||
'withCommunity': True,
|
'withCommunity': True,
|
||||||
'withQuickPromoteEligibilityTweetFields': True,
|
'withQuickPromoteEligibilityTweetFields': True,
|
||||||
'withTweetQuoteCount': True,
|
|
||||||
'withBirdwatchNotes': True,
|
'withBirdwatchNotes': True,
|
||||||
'withSuperFollowsUserFields': True,
|
'withSuperFollowsUserFields': True,
|
||||||
'withBirdwatchPivots': False,
|
|
||||||
'withDownvotePerspective': False,
|
'withDownvotePerspective': False,
|
||||||
'withReactionsMetadata': False,
|
'withReactionsMetadata': False,
|
||||||
'withReactionsPerspective': False,
|
'withReactionsPerspective': False,
|
||||||
@@ -1690,9 +1716,30 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
|||||||
}
|
}
|
||||||
variables = paginationVariables.copy()
|
variables = paginationVariables.copy()
|
||||||
del variables['cursor'], variables['referrer']
|
del variables['cursor'], variables['referrer']
|
||||||
url = 'https://twitter.com/i/api/graphql/8svRea_Lc0_mdhwP6dqe0Q/TweetDetail'
|
features = {
|
||||||
|
'responsive_web_twitter_blue_verified_badge_is_enabled': True,
|
||||||
|
'verified_phone_label_enabled': False,
|
||||||
|
'responsive_web_graphql_timeline_navigation_enabled': True,
|
||||||
|
'view_counts_public_visibility_enabled': True,
|
||||||
|
'view_counts_everywhere_api_enabled': True,
|
||||||
|
'longform_notetweets_consumption_enabled': False,
|
||||||
|
'tweetypie_unmention_optimization_enabled': True,
|
||||||
|
'responsive_web_uc_gql_enabled': True,
|
||||||
|
'vibe_api_enabled': True,
|
||||||
|
'responsive_web_edit_tweet_api_enabled': True,
|
||||||
|
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
|
||||||
|
'standardized_nudges_misinfo': True,
|
||||||
|
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||||
|
'interactive_text_enabled': True,
|
||||||
|
'responsive_web_text_conversations_enabled': False,
|
||||||
|
'responsive_web_enhance_cards_enabled': False,
|
||||||
|
}
|
||||||
|
|
||||||
|
params = {'variables': variables, 'features': features}
|
||||||
|
paginationParams = {'variables': paginationVariables, 'features': features}
|
||||||
|
url = 'https://twitter.com/i/api/graphql/HQ_gjq7zDNvSiJOCSkwUEw/TweetDetail'
|
||||||
if self._mode is TwitterTweetScraperMode.SINGLE:
|
if self._mode is TwitterTweetScraperMode.SINGLE:
|
||||||
obj = self._get_api_data(url, _TwitterAPIType.GRAPHQL, params = variables)
|
obj = self._get_api_data(url, _TwitterAPIType.GRAPHQL, params = params)
|
||||||
if not obj['data']:
|
if not obj['data']:
|
||||||
return
|
return
|
||||||
for instruction in obj['data']['threaded_conversation_with_injections']['instructions']:
|
for instruction in obj['data']['threaded_conversation_with_injections']['instructions']:
|
||||||
@@ -1703,7 +1750,7 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
|||||||
yield self._graphql_timeline_tweet_item_result_to_tweet(entry['content']['itemContent']['tweet_results']['result'])
|
yield self._graphql_timeline_tweet_item_result_to_tweet(entry['content']['itemContent']['tweet_results']['result'])
|
||||||
break
|
break
|
||||||
elif self._mode is TwitterTweetScraperMode.SCROLL:
|
elif self._mode is TwitterTweetScraperMode.SCROLL:
|
||||||
for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, variables, paginationVariables, direction = _ScrollDirection.BOTH):
|
for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, params, paginationParams, direction = _ScrollDirection.BOTH):
|
||||||
if not obj['data']:
|
if not obj['data']:
|
||||||
continue
|
continue
|
||||||
yield from self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True)
|
yield from self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True)
|
||||||
@@ -1713,11 +1760,11 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
|||||||
queue.append(self._tweetId)
|
queue.append(self._tweetId)
|
||||||
while queue:
|
while queue:
|
||||||
tweetId = queue.popleft()
|
tweetId = queue.popleft()
|
||||||
thisPagVariables = paginationVariables.copy()
|
thisPagParams = copy.deepcopy(paginationVariables)
|
||||||
thisPagVariables['focalTweetId'] = str(tweetId)
|
thisPagParams['variables']['focalTweetId'] = str(tweetId)
|
||||||
thisVariables = thisPagVariables.copy()
|
thisParams = copy.deepcopy(thisPagParams)
|
||||||
del thisPagVariables['cursor'], thisPagVariables['referrer']
|
del thisPagParams['variables']['cursor'], thisPagParams['variables']['referrer']
|
||||||
for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, thisVariables, thisPagVariables, direction = _ScrollDirection.BOTH):
|
for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, thisParams, thisPagParams, direction = _ScrollDirection.BOTH):
|
||||||
if not obj['data']:
|
if not obj['data']:
|
||||||
continue
|
continue
|
||||||
for tweet in self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True):
|
for tweet in self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True):
|
||||||
|
|||||||
Reference in New Issue
Block a user