From 9235890f9a489ccbe1a05652329162b88ee9f6d1 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 7 Feb 2022 04:04:21 +0000 Subject: [PATCH] Fix KeyError crash on attempting to scrape inexistent tweet ID --- snscrape/modules/twitter.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 0c17b6b..b5d0017 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -398,7 +398,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): instructions = obj['data']['user']['result']['timeline']['timeline']['instructions'] else: # TweetDetail - instructions = obj['data']['threaded_conversation_with_injections']['instructions'] + instructions = obj['data'].get('threaded_conversation_with_injections', {}).get('instructions', []) tweetCount = 0 for instruction in instructions: if 'addEntries' in instruction: @@ -944,6 +944,8 @@ class TwitterTweetScraper(_TwitterAPIScraper): url = 'https://twitter.com/i/api/graphql/8svRea_Lc0_mdhwP6dqe0Q/TweetDetail' if self._mode is TwitterTweetScraperMode.SINGLE: obj = self._get_api_data(url, _TwitterAPIType.GRAPHQL, params = variables) + if not obj['data']: + return for instruction in obj['data']['threaded_conversation_with_injections']['instructions']: if instruction['type'] != 'TimelineAddEntries': continue @@ -953,6 +955,8 @@ class TwitterTweetScraper(_TwitterAPIScraper): break elif self._mode is TwitterTweetScraperMode.SCROLL: for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, variables, paginationVariables, direction = _ScrollDirection.BOTH): + if not obj['data']: + continue yield from self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True) elif self._mode is TwitterTweetScraperMode.RECURSE: seenTweets = set() @@ -965,6 +969,8 @@ class TwitterTweetScraper(_TwitterAPIScraper): thisVariables = thisPagVariables.copy() del thisPagVariables['cursor'], thisPagVariables['referrer'] for obj in self._iter_api_data(url, _TwitterAPIType.GRAPHQL, thisVariables, thisPagVariables, direction = _ScrollDirection.BOTH): + if not obj['data']: + continue for tweet in self._graphql_timeline_instructions_to_tweets(obj['data']['threaded_conversation_with_injections']['instructions'], includeConversationThreads = True): if tweet.id not in seenTweets: yield tweet