From 2480b173f430df8c79d92bc9545546ea14ef0bee Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 26 Feb 2022 19:31:08 +0000 Subject: [PATCH 1/5] Fix crash on race condition in CLI guest token manager resets Fixes #414 --- snscrape/modules/twitter.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 7c78f69..655fc66 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -287,7 +287,11 @@ class _CLIGuestTokenManager(GuestTokenManager): def reset(self): super().reset() with self._lock: - os.remove(self._file) + try: + os.remove(self._file) + except FileNotFoundError: + # Another process likely already removed the file + pass class _TwitterAPIType(enum.Enum): From 3a92b5bf0d93142e75b64cfb3828d69143bd106c Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 26 Feb 2022 19:32:55 +0000 Subject: [PATCH 2/5] Add log message for guest token file deletion --- snscrape/modules/twitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 655fc66..48baf9d 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -287,6 +287,7 @@ class _CLIGuestTokenManager(GuestTokenManager): def reset(self): super().reset() with self._lock: + _logger.info(f'Deleting guest token file {self._file}') try: os.remove(self._file) except FileNotFoundError: From 1ab0f4fccb0a694a2146436641bf578fa8d80d59 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 7 Mar 2022 22:16:58 +0000 Subject: [PATCH 3/5] Fix missing quoted tweet reference in certain buggy cases --- snscrape/modules/twitter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 48baf9d..d0f720f 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -666,6 +666,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str'])) else: kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id'])) + elif 'quoted_status_id_str' in tweet: + kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str'])) if 'card' in result: kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL) return self._make_tweet(tweet, user, **kwargs) From 694657ef801d51561e235c8351c2bc9e953b130c Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Wed, 9 Mar 2022 01:01:47 +0000 Subject: [PATCH 4/5] Fix broken exception references --- snscrape/modules/twitter.py | 4 ++-- snscrape/modules/weibo.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index d0f720f..78d70a3 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -344,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response) o = r.json() if not o.get('guest_token'): - raise snscrape.base.ScraperError('Unable to retrieve guest token') + raise snscrape.base.ScraperException('Unable to retrieve guest token') self._guestTokenManager.token = o['guest_token'] assert self._guestTokenManager.token _logger.debug(f'Using guest token {self._guestTokenManager.token}') @@ -649,7 +649,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): #TODO Include result['softInterventionPivot'] in the Tweet object result = result['tweet'] else: - raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}') + raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}') tweet = result['legacy'] userId = int(result['core']['user_results']['result']['rest_id']) user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId) diff --git a/snscrape/modules/weibo.py b/snscrape/modules/weibo.py index 38318bd..796f864 100644 --- a/snscrape/modules/weibo.py +++ b/snscrape/modules/weibo.py @@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper): _logger.warning('User does not exist') self._user = _userDoesNotExist else: - raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})') + raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})') def _check_timeline_response(self, r): if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}': From 5d156c6a1510aab8d64daf709517691dd065cfb5 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sun, 3 Apr 2022 02:34:30 +0000 Subject: [PATCH 5/5] Detect and raise error on redirect from GraphQL endpoint to login #165 --- snscrape/modules/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py index 4a89f3d..14483e0 100644 --- a/snscrape/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper): def _check_json_callback(self, r): if r.status_code != 200: return False, f'status code {r.status_code}' + if r.url.startswith('https://www.instagram.com/accounts/login/'): + raise snscrape.base.ScraperException('Redirected to login page') try: obj = json.loads(r.text) except json.JSONDecodeError as e: