diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py index 4a89f3d..14483e0 100644 --- a/snscrape/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper): def _check_json_callback(self, r): if r.status_code != 200: return False, f'status code {r.status_code}' + if r.url.startswith('https://www.instagram.com/accounts/login/'): + raise snscrape.base.ScraperException('Redirected to login page') try: obj = json.loads(r.text) except json.JSONDecodeError as e: diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 337843f..0b4acc8 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -287,7 +287,12 @@ class _CLIGuestTokenManager(GuestTokenManager): def reset(self): super().reset() with self._lock: - os.remove(self._file) + _logger.info(f'Deleting guest token file {self._file}') + try: + os.remove(self._file) + except FileNotFoundError: + # Another process likely already removed the file + pass class _TwitterAPIType(enum.Enum): @@ -339,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response) o = r.json() if not o.get('guest_token'): - raise snscrape.base.ScraperError('Unable to retrieve guest token') + raise snscrape.base.ScraperException('Unable to retrieve guest token') self._guestTokenManager.token = o['guest_token'] assert self._guestTokenManager.token _logger.debug(f'Using guest token {self._guestTokenManager.token}') @@ -647,7 +652,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper): #TODO Include result['softInterventionPivot'] in the Tweet object result = result['tweet'] else: - raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}') + raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}') tweet = result['legacy'] userId = int(result['core']['user_results']['result']['rest_id']) user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId) @@ -664,6 +669,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str'])) else: kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id'])) + elif 'quoted_status_id_str' in tweet: + kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str'])) if 'card' in result: kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL) return self._make_tweet(tweet, user, **kwargs) diff --git a/snscrape/modules/weibo.py b/snscrape/modules/weibo.py index 38318bd..796f864 100644 --- a/snscrape/modules/weibo.py +++ b/snscrape/modules/weibo.py @@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper): _logger.warning('User does not exist') self._user = _userDoesNotExist else: - raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})') + raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})') def _check_timeline_response(self, r): if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':