mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-08 02:28:29 +03:00
Merge branch 'master' into telegram-media
This commit is contained in:
@@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper):
|
||||
def _check_json_callback(self, r):
|
||||
if r.status_code != 200:
|
||||
return False, f'status code {r.status_code}'
|
||||
if r.url.startswith('https://www.instagram.com/accounts/login/'):
|
||||
raise snscrape.base.ScraperException('Redirected to login page')
|
||||
try:
|
||||
obj = json.loads(r.text)
|
||||
except json.JSONDecodeError as e:
|
||||
|
||||
@@ -287,7 +287,12 @@ class _CLIGuestTokenManager(GuestTokenManager):
|
||||
def reset(self):
|
||||
super().reset()
|
||||
with self._lock:
|
||||
-os.remove(self._file)
|
||||
+_logger.info(f'Deleting guest token file {self._file}')
|
||||
+try:
|
||||
+os.remove(self._file)
|
||||
+except FileNotFoundError:
|
||||
+# Another process likely already removed the file
|
||||
+pass
|
||||
|
||||
|
||||
class _TwitterAPIType(enum.Enum):
|
||||
@@ -339,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
|
||||
o = r.json()
|
||||
if not o.get('guest_token'):
|
||||
-raise snscrape.base.ScraperError('Unable to retrieve guest token')
|
||||
+raise snscrape.base.ScraperException('Unable to retrieve guest token')
|
||||
self._guestTokenManager.token = o['guest_token']
|
||||
assert self._guestTokenManager.token
|
||||
_logger.debug(f'Using guest token {self._guestTokenManager.token}')
|
||||
@@ -647,7 +652,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
#TODO Include result['softInterventionPivot'] in the Tweet object
|
||||
result = result['tweet']
|
||||
else:
|
||||
-raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}')
|
||||
+raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}')
|
||||
tweet = result['legacy']
|
||||
userId = int(result['core']['user_results']['result']['rest_id'])
|
||||
user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
|
||||
@@ -664,6 +669,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
|
||||
else:
|
||||
kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
|
||||
elif 'quoted_status_id_str' in tweet:
|
||||
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
|
||||
if 'card' in result:
|
||||
kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
|
||||
return self._make_tweet(tweet, user, **kwargs)
|
||||
|
||||
@@ -177,11 +177,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
continue
|
||||
if 'data-video' in a.attrs:
|
||||
# Video
|
||||
+if 'data-link-attr' in a.attrs:
|
||||
+hrefUrl = urllib.parse.unquote(a.attrs['data-link-attr'].split('to=')[1].split('&')[0])
|
||||
+else:
|
||||
+hrefUrl = f'https://vk.com{a["href"]}'
|
||||
video = Video(
|
||||
id = a['data-video'],
|
||||
list = a['data-list'],
|
||||
duration = int(a['data-duration']),
|
||||
-url = f'https://vk.com{a["href"]}',
|
||||
+url = hrefUrl,
|
||||
thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
||||
_logger.warning('User does not exist')
|
||||
self._user = _userDoesNotExist
|
||||
else:
|
||||
-raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
|
||||
+raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})')
|
||||
|
||||
def _check_timeline_response(self, r):
|
||||
if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':
|
||||
|
||||
Reference in New Issue
Block a user