mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-08 02:28:29 +03:00
Merge branch 'master' into telegram-media
This commit is contained in:
@@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper):
|
|||||||
def _check_json_callback(self, r):
|
def _check_json_callback(self, r):
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
return False, f'status code {r.status_code}'
|
return False, f'status code {r.status_code}'
|
||||||
|
if r.url.startswith('https://www.instagram.com/accounts/login/'):
|
||||||
|
raise snscrape.base.ScraperException('Redirected to login page')
|
||||||
try:
|
try:
|
||||||
obj = json.loads(r.text)
|
obj = json.loads(r.text)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
|
|||||||
@@ -287,7 +287,12 @@ class _CLIGuestTokenManager(GuestTokenManager):
|
|||||||
def reset(self):
|
def reset(self):
|
||||||
super().reset()
|
super().reset()
|
||||||
with self._lock:
|
with self._lock:
|
||||||
os.remove(self._file)
|
_logger.info(f'Deleting guest token file {self._file}')
|
||||||
|
try:
|
||||||
|
os.remove(self._file)
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Another process likely already removed the file
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class _TwitterAPIType(enum.Enum):
|
class _TwitterAPIType(enum.Enum):
|
||||||
@@ -339,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
|
r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
|
||||||
o = r.json()
|
o = r.json()
|
||||||
if not o.get('guest_token'):
|
if not o.get('guest_token'):
|
||||||
raise snscrape.base.ScraperError('Unable to retrieve guest token')
|
raise snscrape.base.ScraperException('Unable to retrieve guest token')
|
||||||
self._guestTokenManager.token = o['guest_token']
|
self._guestTokenManager.token = o['guest_token']
|
||||||
assert self._guestTokenManager.token
|
assert self._guestTokenManager.token
|
||||||
_logger.debug(f'Using guest token {self._guestTokenManager.token}')
|
_logger.debug(f'Using guest token {self._guestTokenManager.token}')
|
||||||
@@ -647,7 +652,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
#TODO Include result['softInterventionPivot'] in the Tweet object
|
#TODO Include result['softInterventionPivot'] in the Tweet object
|
||||||
result = result['tweet']
|
result = result['tweet']
|
||||||
else:
|
else:
|
||||||
raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}')
|
raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}')
|
||||||
tweet = result['legacy']
|
tweet = result['legacy']
|
||||||
userId = int(result['core']['user_results']['result']['rest_id'])
|
userId = int(result['core']['user_results']['result']['rest_id'])
|
||||||
user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
|
user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
|
||||||
@@ -664,6 +669,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
|
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
|
||||||
else:
|
else:
|
||||||
kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
|
kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
|
||||||
|
elif 'quoted_status_id_str' in tweet:
|
||||||
|
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
|
||||||
if 'card' in result:
|
if 'card' in result:
|
||||||
kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
|
kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
|
||||||
return self._make_tweet(tweet, user, **kwargs)
|
return self._make_tweet(tweet, user, **kwargs)
|
||||||
|
|||||||
@@ -177,11 +177,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
|||||||
continue
|
continue
|
||||||
if 'data-video' in a.attrs:
|
if 'data-video' in a.attrs:
|
||||||
# Video
|
# Video
|
||||||
|
if 'data-link-attr' in a.attrs:
|
||||||
|
hrefUrl = urllib.parse.unquote(a.attrs['data-link-attr'].split('to=')[1].split('&')[0])
|
||||||
|
else:
|
||||||
|
hrefUrl = f'https://vk.com{a["href"]}'
|
||||||
video = Video(
|
video = Video(
|
||||||
id = a['data-video'],
|
id = a['data-video'],
|
||||||
list = a['data-list'],
|
list = a['data-list'],
|
||||||
duration = int(a['data-duration']),
|
duration = int(a['data-duration']),
|
||||||
url = f'https://vk.com{a["href"]}',
|
url = hrefUrl,
|
||||||
thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
|
thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
|||||||
_logger.warning('User does not exist')
|
_logger.warning('User does not exist')
|
||||||
self._user = _userDoesNotExist
|
self._user = _userDoesNotExist
|
||||||
else:
|
else:
|
||||||
raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
|
raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})')
|
||||||
|
|
||||||
def _check_timeline_response(self, r):
|
def _check_timeline_response(self, r):
|
||||||
if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':
|
if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':
|
||||||
|
|||||||
Reference in New Issue
Block a user