Merge branch 'master' into telegram-media

This commit is contained in:
Tristan Lee
2022-05-09 09:21:40 -05:00
4 changed files with 18 additions and 5 deletions

View File

@@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper):
def _check_json_callback(self, r):
if r.status_code != 200:
return False, f'status code {r.status_code}'
if r.url.startswith('https://www.instagram.com/accounts/login/'):
raise snscrape.base.ScraperException('Redirected to login page')
try:
obj = json.loads(r.text)
except json.JSONDecodeError as e:

View File

@@ -287,7 +287,12 @@ class _CLIGuestTokenManager(GuestTokenManager):
def reset(self):
super().reset()
with self._lock:
os.remove(self._file)
_logger.info(f'Deleting guest token file {self._file}')
try:
os.remove(self._file)
except FileNotFoundError:
# Another process likely already removed the file
pass
class _TwitterAPIType(enum.Enum):
@@ -339,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
o = r.json()
if not o.get('guest_token'):
raise snscrape.base.ScraperError('Unable to retrieve guest token')
raise snscrape.base.ScraperException('Unable to retrieve guest token')
self._guestTokenManager.token = o['guest_token']
assert self._guestTokenManager.token
_logger.debug(f'Using guest token {self._guestTokenManager.token}')
@@ -647,7 +652,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
#TODO Include result['softInterventionPivot'] in the Tweet object
result = result['tweet']
else:
raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}')
raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}')
tweet = result['legacy']
userId = int(result['core']['user_results']['result']['rest_id'])
user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
@@ -664,6 +669,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
else:
kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
elif 'quoted_status_id_str' in tweet:
kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
if 'card' in result:
kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
return self._make_tweet(tweet, user, **kwargs)

View File

@@ -177,11 +177,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
continue
if 'data-video' in a.attrs:
# Video
if 'data-link-attr' in a.attrs:
hrefUrl = urllib.parse.unquote(a.attrs['data-link-attr'].split('to=')[1].split('&')[0])
else:
hrefUrl = f'https://vk.com{a["href"]}'
video = Video(
id = a['data-video'],
list = a['data-list'],
duration = int(a['data-duration']),
url = f'https://vk.com{a["href"]}',
url = hrefUrl,
thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
)
continue

View File

@@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper):
_logger.warning('User does not exist')
self._user = _userDoesNotExist
else:
raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})')
def _check_timeline_response(self, r):
if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':