mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 11:58:28 +03:00
@@ -69,7 +69,7 @@ def _requests_response_repr(name, response, withHistory = True):
|
||||
if withHistory and response.history:
|
||||
ret.append(f'\n {name}.history = [')
|
||||
for previousResponse in response.history:
|
||||
ret.append(f'\n ')
|
||||
ret.append('\n ')
|
||||
ret.append(_requests_response_repr('_', previousResponse, withHistory = False).replace('\n', '\n '))
|
||||
ret.append('\n ]')
|
||||
ret.append(f'\n {name}.status_code = {response.status_code}')
|
||||
@@ -83,8 +83,8 @@ def _requests_response_repr(name, response, withHistory = True):
|
||||
def _requests_exception_repr(name, exc):
|
||||
ret = []
|
||||
ret.append(f'{name} = {exc!r}')
|
||||
ret.append(f'\n ' + _repr(f'{name}.request', exc.request).replace('\n', '\n '))
|
||||
ret.append(f'\n ' + _repr(f'{name}.response', exc.response).replace('\n', '\n '))
|
||||
ret.append('\n ' + _repr(f'{name}.request', exc.request).replace('\n', '\n '))
|
||||
ret.append('\n ' + _repr(f'{name}.response', exc.response).replace('\n', '\n '))
|
||||
return ''.join(ret)
|
||||
|
||||
|
||||
@@ -151,7 +151,7 @@ def _dump_stack_and_locals(trace, exc = None):
|
||||
fp.write('\n')
|
||||
fp.write('\n')
|
||||
if 'self' in locals_ and hasattr(locals_['self'], '__dict__'):
|
||||
fp.write(f'Object dict:\n')
|
||||
fp.write('Object dict:\n')
|
||||
fp.write(repr(locals_['self'].__dict__))
|
||||
fp.write('\n\n')
|
||||
name = fp.name
|
||||
|
||||
@@ -342,7 +342,7 @@ class FacebookGroupScraper(_FacebookCommonScraper):
|
||||
while (data := pageletDataPattern.search(r.text).group(0)[pageletDataPrefixLength:]):
|
||||
# As on the user profile pages, the web app sends a lot of additional parameters, but those all seem to be unnecessary (although some change the response format, e.g. from JSON to HTML)
|
||||
r = self._get(
|
||||
f'https://upload.facebook.com/ajax/pagelet/generic.php/GroupEntstreamPagelet',
|
||||
'https://upload.facebook.com/ajax/pagelet/generic.php/GroupEntstreamPagelet',
|
||||
params = {'data': data, '__a': 1},
|
||||
headers = headers,
|
||||
)
|
||||
|
||||
@@ -106,12 +106,12 @@ class _InstagramCommonScraper(snscrape.base.Scraper):
|
||||
def get_items(self):
|
||||
r = self._initial_page()
|
||||
if r.status_code == 404:
|
||||
_logger.warning(f'Page does not exist')
|
||||
_logger.warning('Page does not exist')
|
||||
return
|
||||
response = r._snscrape_json_obj
|
||||
rhxGis = response['rhx_gis'] if 'rhx_gis' in response else ''
|
||||
if response['entry_data'][self._pageName][0]['graphql'][self._responseContainer][self._edgeXToMedia]['count'] == 0:
|
||||
_logger.info(f'Page has no posts')
|
||||
_logger.info('Page has no posts')
|
||||
return
|
||||
if not response['entry_data'][self._pageName][0]['graphql'][self._responseContainer][self._edgeXToMedia]['edges']:
|
||||
_logger.warning('Private account')
|
||||
|
||||
@@ -131,7 +131,7 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
|
||||
else: # E.g. submission 617p51 but can likely happen for comments as well
|
||||
permalink = f'/comments/{d["link_id"][3:]}/_/{d["id"]}/'
|
||||
else:
|
||||
_logger.warning(f'Unable to find or construct permalink')
|
||||
_logger.warning('Unable to find or construct permalink')
|
||||
permalink = '/'
|
||||
|
||||
kwargs = {
|
||||
|
||||
@@ -387,7 +387,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
stopOnEmptyResponse = entry['content']['operation']['cursor']['stopOnEmptyResponse']
|
||||
elif entry['entryId'].startswith('cursor-showMoreThreadsPrompt-'): # E.g. 'offensive' replies button
|
||||
promptCursor = entry['content']['operation']['cursor']['value']
|
||||
elif direction is _ScrollDirection.BOTH and bottomCursorAndStop is None and (entry['entryId'] == f'sq-cursor-bottom' or entry['entryId'].startswith('cursor-bottom-')):
|
||||
elif direction is _ScrollDirection.BOTH and bottomCursorAndStop is None and (entry['entryId'] == 'sq-cursor-bottom' or entry['entryId'].startswith('cursor-bottom-')):
|
||||
newBottomCursorAndStop = (entry['content']['operation']['cursor']['value'], entry['content']['operation']['cursor'].get('stopOnEmptyResponse', False))
|
||||
if bottomCursorAndStop is None and newBottomCursorAndStop is not None:
|
||||
bottomCursorAndStop = newBottomCursorAndStop
|
||||
@@ -631,9 +631,9 @@ class TwitterSearchScraper(_TwitterAPIScraper):
|
||||
# Accept a 429 response as "valid" to prevent retries; handled explicitly in get_items
|
||||
return True, None
|
||||
if r.headers.get('content-type').replace(' ', '') != 'application/json;charset=utf-8':
|
||||
return False, f'content type is not JSON'
|
||||
return False, 'content type is not JSON'
|
||||
if r.status_code != 200:
|
||||
return False, f'non-200 status code'
|
||||
return False, 'non-200 status code'
|
||||
return True, None
|
||||
|
||||
def get_items(self):
|
||||
|
||||
Reference in New Issue
Block a user