mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-10 19:38:29 +03:00
Better log output for temporary failures (fixes #2)
This commit is contained in:
@@ -59,11 +59,32 @@ class Scraper:
|
||||
logger.debug(f'... with data: {data!r}')
|
||||
try:
|
||||
r = self._session.send(req, timeout = timeout)
|
||||
if responseOkCallback is None or responseOkCallback(r):
|
||||
logger.debug(f'{req.url} retrieved successfully')
|
||||
return r
|
||||
except requests.exceptions.RequestException as exc:
|
||||
logger.error(f'Error retrieving {url}: {exc!r}')
|
||||
if attempt < self._retries:
|
||||
retrying = ', retrying'
|
||||
level = logging.WARNING
|
||||
else:
|
||||
retrying = ''
|
||||
level = logging.ERROR
|
||||
logger.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
|
||||
else:
|
||||
if responseOkCallback is not None:
|
||||
success, msg = responseOkCallback(r)
|
||||
else:
|
||||
success, msg = (True, None)
|
||||
msg = f': {msg}' if msg else ''
|
||||
|
||||
if success:
|
||||
logger.debug(f'{req.url} retrieved successfully{msg}')
|
||||
return r
|
||||
else:
|
||||
if attempt < self._retries:
|
||||
retrying = ', retrying'
|
||||
level = logging.WARNING
|
||||
else:
|
||||
retrying = ''
|
||||
level = logging.ERROR
|
||||
logger.log(level, f'Error retrieving {req.url}{msg}{retrying}')
|
||||
if attempt < self._retries:
|
||||
sleepTime = 1.0 * 2**attempt # exponential backoff: sleep 1 second after first attempt, 2 after second, 4 after third, etc.
|
||||
logger.info(f'Waiting {sleepTime:.0f} seconds')
|
||||
|
||||
@@ -27,9 +27,8 @@ class TwitterSearchScraper(snscrape.base.Scraper):
|
||||
|
||||
def _check_json_callback(self, r):
|
||||
if r.headers['content-type'] != 'application/json;charset=utf-8':
|
||||
logger.error(f'Content type of {r.url} is not JSON')
|
||||
return False
|
||||
return True
|
||||
return False, f'content type is not JSON'
|
||||
return True, None
|
||||
|
||||
def get_items(self):
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||
|
||||
Reference in New Issue
Block a user