mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 03:48:29 +03:00
Log and ignore tweets which are not contained in the globalObjects
Fixes #61
This commit is contained in:
@@ -216,8 +216,14 @@ class TwitterAPIScraper(TwitterCommonScraper):
 if 'tweet' in entry['content']['item']['content']:
     if 'promotedMetadata' in entry['content']['item']['content']['tweet']: # Promoted tweet aka ads
         continue
+    if entry['content']['item']['content']['tweet']['id'] not in obj['globalObjects']['tweets']:
+        logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tweet"]["id"]} which is not in globalObjects')
+        continue
     tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tweet']['id']]
 elif 'tombstone' in entry['content']['item']['content'] and 'tweet' in entry['content']['item']['content']['tombstone']:
+    if entry['content']['item']['content']['tombstone']['tweet']['id'] not in obj['globalObjects']['tweets']:
+        logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tombstone"]["tweet"]["id"]} which is not in globalObjects')
+        continue
     tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tombstone']['tweet']['id']]
 else:
     raise snscrape.base.ScraperException(f'Unable to handle entry {entry["entryId"]!r}')
Reference in New Issue
Block a user