Log and ignore tweets which are not contained in the globalObjects

Fixes #61
This commit is contained in:
JustAnotherArchivist
2020-09-01 03:45:23 +00:00
parent 072519f539
commit bd619bf4e9

View File

@@ -216,8 +216,14 @@ class TwitterAPIScraper(TwitterCommonScraper):
if 'tweet' in entry['content']['item']['content']:
if 'promotedMetadata' in entry['content']['item']['content']['tweet']: # Promoted tweet aka ads
continue
if entry['content']['item']['content']['tweet']['id'] not in obj['globalObjects']['tweets']:
logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tweet"]["id"]} which is not in globalObjects')
continue
tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tweet']['id']]
elif 'tombstone' in entry['content']['item']['content'] and 'tweet' in entry['content']['item']['content']['tombstone']:
if entry['content']['item']['content']['tombstone']['tweet']['id'] not in obj['globalObjects']['tweets']:
logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tombstone"]["tweet"]["id"]} which is not in globalObjects')
continue
tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tombstone']['tweet']['id']]
else:
raise snscrape.base.ScraperException(f'Unable to handle entry {entry["entryId"]!r}')