From bd619bf4e9946827df7e6cb96911380190f52bd2 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 1 Sep 2020 03:45:23 +0000 Subject: [PATCH] Log and ignore tweets which are not contained in the globalObjects Fixes #61 --- snscrape/modules/twitter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index a4cb752..980341a 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -216,8 +216,14 @@ class TwitterAPIScraper(TwitterCommonScraper): if 'tweet' in entry['content']['item']['content']: if 'promotedMetadata' in entry['content']['item']['content']['tweet']: # Promoted tweet aka ads continue + if entry['content']['item']['content']['tweet']['id'] not in obj['globalObjects']['tweets']: + logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tweet"]["id"]} which is not in globalObjects') + continue tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tweet']['id']] elif 'tombstone' in entry['content']['item']['content'] and 'tweet' in entry['content']['item']['content']['tombstone']: + if entry['content']['item']['content']['tombstone']['tweet']['id'] not in obj['globalObjects']['tweets']: + logger.warning(f'Skipping tweet {entry["content"]["item"]["content"]["tombstone"]["tweet"]["id"]} which is not in globalObjects') + continue tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tombstone']['tweet']['id']] else: raise snscrape.base.ScraperException(f'Unable to handle entry {entry["entryId"]!r}')