From 27374285a257369e3d2c614212ec2c875b720c77 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Fri, 13 Jan 2023 08:32:02 +0000 Subject: [PATCH] Fix crash on missing source label data The removal of this data was announced in mid-November, but the API still always returned it until very recently. --- snscrape/modules/twitter.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 62a5a01..3aa36ed 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -69,7 +69,7 @@ class Tweet(snscrape.base.Item): quoteCount: int conversationId: int lang: str - source: str + source: typing.Optional[str] = None sourceUrl: typing.Optional[str] = None sourceLabel: typing.Optional[str] = None links: typing.Optional[typing.List['TextLink']] = None @@ -899,11 +899,12 @@ class _TwitterAPIScraper(snscrape.base.Scraper): kwargs['quoteCount'] = tweet['quote_count'] kwargs['conversationId'] = tweet['conversation_id'] if 'conversation_id' in tweet else int(tweet['conversation_id_str']) kwargs['lang'] = tweet['lang'] - kwargs['source'] = tweet['source'] - if (match := re.search(r'href=[\'"]?([^\'" >]+)', tweet['source'])): - kwargs['sourceUrl'] = match.group(1) - if (match := re.search(r'>([^<]*)<', tweet['source'])): - kwargs['sourceLabel'] = match.group(1) + if 'source' in tweet: + kwargs['source'] = tweet['source'] + if (match := re.search(r'href=[\'"]?([^\'" >]+)', tweet['source'])): + kwargs['sourceUrl'] = match.group(1) + if (match := re.search(r'>([^<]*)<', tweet['source'])): + kwargs['sourceLabel'] = match.group(1) if 'extended_entities' in tweet and 'media' in tweet['extended_entities']: media = [] for medium in tweet['extended_entities']['media']: