mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 11:58:28 +03:00
Fix crash on missing source label data
In mid-November, it was announced that this data would disappear, but the API kept returning it in every response until very recently.
This commit is contained in:
@@ -69,7 +69,7 @@ class Tweet(snscrape.base.Item):
|
||||
quoteCount: int
|
||||
conversationId: int
|
||||
lang: str
|
||||
source: str
|
||||
source: typing.Optional[str] = None
|
||||
sourceUrl: typing.Optional[str] = None
|
||||
sourceLabel: typing.Optional[str] = None
|
||||
links: typing.Optional[typing.List['TextLink']] = None
|
||||
@@ -899,11 +899,12 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
kwargs['quoteCount'] = tweet['quote_count']
|
||||
kwargs['conversationId'] = tweet['conversation_id'] if 'conversation_id' in tweet else int(tweet['conversation_id_str'])
|
||||
kwargs['lang'] = tweet['lang']
|
||||
kwargs['source'] = tweet['source']
|
||||
if (match := re.search(r'href=[\'"]?([^\'" >]+)', tweet['source'])):
|
||||
kwargs['sourceUrl'] = match.group(1)
|
||||
if (match := re.search(r'>([^<]*)<', tweet['source'])):
|
||||
kwargs['sourceLabel'] = match.group(1)
|
||||
if 'source' in tweet:
|
||||
kwargs['source'] = tweet['source']
|
||||
if (match := re.search(r'href=[\'"]?([^\'" >]+)', tweet['source'])):
|
||||
kwargs['sourceUrl'] = match.group(1)
|
||||
if (match := re.search(r'>([^<]*)<', tweet['source'])):
|
||||
kwargs['sourceLabel'] = match.group(1)
|
||||
if 'extended_entities' in tweet and 'media' in tweet['extended_entities']:
|
||||
media = []
|
||||
for medium in tweet['extended_entities']['media']:
|
||||
|
||||
Reference in New Issue
Block a user