From 3fb731ade16dacc8c83d2b5efa28bfca80c764c7 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 16 Sep 2021 08:06:05 -0400 Subject: [PATCH] User Labels In August of 2020 Twitter started to label the accounts of government officials and state-affiliated media entities: https://blog.twitter.com/en_us/topics/product/2020/new-labels-for-government-and-state-affiliated-media-accounts This information is extremely important for researchers who are studying the impact of social media on political discourse, especially because it is not currently available through either Twitter's v1.1 or v2 API endpoints. The code in this small PR may seem a bit brittle, but I've been using it to collect data with each of the twitter subcommands and it seems to work reliably. While there are image and page URLs associated with each label, I chose to only collect the text description of the label since it should be sufficient for finding the additional information later if needed. --- snscrape/modules/twitter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 9b0a598..b6da951 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -136,6 +136,7 @@ class User(snscrape.base.Entity): linkTcourl: typing.Optional[str] = None profileImageUrl: typing.Optional[str] = None profileBannerUrl: typing.Optional[str] = None + label: typing.Optional[str] = None @property def url(self): @@ -457,6 +458,9 @@ class TwitterAPIScraper(snscrape.base.Scraper): kwargs['linkTcourl'] = user.get('url') kwargs['profileImageUrl'] = user['profile_image_url_https'] kwargs['profileBannerUrl'] = user.get('profile_banner_url') + if 'label' in user['ext']['highlightedLabel']['r']['ok']: + kwargs['label'] = user['ext']['highlightedLabel']['r']['ok']['label']['description'] + return User(**kwargs)