From a11eef6b06e03177aac3371a81fd8b587e760f5e Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 16 Sep 2021 13:04:57 -0400 Subject: [PATCH] User label url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each label also has a URL which is used for learning more about the label. While there are more label descriptions than label URLs, the URLs do seem to group language variants of the same label. For example, https://help.twitter.com/rules-and-policies/state-affiliated-china is used for all of the following label descriptions: * Média affilié à un État, Chine * China state-affiliated media * 中国官方媒体 * Çin devletine bağlı medya * China government official In some analysis contexts it could be useful to group these together. --- snscrape/modules/twitter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index b6da951..ea0e0fa 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -137,6 +137,7 @@ class User(snscrape.base.Entity): profileImageUrl: typing.Optional[str] = None profileBannerUrl: typing.Optional[str] = None label: typing.Optional[str] = None + labelUrl: typing.Optional[str] = None @property def url(self): @@ -460,6 +461,7 @@ class TwitterAPIScraper(snscrape.base.Scraper): kwargs['profileBannerUrl'] = user.get('profile_banner_url') if 'label' in user['ext']['highlightedLabel']['r']['ok']: kwargs['label'] = user['ext']['highlightedLabel']['r']['ok']['label']['description'] + kwargs['labelUrl'] = user['ext']['highlightedLabel']['r']['ok']['label']['url']['url'] return User(**kwargs)