mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-08 02:28:29 +03:00
Introduce dedicated IntWithGranularity type and deprecate the direct *Granularity fields
This commit is contained in:
@@ -37,9 +37,13 @@ class _JSONDataclass:
|
||||
def json(self):
    '''Convert the object to a JSON string

    Fields whose value is an IntWithGranularity are emitted as a plain integer under their own key, and the granularity is stored under an additional '<key>.granularity' key next to it.
    Values that json cannot encode natively are handed to _json_serialise_datetime.'''

    out = dataclasses.asdict(self)
    # Keys are inserted into out inside the loop, so iterate over a snapshot of the items.
    # Iterating out.items() directly would raise RuntimeError as soon as the dict changes size.
    for key, value in list(out.items()):
        if isinstance(value, _JSONDataclass):
            out[key] = value.json()
        elif isinstance(value, IntWithGranularity):
            out[key] = int(value)
            # The derived key must not clobber an existing entry of the same name.
            assert f'{key}.granularity' not in out, f'Granularity collision on {key}.granularity'
            out[f'{key}.granularity'] = value.granularity
    return json.dumps(out, default = _json_serialise_datetime)
|
||||
|
||||
|
||||
@@ -65,10 +69,18 @@ class Entity(_JSONDataclass):
|
||||
pass
|
||||
|
||||
|
||||
Granularity = int
|
||||
'''Type of fields storing the unit/granularity of numbers.
|
||||
class IntWithGranularity(int):
    '''A number with an associated granularity

    For example, an IntWithGranularity(42000, 1000) represents a number on the order of 42000 with two significant digits, i.e. something counted with a granularity of 1000.

    The object behaves like a regular int; the granularity is available as the .granularity attribute.'''

    def __new__(cls, value, granularity, *args, **kwargs):
        '''Create the integer from value (further arguments are passed on to int) and attach granularity to it.'''

        obj = super().__new__(cls, value, *args, **kwargs)
        obj.granularity = granularity
        return obj

    def __reduce__(self):
        '''Describe how to rebuild this object for pickle and copy.

        int's default reconstruction does not supply the required granularity argument to __new__, so both constructor arguments are spelled out here.
        type(self) rather than a hard-coded class keeps subclasses intact across copy/pickle round trips.'''

        return (type(self), (int(self), self.granularity))
|
||||
|
||||
|
||||
class URLItem(Item):
|
||||
|
||||
@@ -33,12 +33,13 @@ class InstagramPost(snscrape.base.Item):
|
||||
class User(snscrape.base.Entity):
|
||||
username: str
|
||||
name: typing.Optional[str]
|
||||
followers: int
|
||||
followersGranularity: snscrape.base.Granularity
|
||||
following: int
|
||||
followingGranularity: snscrape.base.Granularity
|
||||
posts: int
|
||||
postsGranularity: snscrape.base.Granularity
|
||||
followers: snscrape.base.IntWithGranularity
|
||||
following: snscrape.base.IntWithGranularity
|
||||
posts: snscrape.base.IntWithGranularity
|
||||
|
||||
followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity')
|
||||
followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity')
|
||||
postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity')
|
||||
|
||||
def __str__(self):
|
||||
return f'https://www.instagram.com/{self.username}/'
|
||||
@@ -204,18 +205,15 @@ class InstagramUserScraper(InstagramCommonScraper):
|
||||
else:
|
||||
return int(s.replace(',', '')), 1
|
||||
|
||||
followers, followersGranularity = parse_num(m.group(1))
|
||||
following, followingGranularity = parse_num(m.group(2))
|
||||
posts, postsGranularity = parse_num(m.group(3))
|
||||
followers = snscrape.base.IntWithGranularity(*parse_num(m.group(1)))
|
||||
following = snscrape.base.IntWithGranularity(*parse_num(m.group(2)))
|
||||
posts = snscrape.base.IntWithGranularity(*parse_num(m.group(3)))
|
||||
return User(
|
||||
username = m.group(5) or m.group(6),
|
||||
name = m.group(4) or None,
|
||||
followers = followers,
|
||||
followersGranularity = followersGranularity,
|
||||
following = following,
|
||||
followingGranularity = followingGranularity,
|
||||
posts = posts,
|
||||
postsGranularity = postsGranularity,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -43,14 +43,15 @@ class Channel(snscrape.base.Entity):
|
||||
photo: str
|
||||
description: typing.Optional[str] = None
|
||||
members: typing.Optional[int] = None
|
||||
photos: typing.Optional[int] = None
|
||||
photosGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
videos: typing.Optional[int] = None
|
||||
videosGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
links: typing.Optional[int] = None
|
||||
linksGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
files: typing.Optional[int] = None
|
||||
filesGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
photos: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
videos: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
links: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
files: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
|
||||
# Deprecated aliases for the granularity that now lives on the count itself.
# The counts are optional and default to None, so the getters fall back to None instead of raising AttributeError on a missing count; this matches the None default of the former *Granularity fields.
photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: getattr(self.photos, 'granularity', None), 'photos.granularity')
videosGranularity = snscrape.base._DeprecatedProperty('videosGranularity', lambda self: getattr(self.videos, 'granularity', None), 'videos.granularity')
linksGranularity = snscrape.base._DeprecatedProperty('linksGranularity', lambda self: getattr(self.links, 'granularity', None), 'links.granularity')
filesGranularity = snscrape.base._DeprecatedProperty('filesGranularity', lambda self: getattr(self.files, 'granularity', None), 'files.granularity')
|
||||
|
||||
def __str__(self):
|
||||
return f'https://t.me/s/{self.username}'
|
||||
@@ -186,7 +187,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
||||
# Already extracted more accurately from /channel, skip
|
||||
continue
|
||||
elif type_ in ('photos', 'videos', 'links', 'files'):
|
||||
kwargs[type_], kwargs[f'{type_}Granularity'] = value, granularity
|
||||
kwargs[type_] = snscrape.base.IntWithGranularity(value, granularity)
|
||||
|
||||
return Channel(**kwargs)
|
||||
|
||||
|
||||
@@ -29,16 +29,17 @@ class User(snscrape.base.Entity):
|
||||
verified: bool
|
||||
description: typing.Optional[str] = None
|
||||
websites: typing.Optional[typing.List[str]] = None
|
||||
followers: typing.Optional[int] = None
|
||||
followersGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
posts: typing.Optional[int] = None
|
||||
postsGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
photos: typing.Optional[int] = None
|
||||
photosGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
tags: typing.Optional[int] = None
|
||||
tagsGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
following: typing.Optional[int] = None
|
||||
followingGranularity: typing.Optional[snscrape.base.Granularity] = None
|
||||
followers: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
posts: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
photos: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
tags: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
following: typing.Optional[snscrape.base.IntWithGranularity] = None
|
||||
|
||||
# Deprecated aliases for the granularity that now lives on the count itself.
# The counts are optional and default to None, so the getters fall back to None instead of raising AttributeError on a missing count; this matches the None default of the former *Granularity fields.
followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: getattr(self.followers, 'granularity', None), 'followers.granularity')
postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: getattr(self.posts, 'granularity', None), 'posts.granularity')
photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: getattr(self.photos, 'granularity', None), 'photos.granularity')
tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: getattr(self.tags, 'granularity', None), 'tags.granularity')
followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: getattr(self.following, 'granularity', None), 'following.granularity')
|
||||
|
||||
def __str__(self):
|
||||
return f'https://vk.com/{self.username}'
|
||||
@@ -204,16 +205,16 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
if label in ('follower', 'post', 'photo', 'tag'):
|
||||
label = f'{label}s'
|
||||
if label in ('followers', 'posts', 'photos', 'tags'):
|
||||
kwargs[label], kwargs[f'{label}Granularity'] = count, granularity
|
||||
kwargs[label] = snscrape.base.IntWithGranularity(count, granularity)
|
||||
|
||||
if (idolsDiv := soup.find('div', id = 'profile_idols')):
|
||||
if (topDiv := idolsDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Following':
|
||||
kwargs['following'], kwargs['followingGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)
|
||||
kwargs['following'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))
|
||||
|
||||
# On public pages, this is where followers are listed
|
||||
if (followersDiv := soup.find('div', id = 'public_followers')):
|
||||
if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Followers':
|
||||
kwargs['followers'], kwargs['followersGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)
|
||||
kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))
|
||||
|
||||
return User(**kwargs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user