Replace the paired `<field>` / `<field>Granularity` dataclass fields with a single
`IntWithGranularity` value (an int subclass carrying a `granularity` attribute), keep the old
`<field>Granularity` names readable through `_DeprecatedProperty`, and emit the granularity
as a `<field>.granularity` key in the JSON output.

Review notes on this revision of the patch:
* The deprecated `<field>Granularity` accessors on telegram `Channel` and vkontakte `User`
  now return None when the underlying Optional counter is None. Without the guard they
  raise AttributeError, whereas the removed fields simply held None in that case. The
  check is `is not None` rather than truthiness because a count of 0 is falsy.
* NOTE(review): line breaks were restored from a whitespace-collapsed copy of this patch.
  Tab indentation and the position of blank context lines are assumed (placed so that the
  hunk line counts add up) - confirm that the patch applies cleanly before merging.
* The last hunk drops its trailing blank context line (header adjusted to 15/15 lines) so
  the patch does not depend on a blank final line surviving transport.
* The `index` hashes of telegram.py and vkontakte.py are carried over unchanged and do not
  describe the amended post-image.

diff --git a/snscrape/base.py b/snscrape/base.py
index 9ee25a3..881d293 100644
--- a/snscrape/base.py
+++ b/snscrape/base.py
@@ -37,9 +37,13 @@ class _JSONDataclass:
 	def json(self):
 		'''Convert the object to a JSON string'''
 		out = dataclasses.asdict(self)
-		for key, value in out.items():
+		for key, value in list(out.items()): # Modifying the dict below, so make a copy first
 			if isinstance(value, _JSONDataclass):
 				out[key] = value.json()
+			elif isinstance(value, IntWithGranularity):
+				out[key] = int(value)
+				assert f'{key}.granularity' not in out, f'Granularity collision on {key}.granularity'
+				out[f'{key}.granularity'] = value.granularity
 		return json.dumps(out, default = _json_serialise_datetime)


@@ -65,10 +69,18 @@ class Entity(_JSONDataclass):
 	pass


-Granularity = int
-'''Type of fields storing the unit/granularity of numbers.
+class IntWithGranularity(int):
+	'''A number with an associated granularity

-For example, a granularity of 1000 means that the SNS returned something like '42k' and the last three significant digits are unknown.'''
+	For example, an IntWithGranularity(42000, 1000) represents a number on the order of 42000 with two significant digits, i.e. something counted with a granularity of 1000.'''
+
+	def __new__(cls, value, granularity, *args, **kwargs):
+		obj = super().__new__(cls, value, *args, **kwargs)
+		obj.granularity = granularity
+		return obj
+
+	def __reduce__(self):
+		return (IntWithGranularity, (int(self), self.granularity))


 class URLItem(Item):
diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py
index 8afc15c..a68fdf7 100644
--- a/snscrape/modules/instagram.py
+++ b/snscrape/modules/instagram.py
@@ -33,12 +33,13 @@ class InstagramPost(snscrape.base.Item):
 class User(snscrape.base.Entity):
 	username: str
 	name: typing.Optional[str]
-	followers: int
-	followersGranularity: snscrape.base.Granularity
-	following: int
-	followingGranularity: snscrape.base.Granularity
-	posts: int
-	postsGranularity: snscrape.base.Granularity
+	followers: snscrape.base.IntWithGranularity
+	following: snscrape.base.IntWithGranularity
+	posts: snscrape.base.IntWithGranularity
+
+	followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity, 'followers.granularity')
+	followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity, 'following.granularity')
+	postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity, 'posts.granularity')

 	def __str__(self):
 		return f'https://www.instagram.com/{self.username}/'
@@ -204,18 +205,15 @@ class InstagramUserScraper(InstagramCommonScraper):
 			else:
 				return int(s.replace(',', '')), 1

-		followers, followersGranularity = parse_num(m.group(1))
-		following, followingGranularity = parse_num(m.group(2))
-		posts, postsGranularity = parse_num(m.group(3))
+		followers = snscrape.base.IntWithGranularity(*parse_num(m.group(1)))
+		following = snscrape.base.IntWithGranularity(*parse_num(m.group(2)))
+		posts = snscrape.base.IntWithGranularity(*parse_num(m.group(3)))
 		return User(
 			username = m.group(5) or m.group(6),
 			name = m.group(4) or None,
 			followers = followers,
-			followersGranularity = followersGranularity,
 			following = following,
-			followingGranularity = followingGranularity,
 			posts = posts,
-			postsGranularity = postsGranularity,
 		)


diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 57f7350..d8819c9 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -43,14 +43,15 @@ class Channel(snscrape.base.Entity):
 	photo: str
 	description: typing.Optional[str] = None
 	members: typing.Optional[int] = None
-	photos: typing.Optional[int] = None
-	photosGranularity: typing.Optional[snscrape.base.Granularity] = None
-	videos: typing.Optional[int] = None
-	videosGranularity: typing.Optional[snscrape.base.Granularity] = None
-	links: typing.Optional[int] = None
-	linksGranularity: typing.Optional[snscrape.base.Granularity] = None
-	files: typing.Optional[int] = None
-	filesGranularity: typing.Optional[snscrape.base.Granularity] = None
+	photos: typing.Optional[snscrape.base.IntWithGranularity] = None
+	videos: typing.Optional[snscrape.base.IntWithGranularity] = None
+	links: typing.Optional[snscrape.base.IntWithGranularity] = None
+	files: typing.Optional[snscrape.base.IntWithGranularity] = None
+
+	photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity if self.photos is not None else None, 'photos.granularity')
+	videosGranularity = snscrape.base._DeprecatedProperty('videosGranularity', lambda self: self.videos.granularity if self.videos is not None else None, 'videos.granularity')
+	linksGranularity = snscrape.base._DeprecatedProperty('linksGranularity', lambda self: self.links.granularity if self.links is not None else None, 'links.granularity')
+	filesGranularity = snscrape.base._DeprecatedProperty('filesGranularity', lambda self: self.files.granularity if self.files is not None else None, 'files.granularity')

 	def __str__(self):
 		return f'https://t.me/s/{self.username}'
@@ -186,7 +187,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				# Already extracted more accurately from /channel, skip
 				continue
 			elif type_ in ('photos', 'videos', 'links', 'files'):
-				kwargs[type_], kwargs[f'{type_}Granularity'] = value, granularity
+				kwargs[type_] = snscrape.base.IntWithGranularity(value, granularity)

 		return Channel(**kwargs)

diff --git a/snscrape/modules/vkontakte.py b/snscrape/modules/vkontakte.py
index 89fa63c..0feb834 100644
--- a/snscrape/modules/vkontakte.py
+++ b/snscrape/modules/vkontakte.py
@@ -29,16 +29,17 @@ class User(snscrape.base.Entity):
 	verified: bool
 	description: typing.Optional[str] = None
 	websites: typing.Optional[typing.List[str]] = None
-	followers: typing.Optional[int] = None
-	followersGranularity: typing.Optional[snscrape.base.Granularity] = None
-	posts: typing.Optional[int] = None
-	postsGranularity: typing.Optional[snscrape.base.Granularity] = None
-	photos: typing.Optional[int] = None
-	photosGranularity: typing.Optional[snscrape.base.Granularity] = None
-	tags: typing.Optional[int] = None
-	tagsGranularity: typing.Optional[snscrape.base.Granularity] = None
-	following: typing.Optional[int] = None
-	followingGranularity: typing.Optional[snscrape.base.Granularity] = None
+	followers: typing.Optional[snscrape.base.IntWithGranularity] = None
+	posts: typing.Optional[snscrape.base.IntWithGranularity] = None
+	photos: typing.Optional[snscrape.base.IntWithGranularity] = None
+	tags: typing.Optional[snscrape.base.IntWithGranularity] = None
+	following: typing.Optional[snscrape.base.IntWithGranularity] = None
+
+	followersGranularity = snscrape.base._DeprecatedProperty('followersGranularity', lambda self: self.followers.granularity if self.followers is not None else None, 'followers.granularity')
+	postsGranularity = snscrape.base._DeprecatedProperty('postsGranularity', lambda self: self.posts.granularity if self.posts is not None else None, 'posts.granularity')
+	photosGranularity = snscrape.base._DeprecatedProperty('photosGranularity', lambda self: self.photos.granularity if self.photos is not None else None, 'photos.granularity')
+	tagsGranularity = snscrape.base._DeprecatedProperty('tagsGranularity', lambda self: self.tags.granularity if self.tags is not None else None, 'tags.granularity')
+	followingGranularity = snscrape.base._DeprecatedProperty('followingGranularity', lambda self: self.following.granularity if self.following is not None else None, 'following.granularity')

 	def __str__(self):
 		return f'https://vk.com/{self.username}'
@@ -204,15 +205,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 				if label in ('follower', 'post', 'photo', 'tag'):
 					label = f'{label}s'
 				if label in ('followers', 'posts', 'photos', 'tags'):
-					kwargs[label], kwargs[f'{label}Granularity'] = count, granularity
+					kwargs[label] = snscrape.base.IntWithGranularity(count, granularity)

 		if (idolsDiv := soup.find('div', id = 'profile_idols')):
 			if (topDiv := idolsDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Following':
-				kwargs['following'], kwargs['followingGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)
+				kwargs['following'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))

 		# On public pages, this is where followers are listed
 		if (followersDiv := soup.find('div', id = 'public_followers')):
 			if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Followers':
-				kwargs['followers'], kwargs['followersGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)
+				kwargs['followers'] = snscrape.base.IntWithGranularity(*parse_num(topDiv.find('span', class_ = 'header_count').text))

 		return User(**kwargs)