From ff54c350bc134b9f4b598b74bbfa4a03161b4919 Mon Sep 17 00:00:00 2001 From: JackDallas <3620144+JackDallas@users.noreply.github.com> Date: Fri, 30 Aug 2019 12:43:02 +0100 Subject: [PATCH 1/2] Add more fields to the instagram scraper --- snscrape/modules/instagram.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py index fac1a2e..cc9cec4 100644 --- a/snscrape/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -16,6 +16,11 @@ class InstagramPost(typing.NamedTuple, snscrape.base.Item): content: str thumbnailUrl: str displayUrl: str + username: str + likes: int + comments: int + commentsDisabled: bool + isVideo: bool def __str__(self): return self.cleanUrl @@ -57,7 +62,8 @@ class InstagramCommonScraper(snscrape.base.Scraper): def _response_to_items(self, response): for node in response[self._responseContainer][self._edgeXToMedia]['edges']: code = node['node']['shortcode'] - usernameQuery = '?taken-by=' + node['node']['owner']['username'] if 'username' in node['node']['owner'] else '' + username = node['node']['owner']['username'] if 'username' in node['node']['owner'] else '' + usernameQuery = '?taken-by=' + username cleanUrl = f'https://www.instagram.com/p/{code}/' yield InstagramPost( cleanUrl = cleanUrl, @@ -66,6 +72,11 @@ class InstagramCommonScraper(snscrape.base.Scraper): content = node['node']['edge_media_to_caption']['edges'][0]['node']['text'] if len(node['node']['edge_media_to_caption']['edges']) else None, thumbnailUrl = node['node']['thumbnail_src'], displayUrl = node['node']['display_url'], + username = username, + likes = node['node']['edge_liked_by']['count'], + comments = node['node']['edge_media_to_comment']['count'], + commentsDisabled = node['node']['comments_disabled'], + isVideo = node['node']['is_video'], ) def _check_initial_page_callback(self, r): From 9568028bf9ccfd63ebc2e170f6cc978b49970313 Mon Sep 17 00:00:00 2001 From: Jack Dallas <3620144+JackDallas@users.noreply.github.com> Date: Fri, 7 Feb 2020 11:30:16 +0000 Subject: [PATCH 2/2] Update changed fields --- snscrape/modules/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py index cc9cec4..31642e7 100644 --- a/snscrape/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -73,7 +73,7 @@ class InstagramCommonScraper(snscrape.base.Scraper): thumbnailUrl = node['node']['thumbnail_src'], displayUrl = node['node']['display_url'], username = username, - likes = node['node']['edge_liked_by']['count'], + likes = node['node']['edge_media_preview_like']['count'], comments = node['node']['edge_media_to_comment']['count'], commentsDisabled = node['node']['comments_disabled'], isVideo = node['node']['is_video'],