mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 20:38:29 +03:00
Remove dirtyUrl, which no longer appears to be used by Instagram
#234
This commit is contained in:
@@ -13,8 +13,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class InstagramPost(snscrape.base.Item):
|
class InstagramPost(snscrape.base.Item):
|
||||||
cleanUrl: str
|
url: str
|
||||||
dirtyUrl: str
|
|
||||||
date: datetime.datetime
|
date: datetime.datetime
|
||||||
content: str
|
content: str
|
||||||
thumbnailUrl: str
|
thumbnailUrl: str
|
||||||
@@ -26,7 +25,7 @@ class InstagramPost(snscrape.base.Item):
|
|||||||
isVideo: bool
|
isVideo: bool
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.cleanUrl
|
return self.url
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -85,11 +84,9 @@ class InstagramCommonScraper(snscrape.base.Scraper):
|
|||||||
for node in response[self._responseContainer][self._edgeXToMedia]['edges']:
|
for node in response[self._responseContainer][self._edgeXToMedia]['edges']:
|
||||||
code = node['node']['shortcode']
|
code = node['node']['shortcode']
|
||||||
username = node['node']['owner']['username'] if 'username' in node['node']['owner'] else None
|
username = node['node']['owner']['username'] if 'username' in node['node']['owner'] else None
|
||||||
usernameQuery = '?taken-by=' + (username or '')
|
url = f'https://www.instagram.com/p/{code}/'
|
||||||
cleanUrl = f'https://www.instagram.com/p/{code}/'
|
|
||||||
yield InstagramPost(
|
yield InstagramPost(
|
||||||
cleanUrl = cleanUrl,
|
url = url,
|
||||||
dirtyUrl = f'{cleanUrl}{usernameQuery}',
|
|
||||||
date = datetime.datetime.fromtimestamp(node['node']['taken_at_timestamp'], datetime.timezone.utc),
|
date = datetime.datetime.fromtimestamp(node['node']['taken_at_timestamp'], datetime.timezone.utc),
|
||||||
content = node['node']['edge_media_to_caption']['edges'][0]['node']['text'] if len(node['node']['edge_media_to_caption']['edges']) else None,
|
content = node['node']['edge_media_to_caption']['edges'][0]['node']['text'] if len(node['node']['edge_media_to_caption']['edges']) else None,
|
||||||
thumbnailUrl = node['node']['thumbnail_src'],
|
thumbnailUrl = node['node']['thumbnail_src'],
|
||||||
|
|||||||
Reference in New Issue
Block a user