From 892941b609e9a995748a871aeaf211bddf429827 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sun, 13 Dec 2020 23:22:17 +0000 Subject: [PATCH] Fix crash on reposts of hidden profiles --- snscrape/modules/vkontakte.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/snscrape/modules/vkontakte.py b/snscrape/modules/vkontakte.py index a87e9dd..7b0d9ba 100644 --- a/snscrape/modules/vkontakte.py +++ b/snscrape/modules/vkontakte.py @@ -140,7 +140,11 @@ class VKontakteUserScraper(snscrape.base.Scraper): logger.warning(f'Could not parse date string: {dateSpan.text!r}') def _post_div_to_item(self, post, isCopy = False): - url = urllib.parse.urljoin(self._baseUrl, post.find('a', class_ = 'post_link' if not isCopy else 'published_by_date')['href']) + postLink = post.find('a', class_ = 'post_link' if not isCopy else 'published_by_date') + if not postLink: + logger.warning(f'Skipping post without link: {str(post)[:200]!r}') + return + url = urllib.parse.urljoin(self._baseUrl, postLink['href']) assert (url.startswith('https://vk.com/wall') or (isCopy and (url.startswith('https://vk.com/video') or url.startswith('https://vk.com/photo')))) and '_' in url and url[-1] != '_' and url.rsplit('_', 1)[1].strip('0123456789') == '' if not isCopy: dateSpan = post.find('div', class_ = 'post_date').find('span', class_ = 'rel_date')