mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-10 19:38:29 +03:00
Merge pull request #518 from hgrsd/fix/vkontakte-photo-scrape
fix(vkontakte): update photo detection
This commit is contained in:
@@ -117,6 +117,9 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
return urllib.parse.unquote(a['href'][13 : end])
|
||||
return None
|
||||
|
||||
def is_photo(self, a):
    """Return True if the thumbnail anchor *a* is labelled as a photo.

    The check looks only at the element's ``aria-label`` attribute: the
    anchor counts as a photo when that attribute is present, is a string,
    and starts with ``'photo'``.

    Args:
        a: An element exposing a dict-like ``attrs`` mapping.

    Returns:
        bool: True for a photo anchor; False when ``aria-label`` is
        missing, is not a string, or does not start with ``'photo'``.
    """
    label = a.attrs.get('aria-label')
    # Guard against a missing or non-string value so the prefix test
    # cannot raise on unexpected markup.
    return isinstance(label, str) and label.startswith('photo')
|
||||
|
||||
def _date_span_to_date(self, dateSpan):
|
||||
if not dateSpan:
|
||||
return None
|
||||
@@ -172,7 +175,7 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
not (not isCopy and thumbsDiv.parent.name == 'div' and 'class' in thumbsDiv.parent.attrs and 'copy_quote' in thumbsDiv.parent.attrs['class']): # Skip post quotes
|
||||
photos = []
|
||||
for a in thumbsDiv.find_all('a', class_ = 'page_post_thumb_wrap'):
|
||||
if 'data-photo-id' not in a.attrs and 'data-video' not in a.attrs:
|
||||
if not self.is_photo(a) and 'data-video' not in a.attrs:
|
||||
_logger.warning(f'Skipping non-photo and non-video thumb wrap on {url}')
|
||||
continue
|
||||
if 'data-video' in a.attrs:
|
||||
|
||||
Reference in New Issue
Block a user