mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-10 03:18:29 +03:00
Fix vkontakte-user pagination
This commit is contained in:
@@ -80,14 +80,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
if r.status_code != 200:
|
||||
logger.error(f'Got status code {r.status_code}')
|
||||
return
|
||||
fields = r.content.split(b'<!>')
|
||||
if fields[5].startswith(b'<div class="page_block no_posts">'):
|
||||
# Convert to JSON and read the HTML payload. Note that this implicitly converts the data to a Python string (i.e., Unicode) rather than leaving it as windows-1251-encoded bytes.
|
||||
posts = r.json()['payload'][1][0]
|
||||
if posts.startswith('<div class="page_block no_posts">'):
|
||||
# Reached the end
|
||||
break
|
||||
if not fields[5].startswith(b'<div id="post'):
|
||||
logger.error(f'Got an unknown response: {fields[5][:200]!r}...')
|
||||
if not posts.startswith('<div id="post'):
|
||||
logger.error(f'Got an unknown response: {posts[:200]!r}...')
|
||||
break
|
||||
soup = bs4.BeautifulSoup(fields[5], 'lxml', from_encoding = r.encoding)
|
||||
soup = bs4.BeautifulSoup(posts, 'lxml')
|
||||
yield from self._soup_to_items(soup, baseUrl)
|
||||
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user