Fix vkontakte-user pagination

This commit is contained in:
Jody Leonard
2019-10-30 20:39:27 -04:00
parent e6aae35304
commit 20ea117a2c

View File

@@ -80,14 +80,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
if r.status_code != 200:
logger.error(f'Got status code {r.status_code}')
return
fields = r.content.split(b'<!>')
if fields[5].startswith(b'<div class="page_block no_posts">'):
# Convert to JSON and read the HTML payload. Note that this implicitly converts the data to a Python string (i.e., Unicode), away from the windows-1251-encoded bytes.
posts = r.json()['payload'][1][0]
if posts.startswith('<div class="page_block no_posts">'):
# Reached the end
break
if not fields[5].startswith(b'<div id="post'):
logger.error(f'Got an unknown response: {fields[5][:200]!r}...')
if not posts.startswith('<div id="post'):
logger.error(f'Got an unknown response: {posts[:200]!r}...')
break
soup = bs4.BeautifulSoup(fields[5], 'lxml', from_encoding = r.encoding)
soup = bs4.BeautifulSoup(posts, 'lxml')
yield from self._soup_to_items(soup, baseUrl)
@classmethod