mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 20:38:29 +03:00
Added capability to extract the number of channel members when the string in membersDiv has the word 'subscribers' rather than 'members'.
This commit is contained in:
@@ -177,8 +177,8 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
|||||||
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
|
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
|
||||||
soup = bs4.BeautifulSoup(r.text, 'lxml')
|
soup = bs4.BeautifulSoup(r.text, 'lxml')
|
||||||
membersDiv = soup.find('div', class_ = 'tgme_page_extra')
|
membersDiv = soup.find('div', class_ = 'tgme_page_extra')
|
||||||
if membersDiv.text.endswith(' members'):
|
if membersDiv.text.endswith((' members', ' subscribers')):
|
||||||
kwargs['members'] = int(membersDiv.text[:-8].replace(' ', ''))
|
kwargs['members'] = int(''.join(membersDiv.text.split(' ')[:-1]))
|
||||||
kwargs['photo'] = soup.find('img', class_ = 'tgme_page_photo_image').attrs['src']
|
kwargs['photo'] = soup.find('img', class_ = 'tgme_page_photo_image').attrs['src']
|
||||||
|
|
||||||
r, soup = self._initial_page()
|
r, soup = self._initial_page()
|
||||||
|
|||||||
Reference in New Issue
Block a user