mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 12:28:28 +03:00
Fixed edge case where _get_entity fails for a channel with no members
This commit is contained in:
@@ -270,8 +270,12 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
|||||||
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
|
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
|
||||||
soup = bs4.BeautifulSoup(r.text, 'lxml')
|
soup = bs4.BeautifulSoup(r.text, 'lxml')
|
||||||
membersDiv = soup.find('div', class_ = 'tgme_page_extra')
|
membersDiv = soup.find('div', class_ = 'tgme_page_extra')
|
||||||
if membersDiv.text.endswith((' members', ' subscribers')):
|
if membersDiv.text.split(',')[0].endswith((' members', ' subscribers')):
|
||||||
kwargs['members'] = int(''.join(membersDiv.text.split(' ')[:-1]))
|
membersStr = ''.join(membersDiv.text.split(',')[0].split(' ')[:-1])
|
||||||
|
if membersStr == 'no':
|
||||||
|
kwargs['members'] = 0
|
||||||
|
else:
|
||||||
|
kwargs['members'] = int(membersStr)
|
||||||
photoImg = soup.find('img', class_ = 'tgme_page_photo_image')
|
photoImg = soup.find('img', class_ = 'tgme_page_photo_image')
|
||||||
if photoImg is not None:
|
if photoImg is not None:
|
||||||
kwargs['photo'] = photoImg.attrs['src']
|
kwargs['photo'] = photoImg.attrs['src']
|
||||||
|
|||||||
Reference in New Issue
Block a user