From b0a52e5ad7d600645951247a6e91a724f83cd677 Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Sat, 2 Apr 2022 21:26:29 -0500 Subject: [PATCH 1/2] handled case where Rumble video has no view information displayed --- cisticola/scraper/rumble.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index 737be05..37f1b53 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -94,11 +94,17 @@ def process_video(video): else: rumbles = rumble_soup['data-value'] + view_span = video.find('span', {'class' : 'video-item--views'}) + if view_span is None: + views = None + else: + views = view_span.get('data-value') + info = { 'title' : video.find('h3').text, 'thumbnail' : video.find('img')['src'], 'link' : BASE_URL + video.find('a', href = True)['href'], - 'views' : video.find('span', {'class' : 'video-item--views'})['data-value'], + 'views' : views, 'rumbles' : rumbles, 'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'], 'datetime' : datetime.fromisoformat(video.find('time')['datetime'])} @@ -119,7 +125,7 @@ def get_channel_videos(url): if r.status_code == 404: break - soup = BeautifulSoup(r.content, features = 'lxml') + soup = BeautifulSoup(r.content, features = 'html.parser') video_list = soup.find_all('li', {'class' : 'video-listing-entry'}) From 90c99aec0008c0a035b927259f504c46c1f5ecff Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Sat, 2 Apr 2022 22:36:25 -0500 Subject: [PATCH 2/2] ensured that Gettr username is lowercase for API requests to work correctly --- cisticola/scraper/gettr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index 89ed35f..f785771 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -23,7 +23,7 @@ class GettrScraper(Scraper): @logger.catch def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: client = PublicClient() - username = self.get_username_from_url(channel.url) + username = self.get_username_from_url(channel.url).lower() scraper = client.user_activity(username=username, type="posts") for post in scraper: