mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-13 05:48:33 +03:00
Merge branch 'main' into initial-release
This commit is contained in:
@@ -23,7 +23,7 @@ class GettrScraper(Scraper):
|
|||||||
@logger.catch
|
@logger.catch
|
||||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||||
client = PublicClient()
|
client = PublicClient()
|
||||||
username = self.get_username_from_url(channel.url)
|
username = self.get_username_from_url(channel.url).lower()
|
||||||
scraper = client.user_activity(username=username, type="posts")
|
scraper = client.user_activity(username=username, type="posts")
|
||||||
|
|
||||||
for post in scraper:
|
for post in scraper:
|
||||||
|
|||||||
@@ -94,11 +94,17 @@ def process_video(video):
|
|||||||
else:
|
else:
|
||||||
rumbles = rumble_soup['data-value']
|
rumbles = rumble_soup['data-value']
|
||||||
|
|
||||||
|
view_span = video.find('span', {'class' : 'video-item--views'})
|
||||||
|
if view_span is None:
|
||||||
|
views = None
|
||||||
|
else:
|
||||||
|
views = view_span.get('data-value')
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'title' : video.find('h3').text,
|
'title' : video.find('h3').text,
|
||||||
'thumbnail' : video.find('img')['src'],
|
'thumbnail' : video.find('img')['src'],
|
||||||
'link' : BASE_URL + video.find('a', href = True)['href'],
|
'link' : BASE_URL + video.find('a', href = True)['href'],
|
||||||
'views' : video.find('span', {'class' : 'video-item--views'})['data-value'],
|
'views' : views,
|
||||||
'rumbles' : rumbles,
|
'rumbles' : rumbles,
|
||||||
'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'],
|
'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'],
|
||||||
'datetime' : datetime.fromisoformat(video.find('time')['datetime'])}
|
'datetime' : datetime.fromisoformat(video.find('time')['datetime'])}
|
||||||
@@ -119,7 +125,7 @@ def get_channel_videos(url):
|
|||||||
if r.status_code == 404:
|
if r.status_code == 404:
|
||||||
break
|
break
|
||||||
|
|
||||||
soup = BeautifulSoup(r.content, features = 'lxml')
|
soup = BeautifulSoup(r.content, features = 'html.parser')
|
||||||
|
|
||||||
video_list = soup.find_all('li', {'class' : 'video-listing-entry'})
|
video_list = soup.find_all('li', {'class' : 'video-listing-entry'})
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user