diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 3a399ec..4d31370 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Generator import cisticola.base import requests import os @@ -55,5 +55,5 @@ class Scraper: def can_handle(self, channel: cisticola.base.Channel) -> bool: pass - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]: + def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]: pass diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index ed144dc..c5c9f66 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -4,7 +4,7 @@ import re from html.parser import HTMLParser import dateparser import json -from typing import List +from typing import Generator import requests from bs4 import BeautifulSoup @@ -23,7 +23,7 @@ class BitchuteScraper(cisticola.scraper.Scraper): return username - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]: + def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]: session = requests.Session() session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0" diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index 5ae7d96..9a52a69 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -2,7 +2,7 @@ import cisticola.base import cisticola.scraper.base from datetime import datetime import json -from typing import List +from typing import Generator from gogettr import PublicClient class GettrScraper(cisticola.scraper.base.Scraper): @@ -16,7 +16,7 @@ class GettrScraper(cisticola.scraper.base.Scraper): return username - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]: + def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]: client = PublicClient() username = GettrScraper.get_username_from_url(channel.url) scraper = client.user_activity(username=username, type="posts") diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py index c76910f..bba9276 100644 --- a/cisticola/scraper/telegram_snscrape.py +++ b/cisticola/scraper/telegram_snscrape.py @@ -1,6 +1,6 @@ import cisticola.base import cisticola.scraper.base -from typing import List +from typing import Generator import snscrape.modules from datetime import datetime, timezone @@ -12,7 +12,7 @@ class TelegramSnscrapeScraper(cisticola.scraper.base.Scraper): if channel.platform == "Telegram" and channel.public and not channel.chat: return True - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None): + def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]: scr = snscrape.modules.telegram.TelegramChannelScraper( channel.screenname) diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index 4793b49..a43365c 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -1,7 +1,7 @@ import cisticola.base import cisticola.scraper.base from datetime import datetime, timezone -from typing import List +from typing import Generator import snscrape.modules from loguru import logger @@ -10,7 +10,7 @@ class TwitterScraper(cisticola.scraper.base.Scraper): """An implementation of a Scraper for Twitter, using snscrape library""" __version__ = "TwitterScraper 0.0.1" - def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]: + def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]: scraper = snscrape.modules.twitter.TwitterProfileScraper(channel.platform_id) first = True @@ -22,7 +22,6 @@ class TwitterScraper(cisticola.scraper.base.Scraper): first = False continue else: - print('too far') break archived_urls = {}