diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py
index 2307ca5..d1b6fbb 100644
--- a/cisticola/scraper/gab.py
+++ b/cisticola/scraper/gab.py
@@ -17,21 +17,42 @@ class GabScraper(Scraper):
 
         return username
 
+    def is_group_url(self, url):
+        """Return True when `url` has the form `.../groups/<id>` (trailing slash allowed)."""
+        parts = url.rstrip('/').split('/')
+
+        return len(parts) >= 2 and parts[-2] == 'groups'
+
+    def get_group_id_from_url(self, url):
+        """Return the integer group ID that ends a Gab group URL."""
+        # Strip a trailing slash so `.../groups/10001/` parses like `.../groups/10001`.
+        group_id = int(url.rstrip('/').split('/')[-1])
+
+        return group_id
+
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
         client = Client(
             username = os.environ['GAB_USER'],
             password = os.environ['GAB_PASS'],
             threads = 25)
 
-        username = self.get_username_from_url(channel.url)
+        if self.is_group_url(channel.url):
 
-        result = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
-        user_id = int(result['id'])
+            group_id = self.get_group_id_from_url(url = channel.url)
+            scraper = client.pull_group_posts(
+                id = group_id,
+                depth = float('inf'))
+        else:
 
-        scraper = client.pull_statuses(
-            id = user_id,
-            created_after = date.min,
-            replies = False)
+            username = self.get_username_from_url(channel.url)
+
+            result = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
+            user_id = int(result['id'])
+
+            scraper = client.pull_statuses(
+                id = user_id,
+                created_after = date.min,
+                replies = False)
 
         for post in scraper:
             if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
@@ -82,8 +103,15 @@ class GabScraper(Scraper):
             password = os.environ['GAB_PASS'],
             threads = 25)
 
-        username = self.get_username_from_url(channel.url)
+        if self.is_group_url(channel.url):
 
-        profile = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
+            group_id = self.get_group_id_from_url(url = channel.url)
+            profile = client.pull_group(id = group_id)
+
+        else:
+
+            username = self.get_username_from_url(channel.url)
+
+            profile = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
 
         return profile
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 962fbed..684c15d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -33,6 +33,19 @@ GAB_CHANNEL_KWARGS = {
     'chat': False,
     'notes': ''}
 
+GAB_GROUP_KWARGS = {
+    'name': 'iran group (test)',
+    'platform_id': 10001,
+    'category': 'test',
+    'platform': 'Gab',
+    'url': 'https://gab.com/groups/10001',
+    'screenname': 'iran group',
+    'country': 'IR',
+    'influencer': None,
+    'public': True,
+    'chat': True,
+    'notes': ''}
+
 GETTR_CHANNEL_KWARGS = {
     'name': 'LizardRepublic (test)',
     'platform_id': 'lizardrepublic',
@@ -178,6 +191,7 @@ def channel_kwargs():
     return {
         'bitchute' : BITCHUTE_CHANNEL_KWARGS,
         'gab' : GAB_CHANNEL_KWARGS,
+        'gab_group' : GAB_GROUP_KWARGS,
         'gettr' : GETTR_CHANNEL_KWARGS,
         'instagram' : INSTAGRAM_CHANNEL_KWARGS,
         'odysee' : ODYSEE_CHANNEL_KWARGS,
diff --git a/tests/scraper/gab.py b/tests/scraper/gab.py
index ed9d32a..d600429 100644
--- a/tests/scraper/gab.py
+++ b/tests/scraper/gab.py
@@ -23,4 +23,34 @@ def test_scrape_gab_profile(channel_kwargs):
 
     scraper = GabScraper()
     channel = Channel(**channel_kwargs['gab'])
+    scraper.get_profile(channel=channel)
+
+def test_gab_group_url_parsing():
+
+    scraper = GabScraper()
+    assert scraper.is_group_url('https://gab.com/groups/10001')
+    assert scraper.is_group_url('https://gab.com/groups/10001/')
+    assert not scraper.is_group_url('https://gab.com/someuser')
+    assert scraper.get_group_id_from_url('https://gab.com/groups/10001/') == 10001
+
+def test_scrape_gab_group_no_media(controller, channel_kwargs):
+
+    channels = [Channel(**channel_kwargs['gab_group'])]
+    controller.register_scraper(scraper = GabScraper())
+    controller.scrape_channels(channels = channels, archive_media = False)
+
+@pytest.mark.media
+def test_scrape_gab_group(controller, channel_kwargs):
+
+    controller.reset_db()
+
+    channels = [Channel(**channel_kwargs['gab_group'])]
+    controller.register_scraper(scraper = GabScraper())
+    controller.scrape_channels(channels = channels, archive_media = True)
+
+@pytest.mark.profile
+def test_scrape_gab_group_profile(channel_kwargs):
+
+    scraper = GabScraper()
+    channel = Channel(**channel_kwargs['gab_group'])
     scraper.get_profile(channel=channel)
\ No newline at end of file