mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 12:58:33 +03:00
implemented Gettr scraper
This commit is contained in:
1
Pipfile
1
Pipfile
@@ -7,6 +7,7 @@ name = "pypi"
|
||||
sqlalchemy = "*"
|
||||
snscrape = "*"
|
||||
loguru = "*"
|
||||
gogettr = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
|
||||
24
Pipfile.lock
generated
24
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "8d3e1f9f21d8d86b560a55f660d1eb089287706e81fc8d21ba66f52827861b73"
|
||||
"sha256": "e335358892de4b581de211099e214f370f8cfd1f86b2cd2b3f0ea6d2d43313bb"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -39,6 +39,14 @@
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.12"
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
"sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3",
|
||||
"sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==8.0.3"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:9cd540a9352e432c7246a48fe4e8712b10acb1df2ad1f30e8c070b82ae1fed85",
|
||||
@@ -47,6 +55,14 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.6.0"
|
||||
},
|
||||
"gogettr": {
|
||||
"hashes": [
|
||||
"sha256:9f5c90e3b1befe6eb561d4bca9ca124faddbe5787d8b429f02703c68dd51d255",
|
||||
"sha256:eebdd76e5bb67e3848905f9d6841a9a897a44944edf2b6e1a5a1e5e8fb2110c1"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.8.0"
|
||||
},
|
||||
"greenlet": {
|
||||
"hashes": [
|
||||
"sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3",
|
||||
@@ -105,7 +121,7 @@
|
||||
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
|
||||
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
|
||||
"version": "==1.1.2"
|
||||
},
|
||||
"idna": {
|
||||
@@ -200,9 +216,7 @@
|
||||
"version": "==1.7.1"
|
||||
},
|
||||
"requests": {
|
||||
"extras": [
|
||||
"socks"
|
||||
],
|
||||
"extras": [],
|
||||
"hashes": [
|
||||
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
|
||||
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
|
||||
|
||||
40
cisticola/scraper/gettr.py
Normal file
40
cisticola/scraper/gettr.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import cisticola.base
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import List
|
||||
from gogettr import PublicClient
|
||||
|
||||
class GettrScraper(cisticola.scraper.Scraper):
    """An implementation of a Scraper for Gettr, using gogettr library"""

    __version__ = "GettrScraper 0.0.1"

    @staticmethod
    def get_username_from_url(url):
        """Extract the Gettr username from a profile URL.

        Args:
            url: A URL expected to contain ``gettr.com/user/<username>``.

        Returns:
            The username string, or ``None`` when the URL does not contain
            the ``gettr.com/user/`` marker, has no username after it, or has
            any further path component after the username.
        """
        parts = url.split("gettr.com/user/")
        # No marker in the URL: this is not a Gettr profile link. Returning
        # None (rather than raising IndexError) lets can_handle() decline it.
        if len(parts) < 2:
            return None

        username = parts[1]
        # Reject empty usernames and anything with a further path component.
        if not username or "/" in username:
            return None

        return username

    def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]:
        """Collect posts from a Gettr channel.

        Args:
            channel: Channel whose ``url`` points at a Gettr user profile.
            since: Optional previously archived result. Iteration stops at
                the first post whose creation time is not newer than
                ``since.date_archived``.

        Returns:
            A list of ``ScraperResult`` objects, one per collected post, in
            the order the gogettr client yielded them.
        """
        posts = []
        client = PublicClient()
        username = GettrScraper.get_username_from_url(channel.url)
        scraper = client.user_activity(username=username, type="posts")

        # post['cdate'] is handled as epoch *milliseconds* below (it is
        # divided by 1000 before datetime.fromtimestamp), so the cutoff must
        # be expressed in milliseconds too. Comparing it against a value in
        # seconds would never match and every run would re-scrape everything.
        cutoff_ms = None
        if since is not None:
            cutoff_ms = since.date_archived.timestamp() * 1000

        for post in scraper:
            # NOTE(review): the early break assumes the client yields posts
            # newest-first -- confirm against gogettr.
            if cutoff_ms is not None and post['cdate'] <= cutoff_ms:
                break

            posts.append(cisticola.base.ScraperResult(
                scraper=self.__version__,
                platform="Gettr",
                channel=username,
                platform_id=post['_id'],
                date=datetime.fromtimestamp(post['cdate'] / 1000.),
                date_archived=datetime.now(),
                raw_data=json.dumps(post)))

        return posts

    def can_handle(self, channel):
        """Return True when `channel` is a Gettr channel with a usable URL.

        A channel is handled only if its ``platform`` is ``"Gettr"`` and a
        username can be extracted from its ``url``.
        """
        return (channel.platform == "Gettr"
                and GettrScraper.get_username_from_url(channel.url) is not None)
|
||||
6
test.py
6
test.py
@@ -15,7 +15,11 @@ test_channels = [cisticola.base.Channel(id=0, name="Logan Williams (test)", plat
|
||||
cisticola.base.Channel(id=1, name="JQHN SPARTAN", platform_id=-1001181961026,
|
||||
category="qanon", followers=None, platform="Telegram",
|
||||
url="https://t.me/jqhnspartan", country="FR",
|
||||
influencer="JQNH SPARTAN", public=True, chat=False, notes="")]
|
||||
influencer="JQNH SPARTAN", public=True, chat=False, notes=""),
|
||||
cisticola.base.Channel(id=2, name="LizardRepublic", platform_id='lizardrepublic',
|
||||
category="qanon", followers=None, platform="Gettr",
|
||||
url="https://www.gettr.com/user/lizardrepublic", country="US",
|
||||
influencer=None, public=True, chat=False, notes=""),]
|
||||
|
||||
|
||||
controller = cisticola.ScraperController()
|
||||
|
||||
Reference in New Issue
Block a user