From 0b1c175dd9579f67c597bcd3d63db3b7220251ac Mon Sep 17 00:00:00 2001
From: Logan Williams <logan.williams@alum.mit.edu>
Date: Thu, 24 Feb 2022 20:25:14 +0100
Subject: [PATCH] Modify GettrScraper to yield results, archive media (videos
 incomplete)

---
 cisticola/base.py          |  2 +-
 cisticola/scraper/gettr.py | 32 ++++++++++++++++++++++++--------
 test.py                    |  9 +++++++--
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/cisticola/base.py b/cisticola/base.py
index d2913e2..03a1641 100644
--- a/cisticola/base.py
+++ b/cisticola/base.py
@@ -12,7 +12,7 @@ class ScraperResult:
 
     scraper: str
     platform: str
-    channel: int
+    channel: int  # TODO: there is probably a way of making this a Channel object foreign key
     platform_id: str
     date: datetime
     raw_data: str
diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py
index c656549..5ae7d96 100644
--- a/cisticola/scraper/gettr.py
+++ b/cisticola/scraper/gettr.py
@@ -1,10 +1,11 @@
 import cisticola.base
+import cisticola.scraper.base
 from datetime import datetime
 import json
 from typing import List
 from gogettr import PublicClient
 
-class GettrScraper(cisticola.scraper.Scraper):
+class GettrScraper(cisticola.scraper.base.Scraper):
     """An implementation of a Scraper for Gettr, using gogettr library"""
     __version__ = "GettrScraper 0.0.1"
 
@@ -16,25 +17,40 @@ class GettrScraper(cisticola.scraper.Scraper):
         return username
 
     def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> List[cisticola.base.ScraperResult]:
-        posts = []
         client = PublicClient()
         username = GettrScraper.get_username_from_url(channel.url)
         scraper = client.user_activity(username=username, type="posts")
 
         for post in scraper:
-            if since is not None and post['cdate'] <= int(since.date_archived.timestamp()):
+            if since is not None and datetime.fromtimestamp(post['cdate']*0.001) <= since.date:
                 break
 
-            posts.append(cisticola.base.ScraperResult(
+            archived_urls = {}
+
+            if 'imgs' in post:
+                for img in post['imgs']:
+                    url = "https://media.gettr.com/" + img
+                    archived_url = self.archive_media(url)
+                    archived_urls[img] = archived_url
+
+            if 'main' in post:
+                archived_url = self.archive_media("https://media.gettr.com/" + post['main'])
+                archived_urls[post['main']] = archived_url
+
+            # TODO: this only archives the playlist file, not the actual video
+            if 'vid' in post:
+                archived_url = self.archive_media("https://media.gettr.com/" + post['vid'])
+                archived_urls[post['vid']] = archived_url
+
+            yield cisticola.base.ScraperResult(
                 scraper=self.__version__,
                 platform="Gettr",
-                channel=username,
+                channel=channel.id,
                 platform_id=post['_id'],
                 date=datetime.fromtimestamp(post['cdate']/1000.),
                 date_archived=datetime.now(),
-                raw_data=json.dumps(post)))
-
-        return posts
+                raw_data=json.dumps(post),
+                archived_urls=archived_urls)
 
     def can_handle(self, channel):
         if channel.platform == "Gettr" and GettrScraper.get_username_from_url(channel.url) is not None:
diff --git a/test.py b/test.py
index 0e4a6e0..9c60fb0 100644
--- a/test.py
+++ b/test.py
@@ -1,11 +1,13 @@
 import cisticola
 import cisticola.scraper.telegram_snscrape
 import cisticola.scraper.twitter
+import cisticola.scraper.gettr
 
 from sqlalchemy import create_engine
 
 
-test_channels = [cisticola.base.Channel(id=0, name="Logan Williams (test)", platform_id=891729132,
+test_channels = [
+    cisticola.base.Channel(id=0, name="Logan Williams (test)", platform_id=891729132,
                                    category="test", followers=None, platform="Twitter",
                                    url="https://twitter.com/obtusatum", screenname="obtusatum", country="US",
                                    influencer=None, public=True, chat=False,
@@ -32,7 +34,10 @@ controller.register_scraper(twitter)
 telegram = cisticola.scraper.telegram_snscrape.TelegramSnscrapeScraper()
 controller.register_scraper(telegram)
 
-engine = create_engine('sqlite:///test.db')
+gettr = cisticola.scraper.gettr.GettrScraper()
+controller.register_scraper(gettr)
+
+engine = create_engine('sqlite:///test3.db')
 controller.connect_to_db(engine)
 
 controller.scrape_channels(test_channels)