Fixed problems in RecommendationEngine class, updated README

2026-06-07 19:08:33 +03:00 · 2022-08-09 09:45:40 -05:00
parent b18e5591fa
commit cb30169ece
6 changed files with 60 additions and 43 deletions
--- a/README.md
+++ b/README.md
@@ -4,8 +4,4 @@ Scraper for alt-tech video sharing platform [Odysee](https://odysee.com/).
 ### TODO
 - Implement CLI
- Profile run-time, look into implementing async requests
+- Profile run-time, look into implementing async requests
 - Add error handling/backoff waiting to requests
 - Implement basic test suite
 - Formalize network graph generation into class/module
 - Work on reverse-engineering auth_token instead of having it hard-coded
--- a/examples/generate_network.py
+++ b/examples/generate_network.py
@@ -24,7 +24,7 @@ if __name__ == '__main__':
    engine = polyphemus.base.RecommendationEngine(channel_list= [CHANNEL_NAME])
-    weighted_edge_list, claim_id_to_video = engine.generate(iterations = 1)
+    weighted_edge_list, channels, claim_id_to_video = engine.generate(iterations = ITERATIONS)
    G = nx.DiGraph()
    G.add_weighted_edges_from(weighted_edge_list)
--- a/polyphemus/_cli.py
+++ b/polyphemus/_cli.py
@@ -0,0 +1,8 @@
 # -*- coding: UTF-8 -*-
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
 from . import api
 from . import base 
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
--- a/polyphemus/api.py
+++ b/polyphemus/api.py
@@ -68,7 +68,7 @@ def make_request(request: Callable, kwargs: dict) -> requests.Response:
    retry_reasons = []
    # TODO this looks a bit gross, try to refactor
-    while n_retries < 5:
+    while n_retries < 10:
        time.sleep(2 ** n_retries - 1)
        try:
            response = request(**kwargs)
--- a/polyphemus/base.py
+++ b/polyphemus/base.py
@@ -218,7 +218,7 @@ def process_raw_video_info(raw_video_info: dict, auth_token: str = None, additio
        channel_id = channel_id,
        channel_name = channel_name,
        claim_id = raw_video_info['claim_id'],
-        created = datetime.fromtimestamp(int(created)),
+        created = datetime.fromtimestamp(max(int(created), 0)),
        text = raw_video_info['value'].get('description'),
        languages = raw_video_info['value'].get('languages'),
        tags = raw_video_info['value'].get('tags',[]),
@@ -269,14 +269,15 @@ class RecommendationEngine:
    #-------------------------------------------------------------------------#
    def generate(self, iterations = 1):
-        
+
-        for channel_name in self.channel_list:
+        if not self.new_videos:
-            print(channel_name)
+            for channel_name in self.channel_list:
-            scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)
+                print(channel_name)
-            
+                scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)
-            self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))
+                
-            
+                self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))
-        self.claim_id_to_video = dict(zip([v.claim_id for v in self.new_videos], self.new_videos))
+                
            self.claim_id_to_video.update(dict(zip([v.claim_id for v in self.new_videos], self.new_videos)))
        for iteration in range(int(iterations)):
@@ -311,6 +312,15 @@ class RecommendationEngine:
        c = Counter(channel_edge_list)
        self.weighted_edge_list = [(source, target, weight) for (source, target), weight in c.most_common()]
-        return self.weighted_edge_list, self.claim_id_to_video
+        usernames = set([channel.strip('@') for edge in self.weighted_edge_list for channel in edge[:2]])
        self.channels = {}
        for username in usernames:
            try:
                self.channels['@' + username] = OdyseeChannelScraper(channel_name = username, auth_token=self.auth_token).get_entity().__dict__
            except KeyError:
                pass
        return self.weighted_edge_list, self.channels, self.claim_id_to_video
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
--- a/setup.py
+++ b/setup.py
@@ -10,36 +10,39 @@ from setuptools import setup
 def readme( ):
  with open( os.path.abspath(
-    os.path.join(
+      os.path.join(
-      os.path.dirname( __file__ ),
+          os.path.dirname( __file__ ),
-      'README.md' ) ) ) as f:
+          'README.md' ) ) ) as f:
-    return f.read( )
+      return f.read( )
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
 setup(
-  name = 'polyphemus',
+    name = 'polyphemus',
-  version = '0.1',
+    version = '0.1',
-  description = 'Scraping Odysee video data',
+    description = 'Scraping Odysee video data',
-  long_description = readme( ),
+    long_description = readme(),
-  author = 'Bellingcat',
+    author = 'Bellingcat',
-  packages = [
+    packages = [
-    'polyphemus' ],
+        'polyphemus'],
-  install_requires = [
+    install_requires = [
-    'requests >= 2.27.0',
+        'requests >= 2.27.0',
-    'beautifulsoup4 >= 4.10.0',
+        'beautifulsoup4 >= 4.10.0',
-    'pandas >= 1.4.0'],
+        'pandas >= 1.4.0'],
-  extras_require = {
+    extras_require = {
-    'docs': [
+        'docs': [
-      'sphinx >= 3.3.1',
+            'sphinx >= 3.3.1',
-      'sphinx_rtd_theme >= 0.5',],
+            'sphinx_rtd_theme >= 0.5',],
-    'tests': [
+        'tests': [
-      'pytest >= 6.1.2',
+            'pytest >= 6.1.2',
-      'pytest-cov >= 2.10.1',
+            'pytest-cov >= 2.10.1',
-      'pytest-html >= 3.0.0',
+            'pytest-html >= 3.0.0',
-      'pytest-metadata >= 1.10.0']},
+            'pytest-metadata >= 1.10.0']},
-  include_package_data = True,
+    include_package_data = True,
-  zip_safe = False )
+    zip_safe = False,
    entry_points = {
        'console_scripts': [
            'polyphemus = polyphemus._cli:main']})
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#