From cb30169ece2534a53c8b91b906b519064b31cdab Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Tue, 9 Aug 2022 09:45:40 -0500 Subject: [PATCH] fixed problems in recommendationengine class, updated README --- README.md | 6 +--- examples/generate_network.py | 2 +- polyphemus/_cli.py | 8 ++++++ polyphemus/api.py | 2 +- polyphemus/base.py | 30 +++++++++++++------- setup.py | 55 +++++++++++++++++++----------------- 6 files changed, 60 insertions(+), 43 deletions(-) create mode 100644 polyphemus/_cli.py diff --git a/README.md b/README.md index 26dc8a1..5e699fa 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,4 @@ Scraper for alt-tech video sharing platform [Odysee](https://odysee.com/). ### TODO - Implement CLI -- Profile run-time, look into implementing async requests -- Add error handling/backoff waiting to requests -- Implement basic test suite -- Formaize network graph generation into class/module -- Work on reverse-engineering auth_token instead of having it hard-coded +- Profile run-time, look into implementing async requests \ No newline at end of file diff --git a/examples/generate_network.py b/examples/generate_network.py index 37dc800..df4c10a 100644 --- a/examples/generate_network.py +++ b/examples/generate_network.py @@ -24,7 +24,7 @@ if __name__ == '__main__': engine = polyphemus.base.RecommendationEngine(channel_list= [CHANNEL_NAME]) - weighted_edge_list, claim_id_to_video = engine.generate(iterations = 1) + weighted_edge_list, channels, claim_id_to_video = engine.generate(iterations = ITERATIONS) G = nx.DiGraph() G.add_weighted_edges_from(weighted_edge_list) diff --git a/polyphemus/_cli.py b/polyphemus/_cli.py new file mode 100644 index 0000000..93d73dd --- /dev/null +++ b/polyphemus/_cli.py @@ -0,0 +1,8 @@ +# -*- coding: UTF-8 -*- + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# + +from . import api +from . import base + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# \ No newline at end of file diff --git a/polyphemus/api.py b/polyphemus/api.py index e0e2464..50a3856 100644 --- a/polyphemus/api.py +++ b/polyphemus/api.py @@ -68,7 +68,7 @@ def make_request(request: Callable, kwargs: dict) -> requests.Response: retry_reasons = [] # TODO this looks a bit gross, try to refactor - while n_retries < 5: + while n_retries < 10: time.sleep(2 ** n_retries - 1) try: response = request(**kwargs) diff --git a/polyphemus/base.py b/polyphemus/base.py index a9588da..ac5648a 100644 --- a/polyphemus/base.py +++ b/polyphemus/base.py @@ -218,7 +218,7 @@ def process_raw_video_info(raw_video_info: dict, auth_token: str = None, additio channel_id = channel_id, channel_name = channel_name, claim_id = raw_video_info['claim_id'], - created = datetime.fromtimestamp(int(created)), + created = datetime.fromtimestamp(max(int(created), 0)), text = raw_video_info['value'].get('description'), languages = raw_video_info['value'].get('languages'), tags = raw_video_info['value'].get('tags',[]), @@ -269,14 +269,15 @@ class RecommendationEngine: #-------------------------------------------------------------------------# def generate(self, iterations = 1): - - for channel_name in self.channel_list: - print(channel_name) - scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token) - - self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False))) - - self.claim_id_to_video = dict(zip([v.claim_id for v in self.new_videos], self.new_videos)) + + if not self.new_videos: + for channel_name in self.channel_list: + print(channel_name) + scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token) + + self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False))) + + self.claim_id_to_video.update(dict(zip([v.claim_id for v in self.new_videos], self.new_videos))) for iteration in range(int(iterations)): @@ -311,6 +312,15 @@ class RecommendationEngine: c = Counter(channel_edge_list) self.weighted_edge_list = [(source, target, weight) for (source, target), weight in c.most_common()] - return self.weighted_edge_list, self.claim_id_to_video + usernames = set([channel.strip('@') for edge in self.weighted_edge_list for channel in edge[:2]]) + + self.channels = {} + for username in usernames: + try: + self.channels['@' + username] = OdyseeChannelScraper(channel_name = username, auth_token=self.auth_token).get_entity().__dict__ + except KeyError: + pass + + return self.weighted_edge_list, self.channels, self.claim_id_to_video #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# \ No newline at end of file diff --git a/setup.py b/setup.py index 582ae88..5d60f32 100644 --- a/setup.py +++ b/setup.py @@ -10,36 +10,39 @@ from setuptools import setup def readme( ): with open( os.path.abspath( - os.path.join( - os.path.dirname( __file__ ), - 'README.md' ) ) ) as f: + os.path.join( + os.path.dirname( __file__ ), + 'README.md' ) ) ) as f: - return f.read( ) + return f.read( ) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# setup( - name = 'polyphemus', - version = '0.1', - description = 'Scraping Odysee video data', - long_description = readme( ), - author = 'Bellingcat', - packages = [ - 'polyphemus' ], - install_requires = [ - 'requests >= 2.27.0', - 'beautifulsoup4 >= 4.10.0', - 'pandas >= 1.4.0'], - extras_require = { - 'docs': [ - 'sphinx >= 3.3.1', - 'sphinx_rtd_theme >= 0.5',], - 'tests': [ - 'pytest >= 6.1.2', - 'pytest-cov >= 2.10.1', - 'pytest-html >= 3.0.0', - 'pytest-metadata >= 1.10.0']}, - include_package_data = True, - zip_safe = False ) + name = 'polyphemus', + version = '0.1', + description = 'Scraping Odysee video data', + long_description = readme(), + author = 'Bellingcat', + packages = [ + 'polyphemus'], + install_requires = [ + 'requests >= 2.27.0', + 'beautifulsoup4 >= 4.10.0', + 'pandas >= 1.4.0'], + extras_require = { + 'docs': [ + 'sphinx >= 3.3.1', + 'sphinx_rtd_theme >= 0.5',], + 'tests': [ + 'pytest >= 6.1.2', + 'pytest-cov >= 2.10.1', + 'pytest-html >= 3.0.0', + 'pytest-metadata >= 1.10.0']}, + include_package_data = True, + zip_safe = False, + entry_points = { + 'console_scripts': [ + 'polyphemus = polyphemus._cli:main']}) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# \ No newline at end of file