mirror of
https://github.com/bellingcat/polyphemus.git
synced 2026-06-07 19:08:33 +03:00
fixed problems in recommendationengine class, updated README
This commit is contained in:
@@ -4,8 +4,4 @@ Scraper for alt-tech video sharing platform [Odysee](https://odysee.com/).
|
||||
|
||||
### TODO
|
||||
- Implement CLI
|
||||
- Profile run-time, look into implementing async requests
|
||||
- Add error handling/backoff waiting to requests
|
||||
- Implement basic test suite
|
||||
- Formalize network graph generation into class/module
|
||||
- Work on reverse-engineering auth_token instead of having it hard-coded
|
||||
- Profile run-time, look into implementing async requests
|
||||
@@ -24,7 +24,7 @@ if __name__ == '__main__':
|
||||
|
||||
engine = polyphemus.base.RecommendationEngine(channel_list= [CHANNEL_NAME])
|
||||
|
||||
weighted_edge_list, claim_id_to_video = engine.generate(iterations = 1)
|
||||
weighted_edge_list, channels, claim_id_to_video = engine.generate(iterations = ITERATIONS)
|
||||
|
||||
G = nx.DiGraph()
|
||||
G.add_weighted_edges_from(weighted_edge_list)
|
||||
|
||||
8
polyphemus/_cli.py
Normal file
8
polyphemus/_cli.py
Normal file
@@ -0,0 +1,8 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
from . import api
|
||||
from . import base
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
@@ -68,7 +68,7 @@ def make_request(request: Callable, kwargs: dict) -> requests.Response:
|
||||
retry_reasons = []
|
||||
|
||||
# TODO this looks a bit gross, try to refactor
|
||||
while n_retries < 5:
|
||||
while n_retries < 10:
|
||||
time.sleep(2 ** n_retries - 1)
|
||||
try:
|
||||
response = request(**kwargs)
|
||||
|
||||
@@ -218,7 +218,7 @@ def process_raw_video_info(raw_video_info: dict, auth_token: str = None, additio
|
||||
channel_id = channel_id,
|
||||
channel_name = channel_name,
|
||||
claim_id = raw_video_info['claim_id'],
|
||||
created = datetime.fromtimestamp(int(created)),
|
||||
created = datetime.fromtimestamp(max(int(created), 0)),
|
||||
text = raw_video_info['value'].get('description'),
|
||||
languages = raw_video_info['value'].get('languages'),
|
||||
tags = raw_video_info['value'].get('tags',[]),
|
||||
@@ -269,14 +269,15 @@ class RecommendationEngine:
|
||||
#-------------------------------------------------------------------------#
|
||||
|
||||
def generate(self, iterations = 1):
|
||||
|
||||
for channel_name in self.channel_list:
|
||||
print(channel_name)
|
||||
scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)
|
||||
|
||||
self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))
|
||||
|
||||
self.claim_id_to_video = dict(zip([v.claim_id for v in self.new_videos], self.new_videos))
|
||||
|
||||
if not self.new_videos:
|
||||
for channel_name in self.channel_list:
|
||||
print(channel_name)
|
||||
scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)
|
||||
|
||||
self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))
|
||||
|
||||
self.claim_id_to_video.update(dict(zip([v.claim_id for v in self.new_videos], self.new_videos)))
|
||||
|
||||
for iteration in range(int(iterations)):
|
||||
|
||||
@@ -311,6 +312,15 @@ class RecommendationEngine:
|
||||
c = Counter(channel_edge_list)
|
||||
self.weighted_edge_list = [(source, target, weight) for (source, target), weight in c.most_common()]
|
||||
|
||||
return self.weighted_edge_list, self.claim_id_to_video
|
||||
usernames = set([channel.strip('@') for edge in self.weighted_edge_list for channel in edge[:2]])
|
||||
|
||||
self.channels = {}
|
||||
for username in usernames:
|
||||
try:
|
||||
self.channels['@' + username] = OdyseeChannelScraper(channel_name = username, auth_token=self.auth_token).get_entity().__dict__
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return self.weighted_edge_list, self.channels, self.claim_id_to_video
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
55
setup.py
55
setup.py
@@ -10,36 +10,39 @@ from setuptools import setup
|
||||
def readme( ):
|
||||
|
||||
with open( os.path.abspath(
|
||||
os.path.join(
|
||||
os.path.dirname( __file__ ),
|
||||
'README.md' ) ) ) as f:
|
||||
os.path.join(
|
||||
os.path.dirname( __file__ ),
|
||||
'README.md' ) ) ) as f:
|
||||
|
||||
return f.read( )
|
||||
return f.read( )
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
setup(
|
||||
name = 'polyphemus',
|
||||
version = '0.1',
|
||||
description = 'Scraping Odysee video data',
|
||||
long_description = readme( ),
|
||||
author = 'Bellingcat',
|
||||
packages = [
|
||||
'polyphemus' ],
|
||||
install_requires = [
|
||||
'requests >= 2.27.0',
|
||||
'beautifulsoup4 >= 4.10.0',
|
||||
'pandas >= 1.4.0'],
|
||||
extras_require = {
|
||||
'docs': [
|
||||
'sphinx >= 3.3.1',
|
||||
'sphinx_rtd_theme >= 0.5',],
|
||||
'tests': [
|
||||
'pytest >= 6.1.2',
|
||||
'pytest-cov >= 2.10.1',
|
||||
'pytest-html >= 3.0.0',
|
||||
'pytest-metadata >= 1.10.0']},
|
||||
include_package_data = True,
|
||||
zip_safe = False )
|
||||
name = 'polyphemus',
|
||||
version = '0.1',
|
||||
description = 'Scraping Odysee video data',
|
||||
long_description = readme(),
|
||||
author = 'Bellingcat',
|
||||
packages = [
|
||||
'polyphemus'],
|
||||
install_requires = [
|
||||
'requests >= 2.27.0',
|
||||
'beautifulsoup4 >= 4.10.0',
|
||||
'pandas >= 1.4.0'],
|
||||
extras_require = {
|
||||
'docs': [
|
||||
'sphinx >= 3.3.1',
|
||||
'sphinx_rtd_theme >= 0.5',],
|
||||
'tests': [
|
||||
'pytest >= 6.1.2',
|
||||
'pytest-cov >= 2.10.1',
|
||||
'pytest-html >= 3.0.0',
|
||||
'pytest-metadata >= 1.10.0']},
|
||||
include_package_data = True,
|
||||
zip_safe = False,
|
||||
entry_points = {
|
||||
'console_scripts': [
|
||||
'polyphemus = polyphemus._cli:main']})
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
Reference in New Issue
Block a user