From fd8ee27f5240b3929d53a1338ca8653df3cf94f2 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 30 Sep 2022 23:34:18 +0200 Subject: [PATCH] Updated scraper.py and requirements.txt --- requirements.txt | 2 ++ scraper.py | 63 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 21c2162..b4b6924 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ +tqdm +requests youtube-comment-downloader \ No newline at end of file diff --git a/scraper.py b/scraper.py index b6c8602..4829759 100644 --- a/scraper.py +++ b/scraper.py @@ -1,3 +1,5 @@ +import tqdm +import requests import argparse from collections import defaultdict from itertools import combinations @@ -5,6 +7,44 @@ from itertools import islice from youtube_comment_downloader import YoutubeCommentDownloader +program_version_number = '2022.1.0.0' +update_check_endpoint = "https://api.github.com/repos/rly0nheart/YouTube-Comment-Scraper/releases/latest" + +def notice(): + notice_msg = f""" + YouTube-Comment-Scraper {program_version_number} Copyright (C) 2022 Richard Mwewa + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + """ + print(notice_msg) + + +def check_and_get_updates(): + notice() + """ + Checks if the release tag matches the current tag in the program + If there's a match, ignore + """ + response = requests.get(update_check_endpoint).json() + if response['tag_name'] == program_version_number: + pass + else: + update_prompt = input(f"[?] A new release is available ({response['tag_name']}). Would you like to install it? (y/n) ") + if update_prompt.lower() == "y": + files_to_update = ['scraper.py', 'test_find_multiple_users.py', 'README.md', 'requirements.txt'] + for file in tqdm(files_to_update, desc=f'Updating'): + data = requests.get(f'https://raw.githubusercontent.com/rly0nheart/YouTube-Comment-Scraper/master/{file}') + with open(file, "wb") as f: + f.write(data.content) + f.close() + print(f"[+] Updated: Re-run program.");exit() + else: + pass + + def get_comment_dict(video_url, max_comments=100): """ Creates a dictionary mapping comment-authors @@ -25,7 +65,7 @@ def find_multiple_authors(video_urls): video_dict = {} for url in video_urls: vid_uid = url.split('=')[1].split('&')[0] - print('Getting comments for video: ', vid_uid) + print('[~] Getting comments for video: ', vid_uid) video_dict[vid_uid] = get_comment_dict(url) # Iterate over the possible combinations of videos @@ -38,13 +78,13 @@ def find_multiple_authors(video_urls): print(f'Videos: {vid_id1} & {vid_id2} have {len(common_authors)}') print(common_authors) for author in common_authors: - print(f'Author: {author}') - print(f'Video {vid_id1} comments: ') + print(f'[+] Author: {author}') + print(f'[+] Video {vid_id1} comments: ') # Iterate over each comment author left on video1 # and print first 100 chars for i, comment in enumerate(dict1[author]): print(i+1, comment['text'][:100]) - print(f'Video {vid_id2} comments: ') + print(f'[+] Video {vid_id2} comments: ') for i, comment in enumerate(dict2[author]): print(i+1, comment['text'][:100]) @@ -52,8 +92,17 @@ def find_multiple_authors(video_urls): if __name__ == '__main__': - parser = argparse.ArgumentParser(prog='scraper.py') - parser.add_argument('videos', nargs='+', help='List of YouTube video urls') + parser = argparse.ArgumentParser('YouTube-Comment-Scraper — by Richard Mwewa', epilog='scrapes youtube comments and checks whether a user commented on the given videos') + parser.add_argument('videos', nargs='+', help='list of youtube video urls') + parser.add_argument('-v', '--version', version='2022.1.0.0', action='version') args = parser.parse_args() - find_multiple_authors(args.videos) + try: + check_and_get_updates() + find_multiple_authors(args.videos) + + except KeyboardInterrupt: + print('[!] Process interrupted with Ctrl+C.') + + except Exception as e: + print('[!] An error occurred:', e)