From 5ce2151723dcf7defa413c7737347624deac2cab Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Mon, 7 Nov 2022 21:44:32 +0200 Subject: [PATCH] Delete scraper.py --- scraper.py | 108 ----------------------------------------------------- 1 file changed, 108 deletions(-) delete mode 100644 scraper.py diff --git a/scraper.py b/scraper.py deleted file mode 100644 index 4829759..0000000 --- a/scraper.py +++ /dev/null @@ -1,108 +0,0 @@ -import tqdm -import requests -import argparse -from collections import defaultdict -from itertools import combinations -from itertools import islice -from youtube_comment_downloader import YoutubeCommentDownloader - - -program_version_number = '2022.1.0.0' -update_check_endpoint = "https://api.github.com/repos/rly0nheart/YouTube-Comment-Scraper/releases/latest" - -def notice(): - notice_msg = f""" - YouTube-Comment-Scraper {program_version_number} Copyright (C) 2022 Richard Mwewa - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - """ - print(notice_msg) - - -def check_and_get_updates(): - notice() - """ - Checks if the release tag matches the current tag in the program - If there's a match, ignore - """ - response = requests.get(update_check_endpoint).json() - if response['tag_name'] == program_version_number: - pass - else: - update_prompt = input(f"[?] A new release is available ({response['tag_name']}). Would you like to install it? (y/n) ") - if update_prompt.lower() == "y": - files_to_update = ['scraper.py', 'test_find_multiple_users.py', 'README.md', 'requirements.txt'] - for file in tqdm(files_to_update, desc=f'Updating'): - data = requests.get(f'https://raw.githubusercontent.com/rly0nheart/YouTube-Comment-Scraper/master/{file}') - with open(file, "wb") as f: - f.write(data.content) - f.close() - print(f"[+] Updated: Re-run program.");exit() - else: - pass - - -def get_comment_dict(video_url, max_comments=100): - """ - Creates a dictionary mapping comment-authors - to a list of their comments - """ - downloader = YoutubeCommentDownloader() - comment_dict = defaultdict(list) - comments = downloader.get_comments_from_url(video_url) - for comment in islice(comments, max_comments): - comment_dict[comment['author']].append(comment) - - return comment_dict - -def find_multiple_authors(video_urls): - - # video_dict maps the video url id to the - # comment dict for that video - video_dict = {} - for url in video_urls: - vid_uid = url.split('=')[1].split('&')[0] - print('[~] Getting comments for video: ', vid_uid) - video_dict[vid_uid] = get_comment_dict(url) - - # Iterate over the possible combinations of videos - for item1, item2 in combinations(video_dict.items(), r=2): - # Unpack from tuple - vid_id1, dict1 = item1 - vid_id2, dict2 = item2 - # Use set intersection to find common authors - common_authors = dict1.keys() & dict2.keys() - print(f'Videos: {vid_id1} & {vid_id2} have {len(common_authors)}') - print(common_authors) - for author in common_authors: - print(f'[+] Author: {author}') - print(f'[+] Video {vid_id1} comments: ') - # Iterate over each comment author left on video1 - # and print first 100 chars - for i, comment in enumerate(dict1[author]): - print(i+1, comment['text'][:100]) - print(f'[+] Video {vid_id2} comments: ') - for i, comment in enumerate(dict2[author]): - print(i+1, comment['text'][:100]) - - print() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('YouTube-Comment-Scraper — by Richard Mwewa', epilog='scrapes youtube comments and checks whether a user commented on the given videos') - parser.add_argument('videos', nargs='+', help='list of youtube video urls') - parser.add_argument('-v', '--version', version='2022.1.0.0', action='version') - args = parser.parse_args() - try: - check_and_get_updates() - find_multiple_authors(args.videos) - - except KeyboardInterrupt: - print('[!] Process interrupted with Ctrl+C.') - - except Exception as e: - print('[!] An error occurred:', e) -