mirror of
https://github.com/bellingcat/youtube-comment-scraper.git
synced 2026-06-08 03:28:32 +03:00
Updated scraper.py and requirements.txt
This commit is contained in:
@@ -1 +1,3 @@
|
||||
tqdm
|
||||
requests
|
||||
youtube-comment-downloader
|
||||
63
scraper.py
63
scraper.py
@@ -1,3 +1,5 @@
|
||||
import tqdm
|
||||
import requests
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from itertools import combinations
|
||||
@@ -5,6 +7,44 @@ from itertools import islice
|
||||
from youtube_comment_downloader import YoutubeCommentDownloader
|
||||
|
||||
|
||||
program_version_number = '2022.1.0.0'
|
||||
update_check_endpoint = "https://api.github.com/repos/rly0nheart/YouTube-Comment-Scraper/releases/latest"
|
||||
|
||||
def notice():
    """
    Print the program banner: name, current version, copyright holder
    and the short GPLv3 redistribution notice.
    """
    banner_lines = (
        "",  # the banner starts with an empty line
        f"YouTube-Comment-Scraper {program_version_number} Copyright (C) 2022 Richard Mwewa",
        "",
        "This program is free software: you can redistribute it and/or modify",
        "it under the terms of the GNU General Public License as published by",
        "the Free Software Foundation, either version 3 of the License, or",
        "(at your option) any later version.",
        "",  # ...and ends with a trailing newline
    )
    print("\n".join(banner_lines))
|
||||
|
||||
|
||||
def check_and_get_updates():
    """
    Print the license notice, then check GitHub for a newer release.

    The latest release tag is fetched from ``update_check_endpoint`` and
    compared with ``program_version_number``. If they match, nothing else
    happens. Otherwise the user is asked whether to install the release;
    on "y" the program files are downloaded from the upstream master
    branch, written over the local copies (paths are relative to the
    current working directory) and the program exits so it can be re-run.

    Raises:
        requests.RequestException: if a request fails, times out or comes
            back with an HTTP error status (e.g. API rate limiting).
        SystemExit: after a successful update, so the new code is picked
            up on the next run.
    """
    notice()

    # Without a timeout a stalled connection would block the program forever.
    request_timeout = 10  # seconds

    release = requests.get(update_check_endpoint, timeout=request_timeout)
    # Fail with a clear HTTP error instead of a KeyError on 'tag_name' when
    # the API answers with an error payload.
    release.raise_for_status()
    latest_tag = release.json()['tag_name']
    if latest_tag == program_version_number:
        return

    update_prompt = input(f"[?] A new release is available ({latest_tag}). Would you like to install it? (y/n) ")
    if update_prompt.lower() != "y":
        return

    # NOTE(review): this overwrites local source files with code fetched from
    # the upstream repository's master branch; nothing is verified beyond the
    # HTTP status.
    files_to_update = ['scraper.py', 'test_find_multiple_users.py', 'README.md', 'requirements.txt']
    raw_base_url = 'https://raw.githubusercontent.com/rly0nheart/YouTube-Comment-Scraper/master'

    # Download everything before touching the disk, so a failed or missing
    # file cannot leave the installation half-updated or replace a source
    # file with an error page.
    downloads = {}
    # The file does `import tqdm`, so the progress-bar class is `tqdm.tqdm`.
    for file_name in tqdm.tqdm(files_to_update, desc='Updating'):
        data = requests.get(f'{raw_base_url}/{file_name}', timeout=request_timeout)
        data.raise_for_status()
        downloads[file_name] = data.content

    for file_name, content in downloads.items():
        with open(file_name, "wb") as f:
            f.write(content)

    print("[+] Updated: Re-run program.")
    # Same effect as the `exit()` builtin, but does not depend on the `site`
    # module having injected it.
    raise SystemExit(0)
|
||||
|
||||
|
||||
def get_comment_dict(video_url, max_comments=100):
|
||||
"""
|
||||
Creates a dictionary mapping comment-authors
|
||||
@@ -25,7 +65,7 @@ def find_multiple_authors(video_urls):
|
||||
video_dict = {}
|
||||
for url in video_urls:
|
||||
vid_uid = url.split('=')[1].split('&')[0]
|
||||
print('Getting comments for video: ', vid_uid)
|
||||
print('[~] Getting comments for video: ', vid_uid)
|
||||
video_dict[vid_uid] = get_comment_dict(url)
|
||||
|
||||
# Iterate over the possible combinations of videos
|
||||
@@ -38,13 +78,13 @@ def find_multiple_authors(video_urls):
|
||||
print(f'Videos: {vid_id1} & {vid_id2} have {len(common_authors)}')
|
||||
print(common_authors)
|
||||
for author in common_authors:
|
||||
print(f'Author: {author}')
|
||||
print(f'Video {vid_id1} comments: ')
|
||||
print(f'[+] Author: {author}')
|
||||
print(f'[+] Video {vid_id1} comments: ')
|
||||
# Iterate over each comment author left on video1
|
||||
# and print first 100 chars
|
||||
for i, comment in enumerate(dict1[author]):
|
||||
print(i+1, comment['text'][:100])
|
||||
print(f'Video {vid_id2} comments: ')
|
||||
print(f'[+] Video {vid_id2} comments: ')
|
||||
for i, comment in enumerate(dict2[author]):
|
||||
print(i+1, comment['text'][:100])
|
||||
|
||||
@@ -52,8 +92,17 @@ def find_multiple_authors(video_urls):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(prog='scraper.py')
|
||||
parser.add_argument('videos', nargs='+', help='List of YouTube video urls')
|
||||
parser = argparse.ArgumentParser('YouTube-Comment-Scraper — by Richard Mwewa', epilog='scrapes youtube comments and checks whether a user commented on the given videos')
|
||||
parser.add_argument('videos', nargs='+', help='list of youtube video urls')
|
||||
parser.add_argument('-v', '--version', version='2022.1.0.0', action='version')
|
||||
args = parser.parse_args()
|
||||
find_multiple_authors(args.videos)
|
||||
try:
|
||||
check_and_get_updates()
|
||||
find_multiple_authors(args.videos)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print('[!] Process interrupted with Ctrl+C.')
|
||||
|
||||
except Exception as e:
|
||||
print('[!] An error occurred:', e)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user