23 Commits

Author SHA1 Message Date
Galen Reich
b9150ffbb1 Add archiving notice to README 2025-01-21 14:46:58 +00:00
Richard Mwewa
b9211c936a Update README.md 2023-08-08 07:28:27 +02:00
Richard Mwewa
21fec1ab1f Update README.md 2023-03-07 19:28:06 +02:00
Richard Mwewa
4010fe3c09 Update scraper.py 2022-11-18 02:25:10 +02:00
Richard Mwewa
8ec8e2d64c Update setup.py 2022-11-18 02:24:41 +02:00
Richard Mwewa
b32389aa63 Update 2022-11-18 02:23:33 +02:00
Richard Mwewa
fcade4b253 Create Dockerfile 2022-11-18 01:44:54 +02:00
Richard Mwewa
430845d008 Update and rename test_find_multiple_users.py to test_find_multiple_authors.py 2022-11-08 03:11:26 +02:00
Richard Mwewa
4cc20d2a4b Update scraper.py 2022-11-08 02:50:59 +02:00
Richard Mwewa
f86e31bcf2 Update main.py 2022-11-08 02:50:19 +02:00
Richard Mwewa
455fe8a318 Update main.py 2022-11-07 23:14:35 +02:00
Richard Mwewa
bed4b37b5e Delete .github directory 2022-11-07 22:44:25 +02:00
Richard Mwewa
3e2a001890 Create __init__.py 2022-11-07 22:41:49 +02:00
Richard Mwewa
d2b887b576 Create main.py 2022-11-07 22:41:08 +02:00
Richard Mwewa
21944ef567 Update python-app.yml 2022-11-07 22:40:26 +02:00
Richard Mwewa
afed4ca88c Update scraper.py 2022-11-07 21:49:21 +02:00
Richard Mwewa
15a7b3bccb Create scraper.py 2022-11-07 21:48:33 +02:00
Richard Mwewa
109476ae9c Create test_find_multiple_users.py 2022-11-07 21:46:38 +02:00
Richard Mwewa
a2f20d150e Delete requirements.txt 2022-11-07 21:44:58 +02:00
Richard Mwewa
ac1aa09cb6 Delete test_find_multiple_authors.py 2022-11-07 21:44:41 +02:00
Richard Mwewa
5ce2151723 Delete scraper.py 2022-11-07 21:44:32 +02:00
Richard Mwewa
d9843fabac Create setup.py 2022-11-07 21:44:16 +02:00
Richard Mwewa
a9ba17dc0c Update README.md 2022-11-07 21:42:54 +02:00
10 changed files with 183 additions and 178 deletions

View File

@@ -1,39 +0,0 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Python application
# Triggers: pushes to master and pull requests targeting master.
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
# The workflow only requests read access to the repository contents.
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"
# requirements.txt is optional: it is only installed when the file exists.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# Two flake8 passes: the first can fail the build, the second only reports.
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# Only this single test module is run.
- name: Test with pytest
run: |
pytest test_find_multiple_authors.py

14
Dockerfile Normal file
View File

@@ -0,0 +1,14 @@
# syntax=docker/dockerfile:1
# Builds the project into a wheel, installs it, and runs its console script.
FROM python:latest
# Every following instruction works relative to /app inside the image.
WORKDIR /app
# Copy the whole build context into the image.
COPY . .
RUN pip install --upgrade pip
# `build` provides the `python -m build` command used below.
RUN pip install build
# Produces the distribution files that the next step installs from dist/.
RUN python -m build
RUN pip install dist/*.whl
# Start the installed command-line program when the container runs.
ENTRYPOINT ["youtube_comment_scraper"]

View File

@@ -1,47 +1,54 @@
# YouTube-Comment-Scraper
A script to scrape youtube comments and checks whether a user commented on the given videos
> [!WARNING]
> The repository was archived in January 2025 after discovering that it was no longer functional.
>
> We encourage you to use the [youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader) project, which is more fully featured, instead.
Scrapes YouTube comments and checks whether a user commented on the given videos.
# Installation
**1. Clone the project**
## Install with pip
```
git clone https://github.com/rly0nheart/YouTube-Comment-Scraper.git
pip install git+https://github.com/bellingcat/youtube-comment-scraper
```
**2. Move to YouTube-Comment-Scraper directory**
## Build from source
1. Clone the repository
```
cd YouTube-Comment-Scraper
git clone https://github.com/bellingcat/youtube-comment-scraper
```
**3. Install dependencies**
## Note
2. Move to the cloned project's directory
```
pip install -r requirements.txt
cd youtube-comment-scraper
```
3. Install the `build` package (If not already installed)
```
pip install build
```
4. Build the project
```
python -m build
```
5. Install the built package
```
pip install dist/*.whl
```
# Usage
## PyPi Package
```
python scraper.py <youtube_video_url_1> <youtube_video_url_2> <youtube_video_url_3>
```
> *Alternatively, you could grant execution permission to the downloader and run it as shown below*
**1. Grant execution permission**
```
chmod +x scraper.py
```
**2. Run scraper.py**
```
./scraper.py <youtube_video_url_1> <youtube_video_url_2> <youtube_video_url_3>
youtube_comment_scraper <video_urls>
```
## Note
> Upon run, the scraper will first check for updates. If found, users will be prompted to download the updates
>> The scraper uses [Egbert Bouman's](https://github.com/egbertbouman) [YouTube-Comment-Downloader](https://github.com/egbertbouman/youtube-comment-downloader) to get the comments
# Donations
If you would like to donate, you could Buy A Coffee for the developer using the button below
If you like `youtube-comment-scraper` and would like to show support, you could Buy A Coffee for the developer using the button below
<a href="https://www.buymeacoffee.com/189381184" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
<a href="https://www.buymeacoffee.com/_rly0nheart" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
Your support will be much appreciated!
Your support will be much appreciated!😊

View File

@@ -1,3 +0,0 @@
tqdm
requests
youtube-comment-downloader

View File

@@ -1,108 +0,0 @@
import tqdm
import requests
import argparse
from collections import defaultdict
from itertools import combinations
from itertools import islice
from youtube_comment_downloader import YoutubeCommentDownloader
program_version_number = '2022.1.0.0'
update_check_endpoint = "https://api.github.com/repos/rly0nheart/YouTube-Comment-Scraper/releases/latest"
def notice():
    """Print the program name, its version and the GPL licence banner."""
    banner = f"""
YouTube-Comment-Scraper {program_version_number} Copyright (C) 2022 Richard Mwewa
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
"""
    print(banner)
def check_and_get_updates():
    """
    Print the licence notice, then offer to install a newer release.

    The latest release tag is fetched from ``update_check_endpoint`` and
    compared with ``program_version_number``. When they differ the user is
    asked whether to update. On "y" every tracked file is downloaded from the
    master branch over the local copy and the program exits so it can be
    re-run. Any other answer leaves the installation untouched.

    The version lookup is best-effort: if the request fails or the response
    carries no ``tag_name``, a message is printed (for request failures) and
    the function returns so the caller can carry on.

    Raises:
        SystemExit: after a successful update.
        requests.HTTPError: if one of the update files cannot be downloaded.
    """
    notice()

    try:
        response = requests.get(update_check_endpoint, timeout=10).json()
    except (requests.RequestException, ValueError) as error:
        # ValueError covers a body that is not valid JSON.
        print(f"[!] Could not check for updates: {error}")
        return

    # An error payload has no 'tag_name'; there is nothing to compare then.
    latest_tag = response.get('tag_name') if isinstance(response, dict) else None
    if not latest_tag or latest_tag == program_version_number:
        return

    update_prompt = input(f"[?] A new release is available ({latest_tag}). Would you like to install it? (y/n) ")
    if update_prompt.lower() != "y":
        return

    files_to_update = ['scraper.py', 'test_find_multiple_users.py', 'README.md', 'requirements.txt']
    # `tqdm` is imported as a module, so the progress-bar class is `tqdm.tqdm`;
    # calling the module itself raises TypeError.
    for filename in tqdm.tqdm(files_to_update, desc='Updating'):
        data = requests.get(
            f'https://raw.githubusercontent.com/rly0nheart/YouTube-Comment-Scraper/master/{filename}',
            timeout=10,
        )
        # Never overwrite a local file with the body of an error response.
        data.raise_for_status()
        with open(filename, "wb") as f:
            f.write(data.content)
    print("[+] Updated: Re-run program.")
    raise SystemExit
def get_comment_dict(video_url, max_comments=100):
    """
    Group the first ``max_comments`` comments of a video by author.

    Returns a ``defaultdict(list)`` keyed by each comment's 'author' value,
    holding that author's comment records in the order they were yielded.
    """
    by_author = defaultdict(list)
    comment_stream = YoutubeCommentDownloader().get_comments_from_url(video_url)
    # islice stops pulling from the stream after max_comments entries.
    for entry in islice(comment_stream, max_comments):
        by_author[entry['author']].append(entry)
    return by_author
def _extract_video_id(url):
    """Return the video id found in a YouTube URL, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    # Regular watch URLs carry the id in the `v` query parameter, which is
    # not necessarily the first parameter.
    ids = parse_qs(parsed.query).get('v')
    if ids:
        return ids[0]
    # Other URL shapes (e.g. youtu.be/<id>) carry the id as the last path part.
    segments = [part for part in parsed.path.split('/') if part]
    if segments and segments[-1] != 'watch':
        return segments[-1]
    raise ValueError(f'Could not find a video id in URL: {url}')


def find_multiple_authors(video_urls):
    """
    Print the authors who commented on more than one of the given videos.

    Comments are fetched for every URL, then each pair of videos is compared
    and, for every shared author, up to the first 100 characters of each of
    their comments on both videos are printed.

    Args:
        video_urls: iterable of YouTube video URLs.

    Raises:
        ValueError: if no video id can be found in one of the URLs.
    """
    # video_dict maps the video url id to the
    # comment dict for that video
    video_dict = {}
    for url in video_urls:
        vid_uid = _extract_video_id(url)
        print('[~] Getting comments for video: ', vid_uid)
        video_dict[vid_uid] = get_comment_dict(url)

    # Iterate over the possible combinations of videos
    for (vid_id1, dict1), (vid_id2, dict2) in combinations(video_dict.items(), r=2):
        # Use set intersection to find common authors
        common_authors = dict1.keys() & dict2.keys()
        print(f'Videos: {vid_id1} & {vid_id2} have {len(common_authors)}')
        print(common_authors)

        for author in common_authors:
            print(f'[+] Author: {author}')
            print(f'[+] Video {vid_id1} comments: ')
            # Print the first 100 chars of each comment the author left
            for i, comment in enumerate(dict1[author], start=1):
                print(i, comment['text'][:100])
            print(f'[+] Video {vid_id2} comments: ')
            for i, comment in enumerate(dict2[author], start=1):
                print(i, comment['text'][:100])
            print()
if __name__ == '__main__':
    # Command-line entry point: parse the video URLs, run the update check,
    # then report the authors shared between the given videos.
    parser = argparse.ArgumentParser(
        'YouTube-Comment-Scraper — by Richard Mwewa',
        epilog='scrapes youtube comments and checks whether a user commented on the given videos',
    )
    parser.add_argument('videos', nargs='+', help='list of youtube video urls')
    # Reuse the module-level version so --version cannot drift from the
    # version shown in the notice and used by the update check.
    parser.add_argument('-v', '--version', version=program_version_number, action='version')
    args = parser.parse_args()

    try:
        check_and_get_updates()
        find_multiple_authors(args.videos)
    except KeyboardInterrupt:
        print('[!] Process interrupted with Ctrl+C.')
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print('[!] An error occurred:', e)

31
setup.py Normal file
View File

@@ -0,0 +1,31 @@
import setuptools
# The README doubles as the long description passed to setup() below.
with open('README.md', 'r', encoding='utf-8') as readme:
    long_description = readme.read()

# Trove classifiers describing the package.
CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Information Technology',
    'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
    'Operating System :: OS Independent',
    'Natural Language :: English',
    'Programming Language :: Python :: 3',
]

# Installs a `youtube_comment_scraper` command that calls
# youtube_comment_scraper.main:main.
CONSOLE_SCRIPTS = [
    'youtube_comment_scraper=youtube_comment_scraper.main:main',
]

setuptools.setup(
    name='youtube-comment-scraper',
    version='2022.1.2.0',
    author='Richard Mwewa',
    author_email='rly0nheart@duck.com',
    packages=['youtube_comment_scraper'],
    description='YouTube Comment Scraper',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/rly0nheart/youtube-comment-scraper',
    license='GNU General Public License v3 (GPLv3)',
    install_requires=['requests', 'youtube-comment-downloader'],
    classifiers=CLASSIFIERS,
    entry_points={'console_scripts': CONSOLE_SCRIPTS},
)

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,20 @@
import argparse
from youtube_comment_scraper.scraper import YouTubeCommentScraper
def create_parser():
    """Build the command-line parser, which takes one or more video URLs."""
    cli = argparse.ArgumentParser(
        prog='YouTube-Comment-Scraper — by Richard Mwewa | https://about.me/rly0nheart',
        epilog='scrapes youtube comments and checks whether a user commented on the given videos',
    )
    cli.add_argument('videos', help='list of youtube video urls', nargs='+')
    return cli
def main():
    """Console-script entry point: parse the URLs and run the comparison."""
    arguments = create_parser().parse_args()
    try:
        scraper = YouTubeCommentScraper()
        scraper.find_multiple_authors(arguments.videos)
    except KeyboardInterrupt:
        print("[x] Process interrupted with Ctrl+C.")
    except Exception as error:
        # Top-level boundary: show the message rather than a traceback.
        print("[!] An error occurred:", error)

View File

@@ -0,0 +1,82 @@
import requests
from itertools import islice
from itertools import combinations
from collections import defaultdict
from youtube_comment_downloader import YoutubeCommentDownloader
class YouTubeCommentScraper:
    """
    Find authors who commented on more than one of a set of YouTube videos.

    Comments are fetched with ``YoutubeCommentDownloader`` and all results
    are printed to stdout.
    """

    def __init__(self):
        # Version of this program; compared against the latest release tag.
        self.program_version_number = '2022.1.2.0'
        self.update_check_endpoint = "https://api.github.com/repos/rly0nheart/youtube-comment-scraper/releases/latest"

    def notice(self):
        """Print the program name, its version and the GPL licence banner."""
        notice_msg = f"""
YouTube-Comment-Scraper {self.program_version_number} Copyright (C) 2022 Richard Mwewa
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
"""
        print(notice_msg)

    def check_updates(self):
        """
        Print the notice, then report whether a newer release is available.

        The release tag fetched from ``update_check_endpoint`` is compared
        with ``program_version_number``; a hint is printed when they differ.
        The check is best-effort: a failed request, a non-JSON body or a
        response without ``tag_name`` never stops the caller.
        """
        self.notice()
        try:
            response = requests.get(self.update_check_endpoint, timeout=10).json()
        except (requests.RequestException, ValueError) as error:
            # ValueError covers a body that is not valid JSON.
            print(f"[!] Could not check for updates: {error}\n")
            return

        # An error payload has no 'tag_name'; there is nothing to compare then.
        latest_tag = response.get('tag_name') if isinstance(response, dict) else None
        if latest_tag and latest_tag != self.program_version_number:
            print(f"[!] A new release is available ({latest_tag}). Run 'pip install --upgrade youtube-comment-scraper' to get the updates.\n")

    def get_comment_dictionary(self, video_url, max_comments=100):
        """
        Creates a dictionary mapping comment-authors
        to a list of their comments

        Only the first ``max_comments`` comments yielded for the video are
        read.
        """
        downloader = YoutubeCommentDownloader()
        comment_dictionary = defaultdict(list)
        comments = downloader.get_comments_from_url(video_url)
        for comment in islice(comments, max_comments):
            comment_dictionary[comment['author']].append(comment)
        return comment_dictionary

    @staticmethod
    def _extract_video_id(url):
        """Return the video id found in a YouTube URL, or raise ValueError."""
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)
        # Regular watch URLs carry the id in the `v` query parameter, which
        # is not necessarily the first parameter.
        ids = parse_qs(parsed.query).get('v')
        if ids:
            return ids[0]
        # Other URL shapes (e.g. youtu.be/<id>) carry the id as the last
        # path part.
        segments = [part for part in parsed.path.split('/') if part]
        if segments and segments[-1] != 'watch':
            return segments[-1]
        raise ValueError(f'Could not find a video id in URL: {url}')

    def find_multiple_authors(self, video_urls):
        """
        Print the authors who commented on more than one of the given videos.

        Runs the update check first, fetches the comments of every URL, then
        compares each pair of videos and prints, for every shared author, up
        to the first 100 characters of each of their comments on both videos.

        Args:
            video_urls: iterable of YouTube video URLs.

        Raises:
            ValueError: if no video id can be found in one of the URLs.
        """
        self.check_updates()
        # video_dictionary maps the video url id to the
        # comment dict for that video
        video_dictionary = {}
        for url in video_urls:
            video_uid = self._extract_video_id(url)
            print('[*] Getting comments for video: ', video_uid)
            video_dictionary[video_uid] = self.get_comment_dictionary(url)

        # Iterate over the possible combinations of videos
        for item_1, item_2 in combinations(video_dictionary.items(), r=2):
            # Unpack from tuple
            video_id_1, dictionary_1 = item_1
            video_id_2, dictionary_2 = item_2

            # Use set intersection to find common authors
            common_authors = dictionary_1.keys() & dictionary_2.keys()
            print(f'Videos: {video_id_1} & {video_id_2} have {len(common_authors)}')
            print(common_authors)

            for author in common_authors:
                print(f'[+] Author: {author}')
                print(f'[+] Video {video_id_1} comments: ')
                # Print the first 100 chars of each comment the author left
                for count, comment in enumerate(dictionary_1[author], start=1):
                    print(count, comment['text'][:100])
                print(f'[+] Video {video_id_2} comments: ')
                for count, comment in enumerate(dictionary_2[author], start=1):
                    print(count, comment['text'][:100])
                print()

View File

@@ -1,10 +1,10 @@
from scraper import find_multiple_authors
from youtube_comment_scraper.scraper import YouTubeCommentScraper
def test_find_multiple_authors():
    """Smoke test: run the author comparison over three videos."""
    # List contains videos from Google's YouTube channel
    vids = [
        'https://www.youtube.com/watch?v=8qGV_O_y4DA',
        'https://www.youtube.com/watch?v=WSkETCRe7Ic',
        'https://www.youtube.com/watch?v=cdgQpa1pUUE'
    ]
    # The scraper's method is find_multiple_authors; the class has no
    # find_multiple_users, so calling that name raises AttributeError.
    YouTubeCommentScraper().find_multiple_authors(vids)