Add archiving notice to README

Update README.md
2026-06-09 20:18:33 +03:00 · 2025-01-21 14:46:58 +00:00 · 2023-08-08 07:28:27 +02:00 · 2023-03-07 19:28:06 +02:00 · 2022-11-18 02:25:10 +02:00 · 2022-11-18 02:24:41 +02:00
10 changed files with 183 additions and 178 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -1,39 +0,0 @@
-# This workflow will install Python dependencies, run tests and lint with a single version of Python
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
-name: Python application
-
-on:
-  push:
-    branches: [ "master" ]
-  pull_request:
-    branches: [ "master" ]
-
-permissions:
-  contents: read
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v3
-      with:
-        python-version: "3.10"
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        pytest test_find_multiple_authors.py
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+# syntax=docker/dockerfile:1
+
+# Pinned slim base: `latest` is not reproducible and pulls the full-size image.
+FROM python:3.11-slim
+
+WORKDIR /app
+COPY . .
+
+# Build and install the wheel in one layer; --no-cache-dir keeps pip's cache out of the image.
+RUN pip install --no-cache-dir --upgrade pip build \
+    && python -m build \
+    && pip install --no-cache-dir dist/*.whl
+
+ENTRYPOINT ["youtube_comment_scraper"]
--- a/README.md
+++ b/README.md
@@ -1,47 +1,54 @@
 # YouTube-Comment-Scraper
-A script to scrape youtube comments and checks whether a user commented on the given videos
+
+> [!WARNING]  
+> The repository was archived in January 2025 after it was discovered to be no longer functional.
+> 
+> We encourage you to use the [youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader) project, which is more fully featured, instead.
+
+Scrapes youtube comments and checks whether a user commented on the given videos

 # Installation
-**1. Clone the project**
+## Install with pip
 ```
-git clone https://github.com/rly0nheart/YouTube-Comment-Scraper.git
+pip install git+https://github.com/bellingcat/youtube-comment-scraper
 ```

-**2. Move to YouTube-Comment-Scraper directory**
+## Build from source
+1. Clone the repository
 ```
-cd YouTube-Comment-Scraper
+git clone https://github.com/bellingcat/youtube-comment-scraper
 ```
-
-**3. Install dependencies**
-## Note
+2. Move to the cloned project's directory
 ```
-pip install -r requirements.txt
+cd youtube-comment-scraper
+```
+3. Install the `build` package (If not already installed)
+```
+pip install build
+```
+4. Build the project
+```
+python -m build
+```
+5. Install the built package
+```
+pip install dist/*.whl
 ```

 # Usage
+## PyPi Package
 ```
-python scraper.py <youtube_video_url_1> <youtube_video_url_2> <youtube_video_url_3>
-```
-
-> *Alternatively, you could grant execution permission to the downloader and run it as shown below*
-
-**1. Grant execution permission**
-```
-chmod +x scraper.py
-```
-
-**2. Run scraper.py**
-```
-./scraper.py <youtube_video_url_1> <youtube_video_url_2> <youtube_video_url_3>
+youtube_comment_scraper <video_urls>
 ```

 ## Note
 > Upon run, the scraper will first check for updates. If found, users will be prompted to download the updates
+>> The scraper uses [Egbert Bouman's](https://github.com/egbertbouman) [YouTube-Comment-Downloader](https://github.com/egbertbouman/youtube-comment-downloader) to get the comments

 # Donations
-If you would like to donate, you could Buy A Coffee for the developer using the button below
+If you like `youtube-comment-scraper` and would like to show support, you could Buy A Coffee for the developer using the button below

-<a href="https://www.buymeacoffee.com/189381184" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
+<a href="https://www.buymeacoffee.com/_rly0nheart" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>

-Your support will be much appreciated!
+Your support will be much appreciated!😊

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +0,0 @@
-tqdm
-requests
-youtube-comment-downloader
--- a/scraper.py
+++ b/scraper.py
@@ -1,108 +0,0 @@
-import tqdm
-import requests
-import argparse
-from collections import defaultdict
-from itertools import combinations
-from itertools import islice
-from youtube_comment_downloader import YoutubeCommentDownloader
-
-
-program_version_number = '2022.1.0.0'
-update_check_endpoint = "https://api.github.com/repos/rly0nheart/YouTube-Comment-Scraper/releases/latest"
-
-def notice():
-    notice_msg = f"""
-    YouTube-Comment-Scraper {program_version_number} Copyright (C) 2022  Richard Mwewa
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-    """
-    print(notice_msg)
-
-
-def check_and_get_updates():
-    notice()
-    """
-    Checks if the release tag matches the current tag in the program
-    If there's a match, ignore
-    """
-    response = requests.get(update_check_endpoint).json()
-    if response['tag_name'] == program_version_number:
-        pass
-    else:
-        update_prompt = input(f"[?] A new release is available ({response['tag_name']}). Would you like to install it? (y/n) ")
-        if update_prompt.lower() == "y":
-            files_to_update = ['scraper.py', 'test_find_multiple_users.py', 'README.md', 'requirements.txt']
-            for file in tqdm(files_to_update, desc=f'Updating'):
-                data = requests.get(f'https://raw.githubusercontent.com/rly0nheart/YouTube-Comment-Scraper/master/{file}')
-                with open(file, "wb") as f:
-                    f.write(data.content)
-                    f.close()
-            print(f"[+] Updated: Re-run program.");exit()
-        else:
-            pass
-
-
-def get_comment_dict(video_url, max_comments=100):
-    """
-    Creates a dictionary mapping comment-authors 
-    to a list of their comments
-    """
-    downloader = YoutubeCommentDownloader()
-    comment_dict = defaultdict(list)
-    comments = downloader.get_comments_from_url(video_url)
-    for comment in islice(comments, max_comments):
-        comment_dict[comment['author']].append(comment)
-
-    return comment_dict
-
-def find_multiple_authors(video_urls):
-
-    # video_dict maps the video url id to the 
-    # comment dict for that video
-    video_dict = {}
-    for url in video_urls:
-        vid_uid = url.split('=')[1].split('&')[0]
-        print('[~] Getting comments for video: ', vid_uid)
-        video_dict[vid_uid] = get_comment_dict(url)
-
-    # Iterate over the possible combinations of videos
-    for item1, item2 in combinations(video_dict.items(), r=2):
-        # Unpack from tuple
-        vid_id1, dict1 = item1
-        vid_id2, dict2 = item2
-        # Use set intersection to find common authors
-        common_authors = dict1.keys() & dict2.keys()
-        print(f'Videos: {vid_id1} & {vid_id2} have {len(common_authors)}')
-        print(common_authors)
-        for author in common_authors:
-            print(f'[+] Author: {author}')
-            print(f'[+] Video {vid_id1} comments: ')
-            # Iterate over each comment author left on video1
-            # and print first 100 chars
-            for i, comment in enumerate(dict1[author]):
-                print(i+1, comment['text'][:100])
-            print(f'[+] Video {vid_id2} comments: ')
-            for i, comment in enumerate(dict2[author]):
-                print(i+1, comment['text'][:100])
-
-            print()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser('YouTube-Comment-Scraper — by Richard Mwewa', epilog='scrapes youtube comments and checks whether a user commented on the given videos')
-    parser.add_argument('videos', nargs='+', help='list of youtube video urls')
-    parser.add_argument('-v', '--version', version='2022.1.0.0', action='version')
-    args = parser.parse_args()
-    try:
-        check_and_get_updates()
-        find_multiple_authors(args.videos)
-
-    except KeyboardInterrupt:
-        print('[!] Process interrupted with Ctrl+C.')
-    
-    except Exception as e:
-        print('[!] An error occurred:', e)
-
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,31 @@
+# Packaging script: declares the distribution metadata, its dependencies and
+# the console command for the youtube_comment_scraper package.
+import setuptools
+
+# The README is reused verbatim as the long description (declared as markdown below).
+with open('README.md', 'r', encoding='utf-8') as file:
+    long_description = file.read()
+
+setuptools.setup(
+    name='youtube-comment-scraper',
+    version='2022.1.2.0',
+    author='Richard Mwewa',
+    author_email='rly0nheart@duck.com',
+    packages=['youtube_comment_scraper'],
+    description='YouTube Comment Scraper',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/rly0nheart/youtube-comment-scraper',
+    license='GNU General Public License v3 (GPLv3)',
+    install_requires=['requests', 'youtube-comment-downloader'],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Information Technology',
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',  
+        'Operating System :: OS Independent',
+        'Natural Language :: English',
+        'Programming Language :: Python :: 3'
+        ],
+    # Installs a `youtube_comment_scraper` command that calls main() in
+    # youtube_comment_scraper/main.py.
+    entry_points={
+        'console_scripts': [
+            'youtube_comment_scraper=youtube_comment_scraper.main:main',
+        ]
+    },
+)
--- a/youtube_comment_scraper/__init__.py
+++ b/youtube_comment_scraper/__init__.py
@@ -0,0 +1 @@
+
--- a/youtube_comment_scraper/main.py
+++ b/youtube_comment_scraper/main.py
@@ -0,0 +1,20 @@
+import argparse
+from youtube_comment_scraper.scraper import YouTubeCommentScraper
+
+
+def create_parser():
+    """
+    Builds the command-line parser, which accepts one or more positional
+    `videos` arguments (youtube video urls).
+    """
+    # NOTE(review): the banner string is passed positionally, i.e. as argparse's
+    # `prog`, so it is what appears in the usage line — confirm this is intended.
+    parser = argparse.ArgumentParser('YouTube-Comment-Scraper — by Richard Mwewa | https://about.me/rly0nheart', epilog='scrapes youtube comments and checks whether a user commented on the given videos')
+    parser.add_argument('videos', nargs='+', help='list of youtube video urls')
+    return parser
+    
+
+def main():
+    """
+    Command-line entry point: parses the video urls from the command line and
+    prints the authors the given videos have in common.
+
+    Returns an exit status instead of None so that a wrapper calling
+    sys.exit(main()) reports failures to the shell:
+    0 on success, 130 when interrupted with Ctrl+C, 1 on any other error.
+    """
+    _parser = create_parser()
+    args = _parser.parse_args()
+    try:
+        YouTubeCommentScraper().find_multiple_authors(args.videos)
+    except KeyboardInterrupt:
+        print("[x] Process interrupted with Ctrl+C.")
+        return 130
+    except Exception as e:
+        # Top-level catch-all: show a short message rather than a traceback,
+        # but still signal the failure through the exit status.
+        print("[!] An error occurred:", e)
+        return 1
+    return 0
--- a/youtube_comment_scraper/scraper.py
+++ b/youtube_comment_scraper/scraper.py
@@ -0,0 +1,82 @@
+import requests
+from itertools import islice
+from itertools import combinations
+from collections import defaultdict
+from youtube_comment_downloader import YoutubeCommentDownloader
+
+
+class YouTubeCommentScraper:
+    """
+    Downloads the comments of several YouTube videos and reports the authors
+    who commented on more than one of them.
+    """
+
+    def __init__(self):
+        self.program_version_number = '2022.1.2.0'
+        self.update_check_endpoint = "https://api.github.com/repos/rly0nheart/youtube-comment-scraper/releases/latest"
+
+    def notice(self):
+        """
+        Prints the program name, version and licence notice
+        """
+        notice_msg = f"""
+        YouTube-Comment-Scraper {self.program_version_number} Copyright (C) 2022  Richard Mwewa
+        
+        This program is free software: you can redistribute it and/or modify
+        it under the terms of the GNU General Public License as published by
+        the Free Software Foundation, either version 3 of the License, or
+        (at your option) any later version.
+        """
+        print(notice_msg)
+
+    def check_updates(self):
+        """
+        Prints the licence notice, then checks if the latest release tag
+        matches the current tag in the program. If there's a match, ignore;
+        otherwise tell the user how to upgrade.
+
+        The check is best-effort: a network failure, a timeout or a response
+        without a 'tag_name' is reported and never aborts the scrape.
+        """
+        self.notice()
+        try:
+            # The timeout keeps an unreachable API from hanging the program
+            response = requests.get(self.update_check_endpoint, timeout=10).json()
+            latest_tag = response['tag_name']
+        except (requests.RequestException, ValueError, KeyError, TypeError) as error:
+            print(f"[!] Could not check for updates: {error!r}\n")
+            return
+        if latest_tag != self.program_version_number:
+            print(f"[!] A new release is available ({latest_tag}). Run 'pip install --upgrade youtube-comment-scraper' to get the updates.\n")
+
+    def get_comment_dictionary(self, video_url, max_comments=100):
+        """
+        Creates a dictionary mapping comment-authors
+        to a list of their comments. At most max_comments
+        comments are read from the video.
+        """
+        downloader = YoutubeCommentDownloader()
+        comment_dictionary = defaultdict(list)
+        comments = downloader.get_comments_from_url(video_url)
+        for comment in islice(comments, max_comments):
+            comment_dictionary[comment['author']].append(comment)
+
+        return comment_dictionary
+
+    @staticmethod
+    def _video_uid(url):
+        """
+        Returns the video id of a YouTube url: the `v` query parameter when
+        present (wherever it sits in the query string), otherwise the last
+        path segment (e.g. https://youtu.be/<id>), otherwise the url itself.
+        """
+        from urllib.parse import parse_qs, urlparse
+
+        parts = urlparse(url)
+        ids = parse_qs(parts.query).get('v')
+        if ids:
+            return ids[0]
+        return parts.path.rstrip('/').rsplit('/', 1)[-1] or url
+
+    def find_multiple_authors(self, video_urls):
+        """
+        Checks for updates, downloads the comments of every url in video_urls
+        and prints, for each pair of videos, the authors who commented on both
+        together with the first 100 characters of each of their comments.
+        """
+        self.check_updates()
+        # video_dictionary maps the video url id to the
+        # comment dict for that video
+        video_dictionary = {}
+        for url in video_urls:
+            video_uid = self._video_uid(url)
+            print('[*] Getting comments for video: ', video_uid)
+            video_dictionary[video_uid] = self.get_comment_dictionary(url)
+
+        # Iterate over the possible combinations of videos
+        for item_1, item_2 in combinations(video_dictionary.items(), r=2):
+            # Unpack from tuple
+            video_id_1, dictionary_1 = item_1
+            video_id_2, dictionary_2 = item_2
+            # Use set intersection to find common authors
+            common_authors = dictionary_1.keys() & dictionary_2.keys()
+            print(f'Videos: {video_id_1} & {video_id_2} have {len(common_authors)}')
+            print(common_authors)
+            for author in common_authors:
+                print(f'[+] Author: {author}')
+                print(f'[+] Video {video_id_1} comments: ')
+                # Iterate over each comment author left on video1
+                # and print first 100 chars
+                for count, comment in enumerate(dictionary_1[author], start=1):
+                    print(count, comment['text'][:100])
+                print(f'[+] Video {video_id_2} comments: ')
+                for count, comment in enumerate(dictionary_2[author], start=1):
+                    print(count, comment['text'][:100])
+                print()
--- a/youtube_comment_scraper/test_find_multiple_authors.py
+++ b/youtube_comment_scraper/test_find_multiple_authors.py
@@ -1,10 +1,10 @@
-from scraper import find_multiple_authors
+from youtube_comment_scraper.scraper import YouTubeCommentScraper
     
-def test_find_multiple_users():
+def test_find_multiple_authors():
+    """Smoke test: runs the scraper end-to-end on three live videos (needs network access)."""
    # List contains, videos from Google's YouTube channel
    vids = [
        'https://www.youtube.com/watch?v=8qGV_O_y4DA',
        'https://www.youtube.com/watch?v=WSkETCRe7Ic',
        'https://www.youtube.com/watch?v=cdgQpa1pUUE'
    ]
-    find_multiple_authors(vids)
+    YouTubeCommentScraper().find_multiple_authors(vids)
Author	SHA1	Message	Date
Galen Reich	b9150ffbb1	Add archiving notice to README	2025-01-21 14:46:58 +00:00
Richard Mwewa	b9211c936a	Update README.md	2023-08-08 07:28:27 +02:00
Richard Mwewa	21fec1ab1f	Update README.md	2023-03-07 19:28:06 +02:00
Richard Mwewa	4010fe3c09	Update scraper.py	2022-11-18 02:25:10 +02:00
Richard Mwewa	8ec8e2d64c	Update setup.py	2022-11-18 02:24:41 +02:00
Richard Mwewa	b32389aa63	Update	2022-11-18 02:23:33 +02:00
Richard Mwewa	fcade4b253	Create Dockerfile	2022-11-18 01:44:54 +02:00
Richard Mwewa	430845d008	Update and rename test_find_multiple_users.py to test_find_multiple_authors.py	2022-11-08 03:11:26 +02:00
Richard Mwewa	4cc20d2a4b	Update scraper.py	2022-11-08 02:50:59 +02:00
Richard Mwewa	f86e31bcf2	Update main.py	2022-11-08 02:50:19 +02:00
Richard Mwewa	455fe8a318	Update main.py	2022-11-07 23:14:35 +02:00
Richard Mwewa	bed4b37b5e	Delete .github directory	2022-11-07 22:44:25 +02:00
Richard Mwewa	3e2a001890	Create __init__.py	2022-11-07 22:41:49 +02:00
Richard Mwewa	d2b887b576	Create main.py	2022-11-07 22:41:08 +02:00
Richard Mwewa	21944ef567	Update python-app.yml	2022-11-07 22:40:26 +02:00
Richard Mwewa	afed4ca88c	Update scraper.py	2022-11-07 21:49:21 +02:00
Richard Mwewa	15a7b3bccb	Create scraper.py	2022-11-07 21:48:33 +02:00
Richard Mwewa	109476ae9c	Create test_find_multiple_users.py	2022-11-07 21:46:38 +02:00
Richard Mwewa	a2f20d150e	Delete requirements.txt	2022-11-07 21:44:58 +02:00
Richard Mwewa	ac1aa09cb6	Delete test_find_multiple_authors.py	2022-11-07 21:44:41 +02:00
Richard Mwewa	5ce2151723	Delete scraper.py	2022-11-07 21:44:32 +02:00
Richard Mwewa	d9843fabac	Create setup.py	2022-11-07 21:44:16 +02:00
Richard Mwewa	a9ba17dc0c	Update README.md	2022-11-07 21:42:54 +02:00