Update reddit-post-scraping-tool.py

This commit is contained in:
Richard Mwewa
2022-10-22 00:27:03 +02:00
committed by GitHub
parent 265b75c064
commit 7f5cbc79a3

View File

@@ -1,7 +1,6 @@
import logging
import argparse
import requests
#from pprint import pprint
from datetime import datetime
class postScraper:
@@ -9,28 +8,20 @@ class postScraper:
self.session = requests.session()
self.session.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15'}
def start(self):
    """Fetch a subreddit listing and print every post mentioning the keyword.

    Reads the global ``args`` object (presumably the parsed command-line
    namespace; confirm against the argument parser) for ``subreddit``,
    ``listing``, ``limit``, ``timeframe`` and ``keyword``.  Each post whose
    body (``selftext``) or title contains the keyword is announced on stdout
    and handed to ``self.get_posts`` for detailed printing.  A summary with
    the number of matching posts is logged at the end.

    The keyword match is case-insensitive.

    Raises:
        KeyError: if the JSON response has no ``data``/``children`` entries
            (behaviour unchanged from the original code).
    """
    response = self.session.get(f'https://reddit.com/r/{args.subreddit}/{args.listing}.json?limit={args.limit}&t={args.timeframe}').json()
    posts = response['data']['children']

    # Lowercase the keyword once; the post text is lowercased as well below,
    # otherwise a lowercased keyword can never match text containing capitals.
    keyword = args.keyword.lower()

    found_posts = 0
    # enumerate() supplies the 1-based post number shown in the output, so no
    # hand-maintained counter is needed.
    for post_number, post in enumerate(posts, start=1):
        data = post['data']
        if keyword in data['selftext'].lower() or keyword in data['title'].lower():
            found_posts += 1
            print(f'\n\n[+] [post: {post_number}] \'{args.keyword}\' found:')
            # The sibling method is named get_posts; the former getPosts
            # spelling no longer exists on the class.
            self.get_posts(post)

    logging.info(f"Keyword ('{args.keyword}') was found in {found_posts}/{len(posts)} {args.listing} posts from r/{args.subreddit}.")
# Getting posts
def getPosts(self, post):
def get_posts(self, post):
post_data = {'Author': post['data']['author'],
'ID': post['data']['id'],
'Subreddit': post["data"]["subreddit_name_prefixed"],
@@ -54,8 +45,8 @@ class postScraper:
'Approved at': post['data']['approved_at_utc'],
'Approved by': post['data']['approved_by'],}
for key, value in post_data.items():
print(f" ├─ {key}: {value}")
for post_key, post_value in post_data.items():
print(f" ├─ {post_key}: {post_value}")
print(post['data']['selftext']+"\n")
@@ -72,8 +63,8 @@ logging.basicConfig(format=f'[%(asctime)s] %(message)s', datefmt=f'%H:%M:%S%p',
if __name__ == '__main__':
try:
postScraper(args).Start()
postScraper(args).start()
except KeyboardInterrupt:
logging.warning(f'Process interrupted with (Ctrl+C).')