Update reddit-post-scraping-tool.py

This commit is contained in:
Richard Mwewa
2022-10-22 00:50:40 +02:00
parent 7f5cbc79a3
commit a0b4ef6382

View File

@@ -1,9 +1,12 @@
import logging import logging
import argparse import argparse
import requests import requests
from rich.tree import Tree
from datetime import datetime from datetime import datetime
from rich import print as xprint
class postScraper:
class RedditPostScraper:
def __init__(self, args): def __init__(self, args):
self.session = requests.session() self.session = requests.session()
self.session.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15'} self.session.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15'}
@@ -14,8 +17,7 @@ class postScraper:
for post in response['data']['children']: for post in response['data']['children']:
if args.keyword.lower() in post['data']['selftext'] or args.keyword.lower() in post['data']['title']: if args.keyword.lower() in post['data']['selftext'] or args.keyword.lower() in post['data']['title']:
found_posts += 1 found_posts += 1
print(f'\n\n[+] [post: {total_posts}] \'{args.keyword}\' found:') self.get_posts(post)
self.getPosts(post)
logging.info(f"Keyword ('{args.keyword}') was found in {found_posts}/{len(response['data']['children'])} {args.listing} posts from r/{args.subreddit}.") logging.info(f"Keyword ('{args.keyword}') was found in {found_posts}/{len(response['data']['children'])} {args.listing} posts from r/{args.subreddit}.")
@@ -28,7 +30,6 @@ class postScraper:
'Visibility': post['data']['subreddit_type'], 'Visibility': post['data']['subreddit_type'],
#'Author': post["data"]["author_fullname"], #'Author': post["data"]["author_fullname"],
'Thumbnail': post["data"]["thumbnail"], 'Thumbnail': post["data"]["thumbnail"],
'Title': post["data"]["title"],
#'Flair': post["data"]["link_flair_text"], #'Flair': post["data"]["link_flair_text"],
'NSFW': post['data']['over_18'], 'NSFW': post['data']['over_18'],
'Gilded': post['data']['gilded'], 'Gilded': post['data']['gilded'],
@@ -45,8 +46,10 @@ class postScraper:
'Approved at': post['data']['approved_at_utc'], 'Approved at': post['data']['approved_at_utc'],
'Approved by': post['data']['approved_by'],} 'Approved by': post['data']['approved_by'],}
post_tree = Tree("\n" + post['data']['title'])
for post_key, post_value in post_data.items(): for post_key, post_value in post_data.items():
print(f" ├─ {post_key}: {post_value}") post_tree.add(f"{post_key}: {post_value}")
xprint(post_tree)
print(post['data']['selftext']+"\n") print(post['data']['selftext']+"\n")
@@ -63,7 +66,7 @@ logging.basicConfig(format=f'[%(asctime)s] %(message)s', datefmt=f'%H:%M:%S%p',
if __name__ == '__main__': if __name__ == '__main__':
try: try:
postScraper(args).start() RedditPostScraper(args).start()
except KeyboardInterrupt: except KeyboardInterrupt:
logging.warning(f'Process interrupted with (Ctrl+C).') logging.warning(f'Process interrupted with (Ctrl+C).')