diff --git a/RPST GUI/RPST/RPST.vbproj b/RPST GUI/RPST/RPST.vbproj
index 379db43..1062cc1 100644
--- a/RPST GUI/RPST/RPST.vbproj
+++ b/RPST GUI/RPST/RPST.vbproj
@@ -13,11 +13,11 @@
https://github.com/bellingcat/reddit-post-scraping-tool
README.md
https://github.com/bellingcat/reddit-post-scraping-tool
- 1.6.1.0
- 1.6.1.0
+ 1.6.2.0
+ 1.6.2.0
LICENSE
True
- 1.6.1
+ 1.6.2
reddit;scraper;reddit-scraper;osint
6.0-recommended
diff --git a/pyproject.toml b/pyproject.toml
index dc85dcc..cdc299d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ packages = ["rpst"]
[project]
name = "reddit-post-scraping-tool"
-version = "1.6.1.0"
+version = "1.6.2.0"
description = "Given a subreddit name and a keyword, RPST returns all top (by default) posts that contain the specified keyword."
readme = "README.md"
requires-python = ">=3.8"
diff --git a/rpst/__main.py b/rpst/__main.py
index 7edee3e..eba61ae 100644
--- a/rpst/__main.py
+++ b/rpst/__main.py
@@ -17,7 +17,7 @@ def run():
try:
# Check for updates
- check_updates(version_tag="1.6.1.0")
+ check_updates(version_tag="1.6.2.0")
# Get posts with the provided/parsed arguments
get_posts(arguments=arguments)
diff --git a/rpst/__rpst.py b/rpst/__rpst.py
index c6a0092..15daae7 100644
--- a/rpst/__rpst.py
+++ b/rpst/__rpst.py
@@ -37,7 +37,7 @@ def write_post_data(post_data: dict, filename: str) -> str:
# Write the data to a JSON file
with open(file_path, "a") as file:
file.write(json.dumps(post_data))
- file.write("\n") # write a newline to separate posts
+ file.write("\n") # write a newline to separate posts.
return file.name
@@ -50,24 +50,24 @@ def check_updates(version_tag: str):
:param version_tag: A string representing the current version of the project.
"""
- # Make a GET request to the GitHub API to get the latest release of the project
+ # Make a GET request to the GitHub API to get the latest release of the project.
response = requests.get(
"https://api.github.com/repos/bellingcat/reddit-post-scraping-tool/releases/latest"
).json()
- # Check if the latest release's tag matches the current version tag
+ # Check if the latest release's tag matches the current version tag.
if response["tag_name"] != version_tag:
- # If not, convert the release notes from Markdown to HTML
+ # If not, convert the release notes from Markdown to HTML.
raw_release_notes = response["body"]
markdown_release_notes = Markdown(raw_release_notes)
- # Log an info message about the new release
+ # Log an info message about the new release.
log.info(
f"A new release of RPST is available ({response['tag_name']}). "
f"Run 'pip install --upgrade reddit-post-scraping-tool' to get the updates."
)
- # Print the release notes
+ # Print the release notes.
xprint(markdown_release_notes)
@@ -82,20 +82,20 @@ def create_post_branch(post: dict, keyword: str, output: bool, tree: Tree) -> Tr
:param tree: Tree where the post branch will be added.
:returns: The main tree with added post branches.
"""
- # Define the data to extract from the post
+ # Define the data to extract from the post.
post_data = {
- # 'Author': post['data']['author'],
+ # "Author": post["data"]["author"],
"ID": post["data"]["id"],
"Subreddit": post["data"]["subreddit_name_prefixed"],
"Visibility": post["data"]["subreddit_type"],
"Thumbnail": post["data"]["thumbnail"],
- "NSFW": post["data"]["over_18"],
"Gilded": post["data"]["gilded"],
"Upvotes": post["data"]["ups"],
"Upvote ratio": post["data"]["upvote_ratio"],
"Downvotes": post["data"]["downs"],
"Awards": post["data"]["total_awards_received"],
"Top award": post["data"]["top_awarded_type"],
+ "Is NSFW?": post["data"]["over_18"],
"Is crosspostable?": post["data"]["is_crosspostable"],
"Score": post["data"]["score"],
"Category": post["data"]["category"],
@@ -104,19 +104,23 @@ def create_post_branch(post: dict, keyword: str, output: bool, tree: Tree) -> Tr
"Approved at": post["data"]["approved_at_utc"],
"Approved by": post["data"]["approved_by"],
}
+
+ # Add the post's branch to the main tree.
+ post_branch = tree.add(f":scroll: {post['data']['title']}")
+
+ # Add each piece of extracted data as a branch of the post_branch.
+ for post_key, post_value in post_data.items():
+ post_branch.add(f"{post_key}: {post_value}", style="dim")
+
+ # If -j/--json is passed, write found posts to a JSON file.
if output:
+ # Ensure the post's selftext is also included in the written JSON file.
+ post_data["Text"] = post["data"]["selftext"]
output_file = write_post_data(filename=keyword, post_data=post_data)
tree.add(
f":page_facing_up: Post data written/appended to "
f"[italic][link file://{output_file}]{output_file}[/]"
)
-
- # Add the post's branch to the main tree.
- post_branch = tree.add(f":scroll: {post['data']['title']}")
-
- # Add each piece of extracted data as a branch of the post_branch
- for post_key, post_value in post_data.items():
- post_branch.add(f"{post_key}: {post_value}", style="dim")
post_branch.add(post["data"]["selftext"], style="italic")
return tree
@@ -150,25 +154,25 @@ def get_posts(arguments: argparse):
# Start a new session
session = requests.session()
- # Set the User-Agent to mimic a Safari browser on a Mac
+ # Set the User-Agent to mimic a Safari browser on a Mac.
session.headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, "
"like Gecko) Version/14.1.1 Safari/605.1.15"
}
# Send a GET request to the specified subreddit and listing,
- # limiting the response by the specified limit and timeframe
+ # limiting the response by the specified limit and timeframe.
response = session.get(
f"https://reddit.com/r/{subreddit}/{listing}"
f".json?limit={limit}&t={timeframe}"
).json()
- # Initialize a counter for the number of posts found that contain the keyword
+ # Initialize a counter for the number of posts found that contain the keyword.
found_posts = 0
# Loop through each post in the response
for post_index, post in enumerate(response["data"]["children"], start=1):
- # If the keyword is found in the post's selftext or title, increment the counter and process the post
+ # If the keyword is found in the post's selftext or title, increment the counter and process the post.
if (
keyword.lower() in post["data"]["selftext"]
or keyword.lower() in post["data"]["title"]