mirror of
https://github.com/bellingcat/reddit-post-scraping-tool.git
synced 2026-06-08 03:28:30 +03:00
dev 1.7.1.0
This commit is contained in:
19
README.md
19
README.md
@@ -4,24 +4,37 @@ Given a subreddit name and a keyword, RPST will return all posts from a specifie
|
||||
[](https://github.com/rly0nheart/reddit-post-scraping-tool/actions/workflows/python-publish.yml) [](https://github.com/rly0nheart/reddit-post-scraping-tool/actions/workflows/codeql.yml)  
|
||||
|
||||
# ✅ Features
|
||||
## GUI
|
||||
## *GUI*
|
||||
- [x] Dark mode (*Right-click*)
|
||||
- [x] Saves results to a JSON file (*Right-click*)
|
||||
- [x] Logs errors to a file
|
||||
- [x] In-App feature to check for Updates
|
||||
|
||||
## CLI
|
||||
## *CLI*
|
||||
- [x] Saves results to JSON (*specify* `--json`)
|
||||
- [x] Saves results to CSV (*specify* `--csv`)
|
||||
- [x] Automatically checks for new updates and notifies the user if any are found.
|
||||
|
||||
# 📃 TODO
|
||||
## GUI
|
||||
## *GUI*
|
||||
- [ ] Make it installable with a setup.exe/setup.msi file.
|
||||
- [x] Add manual dark mode option, that will be persistent in all sessions
|
||||
- [ ] Make it save results to a CSV file
|
||||
|
||||
# 🖥️ Tested environments
|
||||
## *GUI*
|
||||
- [x] Microsoft Windows 11
|
||||
|
||||
## *CLI*
|
||||
- [x] Android Termux
|
||||
- [x] Microsoft Windows 11
|
||||
- [x] Ubuntu 22.04 - latest versions
|
||||
|
||||
# 📖 Wiki
|
||||
[Refer to the Wiki](https://github.com/bellingcat/reddit-post-scraping-tool/wiki) for installation instructions, in addition to all other documentation.
|
||||
|
||||
# 🖼️ Screenshots
|
||||
You can view a collection of screenshots for both the *CLI* and *GUI* [here](https://github.com/bellingcat/reddit-post-scraping-tool/tree/master/images)
|
||||
***
|
||||
<a href="https://www.buymeacoffee.com/_rly0nheart"><img src="https://img.buymeacoffee.com/button-api/?text=Buy me a coffee&emoji=&slug=_rly0nheart&button_colour=40DCA5&font_colour=ffffff&font_family=Comic&outline_colour=000000&coffee_colour=FFDD00" /></a>
|
||||
|
||||
|
||||
@@ -13,11 +13,11 @@
|
||||
<PackageProjectUrl>https://github.com/bellingcat/reddit-post-scraping-tool</PackageProjectUrl>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
<RepositoryUrl>https://github.com/bellingcat/reddit-post-scraping-tool</RepositoryUrl>
|
||||
<AssemblyVersion>1.7.0.1</AssemblyVersion>
|
||||
<FileVersion>1.7.0.1</FileVersion>
|
||||
<AssemblyVersion>1.7.1.0</AssemblyVersion>
|
||||
<FileVersion>1.7.1.0</FileVersion>
|
||||
<PackageLicenseFile>LICENSE</PackageLicenseFile>
|
||||
<PackageRequireLicenseAcceptance>True</PackageRequireLicenseAcceptance>
|
||||
<Version>1.7.0</Version>
|
||||
<Version>1.7.1</Version>
|
||||
<PackageTags>reddit;scraper;reddit-scraper;osint</PackageTags>
|
||||
<PackageReleaseNotes></PackageReleaseNotes>
|
||||
<AnalysisLevel>6.0-recommended</AnalysisLevel>
|
||||
|
||||
@@ -7,7 +7,7 @@ packages = ["rpst"]
|
||||
|
||||
[project]
|
||||
name = "reddit-post-scraping-tool"
|
||||
version = "1.7.0.1"
|
||||
version = "1.7.1.0"
|
||||
description = "Given a subreddit name and a keyword, RPST returns all top (by default) posts that contain the specified keyword."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
|
||||
@@ -14,20 +14,20 @@ def run():
|
||||
parser = create_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
log = set_loglevel(args=args)
|
||||
log = set_loglevel(debug_mode=args.debug)
|
||||
|
||||
# Record the start time
|
||||
start_time = datetime.now()
|
||||
|
||||
try:
|
||||
# Check for updates
|
||||
check_updates(version_tag="1.7.0.1")
|
||||
check_updates(version_tag="1.7.1.0")
|
||||
|
||||
# Get posts with the provided/parsed arguments
|
||||
get_posts(args=args)
|
||||
except KeyboardInterrupt:
|
||||
log.warning("User interruption detected.")
|
||||
log.warning("User interruption detected ([yellow]Ctrl+C[/]).")
|
||||
except Exception as e:
|
||||
log.error(f"An error occurred: {e}")
|
||||
log.error(f"An error occurred: [red]{e}[/]")
|
||||
finally:
|
||||
log.info(f"Finished in {datetime.now() - start_time} seconds.")
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
from glyphoji import glyph
|
||||
from rich import print
|
||||
from rich.tree import Tree
|
||||
from rich import print as xprint
|
||||
|
||||
from .utils import convert_timestamp_to_datetime, write_post_data
|
||||
|
||||
@@ -128,4 +128,4 @@ def get_posts(args: argparse):
|
||||
f"{glyph.check_mark_button} Keyword ('{keyword}') was found in "
|
||||
f"{found_posts}/{len(response['data']['children'])} {listing} posts from r/{subreddit}."
|
||||
)
|
||||
xprint(main_tree)
|
||||
print(main_tree)
|
||||
|
||||
@@ -7,8 +7,9 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
from glyphoji import glyph
|
||||
from rich import print
|
||||
from rich.tree import Tree
|
||||
from rich import print as xprint
|
||||
|
||||
from rich.markdown import Markdown
|
||||
from rich.logging import RichHandler
|
||||
|
||||
@@ -84,7 +85,7 @@ def create_parser():
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
help="run rpst in debug mode (show network logs)",
|
||||
help="run rpst in debug mode",
|
||||
action="store_true",
|
||||
)
|
||||
|
||||
@@ -110,50 +111,30 @@ def check_updates(version_tag: str):
|
||||
raw_release_notes = response["body"]
|
||||
|
||||
# Log an info message about the new release.
|
||||
xprint(
|
||||
print(
|
||||
f"{glyph.up_arrow} A new release of RPST is available ({response['tag_name']}). "
|
||||
f"Run 'pip install --upgrade reddit-post-scraping-tool' to get the updates."
|
||||
)
|
||||
|
||||
# Print the release notes.
|
||||
xprint(Markdown(raw_release_notes))
|
||||
print(Markdown(raw_release_notes))
|
||||
|
||||
|
||||
def set_loglevel(args: argparse) -> logging.getLogger:
|
||||
def set_loglevel(debug_mode: bool) -> logging.getLogger:
|
||||
"""
|
||||
Configures the logging level based on the provided arguments.
|
||||
Configure and return a logging object with the specified log level.
|
||||
|
||||
If `args.debug` is True, the logging level is set to "NOTSET," allowing all log messages to be displayed.
|
||||
Otherwise, the logging level is set to "INFO," and only informational and higher-severity messages are displayed.
|
||||
|
||||
The function also configures a RichHandler for formatting the log messages,
|
||||
including a specific time format and hiding the log level.
|
||||
|
||||
:param args: A namespace object from argparse containing the debugging option (args.debug).
|
||||
:return: A logger object associated with the name "rich."
|
||||
:param debug_mode: If True, the log level is set to "NOTSET". Otherwise, it is set to "INFO".
|
||||
:return: A logging object configured with the specified log level.
|
||||
"""
|
||||
if args.debug:
|
||||
logging.basicConfig(
|
||||
level="NOTSET",
|
||||
format="%(message)s",
|
||||
handlers=[
|
||||
RichHandler(
|
||||
markup=True, log_time_format="[%H:%M:%S%p]", show_level=False
|
||||
)
|
||||
],
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(
|
||||
level="INFO",
|
||||
format="%(message)s",
|
||||
handlers=[
|
||||
RichHandler(
|
||||
markup=True, log_time_format="[%H:%M:%S%p]", show_level=False
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
return logging.getLogger("rich")
|
||||
logging.basicConfig(
|
||||
level="NOTSET" if debug_mode else "INFO",
|
||||
format="%(message)s",
|
||||
handlers=[
|
||||
RichHandler(markup=True, log_time_format="[%I:%M:%S %p]", show_level=False)
|
||||
],
|
||||
)
|
||||
return logging.getLogger("RPST")
|
||||
|
||||
|
||||
def write_post_data(post_data: dict, filename: str, args, tree_branch: Tree):
|
||||
|
||||
Reference in New Issue
Block a user