Create main.py

2026-06-08 03:18:31 +03:00 · 2023-01-19 03:17:11 +02:00
parent 5f4eb9f2c8
commit 9dd22c90c7
1 changed files with 73 additions and 0 deletions
--- a/tiktok_hashtag_analysis/main.py
+++ b/tiktok_hashtag_analysis/main.py
@@ -0,0 +1,73 @@
+import logging
+import argparse
+from tiktok_hashtag_analysis.run_downloader import * # Import everything from run_downloader.py
+from tiktok_hashtag_analysis.hashtag_frequencies import * # Import everything from hashtag_frequencies.py
+
+# getLogger() is called without a name, so this is the root logger.
+logger = logging.getLogger()
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the hashtag analysis tool.
+
+    The parser accepts one positional ``module`` choice (``download`` or
+    ``frequencies``) followed by the optional flags of both modules. No
+    cross-argument validation is performed here.
+    """
+    cli = argparse.ArgumentParser(description="Analyze hashtags within posts scraped from TikTok.")
+    cli.add_argument("module", help="module to use", choices=['download', 'frequencies'])
+
+    # Each entry is (option strings, add_argument keyword arguments); the
+    # entries are registered in the order listed.
+    option_specs = (
+        (("-t",), dict(type=str, nargs="*", help="List of hashtags to scrape (module: run_downloader)")),
+        (("-f",), dict(type=str, help="File name containing list of hashtags to scrape (module: run_downloader)")),
+        (("-p",), dict(action="store_true", help="Download post data (module: run_downloader)")),
+        (("-v",), dict(action="store_true", help="Download video files (module: run_downloader)")),
+        (
+            ("-ht", "--hashtag"),
+            dict(type=str, help="The hashtag of scraped posts to analyze (module: hashtag_frequencies)"),
+        ),
+        (
+            ("-n", "--number"),
+            dict(type=int, help="The number of top n occurrences (module: hashtag_frequencies)"),
+        ),
+        (
+            ("-plt", "--plot"),
+            dict(help="Plot the occurrences (module: hashtag_frequencies)", action="store_true"),
+        ),
+        (
+            ("-d", "--print"),
+            dict(help="List top n hashtags (module: hashtag_frequencies)", action="store_true"),
+        ),
+    )
+    for flags, settings in option_specs:
+        cli.add_argument(*flags, **settings)
+
+    return cli
+
+
+def main():
+    """Command-line entry point: parse the arguments and run the chosen module.
+
+    ``download`` collects the hashtags from ``-t`` or from the file given with
+    ``-f``, passes them to ``get_data`` together with the post/video selection
+    and, if a summary comes back, hands it to ``file_methods.log_writer``.
+
+    ``frequencies`` locates the data file of ``--hashtag``, calls
+    ``get_occurrences`` for the top ``--number`` entries and then either plots
+    the result (``--plot``) or prints it.
+
+    Raises:
+        SystemExit: via ``parser.error`` when a flag required by the selected
+            module is missing.
+        ValueError: if the resolved hashtag list is empty, or ``--number`` is
+            smaller than one.
+        FileNotFoundError: if the data file for ``--hashtag`` does not exist.
+    """
+    # `os` is otherwise only in scope if one of the star imports re-exports it.
+    import os
+
+    parser = create_parser()
+    args = parser.parse_args()
+
+    if args.module == "download":
+        if not (args.t or args.f):
+            parser.error(
+                "No hashtags were given, please use either the `-t` flag or the `-f` flag to specify one or more hashtags.")
+
+        if not (args.p or args.v):
+            parser.error(
+                "No argument given, please specify either the `-p` flag to download post data or the `-v` flag to download video files, or both."
+            )
+
+        # `-t` wins when both `-t` and `-f` are supplied; the check above
+        # guarantees that `-f` is set whenever `-t` is empty.
+        if args.t:
+            hashtags = args.t
+        else:
+            hashtags = get_hashtag_list(args.f)
+
+        logger.info("Hashtags to scrape: %s", hashtags)
+        if not hashtags:
+            raise ValueError(
+                "No hashtags were specified: please use either the `-t` flag to specify a space-separated list of one or more hashtags as a command-line argument, or use the `-f` flag to specify a text file of newline-separated hashtags.")
+
+        download_data_type = {"posts": args.p, "videos": args.v}
+
+        scraped_summary_list = get_data(hashtags, download_data_type)
+        if scraped_summary_list:
+            file_methods.log_writer(scraped_summary_list)
+    elif args.module == "frequencies":
+        # Validate the arguments before any filesystem helper is called.
+        if not args.hashtag:
+            parser.error(
+                "No hashtag was given, please use the `-ht`/`--hashtag` flag to specify the hashtag to analyze.")
+
+        # argparse stores `-n/--number` under `number` (the long option name),
+        # so the value is `args.number`; it is None when the flag is omitted.
+        if args.number is None:
+            parser.error(
+                "No number was given, please use the `-n`/`--number` flag to specify the number of top occurrences.")
+        if args.number < 1:
+            raise ValueError(
+                f"Specified argument `n` (the number of hashtags to analyze) must be greater than zero, not: {args.number}.")
+
+        img_folder = IMAGES
+        check_file(img_folder, "dir")
+
+        input_file = os.path.join(
+            FILES["data"], args.hashtag, FILES["posts"], FILES["data_file"]
+        )
+        if not check_existence(input_file, "file"):
+            raise FileNotFoundError(
+                f"File ({input_file}) for specified argument `hashtag` ({args.hashtag}) does not exist.")
+
+        occs = get_occurrences(input_file, args.number)
+        # NOTE(review): the `-d/--print` flag is never consulted; the
+        # occurrences are printed whenever `--plot` is absent.
+        if args.plot:
+            plot(occs, img_folder)
+        else:
+            print_occurrences(occs)