Added a video link to the msToken input prompt, improved handling of output directories without write permission (and added a relevant unit test), and removed unused entries from requirements.txt

2026-06-08 03:18:31 +03:00 · 2023-09-06 19:51:16 -05:00
parent 6a56c354e1
commit 91a8aaef38
6 changed files with 93 additions and 42 deletions
--- a/tiktok_hashtag_analysis/auth.py
+++ b/tiktok_hashtag_analysis/auth.py
@@ -15,7 +15,6 @@ class Authorization:
            self.config_file = Path.home() / ".tiktok"

        self.section = "TikTok"
-        self.ms_token = None

    def get_token(self) -> str:
        """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
@@ -64,7 +63,7 @@ class Authorization:
        """Allow user to manually enter the token in the terminal."""

        print(
-            "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. See [THIS VIDEO] for more information.\n"
+            "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. For more information, watch the video: https://tinyurl.com/tiktok-mstoken\n"
        )

        ms_token = input("msToken: ")
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -7,7 +7,7 @@ import warnings
 import asyncio
 import logging
 import re
-from typing import List, Dict
+from typing import List, Dict, Optional

 import yt_dlp
 import requests
@@ -101,7 +101,9 @@ def aggregate_cooccurring_hashtags(hashtag_file: Path) -> Counter:
 class TikTokDownloader:
    """Main class for scraping data from TikTok."""

-    def __init__(self, hashtags: List[str], data_dir: str, config_file: str = None):
+    def __init__(
+        self, hashtags: List[str], data_dir: Path, config_file: Optional[str] = None
+    ):
        self.hashtags = process_hashtag_list(hashtags)
        logging.info(f"Hashtags to scrape: {hashtags}")

@@ -146,7 +148,8 @@ class TikTokDownloader:
        json_dump(file_path=hashtag_file, data=all_fetched_data)
        logging.info(
            f"Scraped {len(new_fetched_data)} new posts containing the hashtag "
-            f"'{hashtag}', with {len(already_fetched_data)} posts previously scraped"
+            f"'{hashtag}' to output directory {self.data_dir}, with "
+            f"{len(already_fetched_data)} posts previously scraped"
        )

    def get_hashtag_videos(self, hashtag: str):
--- a/tiktok_hashtag_analysis/cli.py
+++ b/tiktok_hashtag_analysis/cli.py
@@ -1,9 +1,12 @@
+import os
 import logging
 import argparse
 from pathlib import Path
-
+from typing import Optional
 from .base import TikTokDownloader, load_hashtags_from_file

+DEFAULT_OUTPUT_DIR = Path.home() / "tiktok_hashtag_data"
+

 def create_parser():
    """Create parser to parse input command-line arguments."""
@@ -51,7 +54,7 @@ def create_parser():
        "--output-dir",
        type=str,
        help="Directory to save scraped data and visualizations to",
-        default=Path(".").resolve().parent / "data",
+        default=None,
    )
    parser.add_argument(
        "--config",
@@ -64,6 +67,29 @@ def create_parser():
    return parser


+def process_output_dir(
+    specified_output_dir: Optional[str], parser: argparse.ArgumentParser
+) -> Path:
+    """Make sure the output directory can be created or has write permissions."""
+
+    error_message = (
+        lambda _output_dir: f"You don't have write permissions for the specified output directory (`{_output_dir}`). Please specify an output directory that you have write access to."
+    )
+
+    if specified_output_dir is None:
+        return DEFAULT_OUTPUT_DIR
+    else:
+        _output_dir = Path(specified_output_dir).resolve()
+        try:
+            os.makedirs(_output_dir, exist_ok=True)
+            if not os.access(path=_output_dir, mode=os.W_OK):
+                parser.error(error_message(_output_dir))
+            else:
+                return _output_dir
+        except PermissionError:
+            parser.error(error_message(_output_dir))
+
+
 def main():
    """Parse and process command-line arguments, scrape specified hashtags, and perform specified analyses."""

@@ -89,8 +115,10 @@ def main():
    else:
        hashtags = args.hashtags

+    output_dir = process_output_dir(specified_output_dir=args.output_dir, parser=parser)
+
    downloader = TikTokDownloader(
-        hashtags=hashtags, data_dir=args.output_dir, config_file=args.config
+        hashtags=hashtags, data_dir=output_dir, config_file=args.config
    )

    downloader.run(