From 91a8aaef385a16e93f0db09a3128684986fe96cd Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 6 Sep 2023 19:51:16 -0500
Subject: [PATCH] added video link to msToken input, improved handling of
 output directories without write permission (and added relevant unit test),
 removed unused requirements.txt and its read_requirements helper in setup.py

---
 requirements.txt                |  5 ---
 setup.py                        | 26 ---------------
 tests/cli.py                    | 58 +++++++++++++++++++++++++++++++--
 tiktok_hashtag_analysis/auth.py |  3 +-
 tiktok_hashtag_analysis/base.py |  9 +++--
 tiktok_hashtag_analysis/cli.py  | 34 +++++++++++++++++--
 6 files changed, 93 insertions(+), 42 deletions(-)
 delete mode 100644 requirements.txt
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e4144ef..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-seaborn==0.12.2
-matplotlib==3.7.2
-yt-dlp==2023.7.6
-TikTokApi==6.1.1
-requests==2.31.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 5760f41..52d7d35 100644
--- a/setup.py
+++ b/setup.py
@@ -1,30 +1,6 @@
 from setuptools import setup
 
 
-def read_requirements(filename: str):
-    with open(filename) as requirements_file:
-        import re
-
-        def fix_url_dependencies(req: str) -> str:
-            """Pip and setuptools disagree about how URL dependencies should be handled."""
-            m = re.match(
-                r"^(git\+)?(https|ssh)://(git@)?github\.com/([\w-]+)/(?P<name>[\w-]+)\.git",
-                req,
-            )
-            if m is None:
-                return req
-            else:
-                return f"{m.group('name')} @ {req}"
-
-        requirements = []
-        for line in requirements_file:
-            line = line.strip()
-            if line.startswith("#") or len(line) <= 0:
-                continue
-            requirements.append(fix_url_dependencies(line))
-    return requirements
-
-
 with open("README.md", "r", encoding="utf-8") as file:
     long_description = file.read()
 
@@ -45,8 +21,6 @@ setup(
     long_description_content_type="text/markdown",
     url="https://github.com/bellingcat/tiktok-hashtag-analysis",
     license="MIT License",
-    # install_requires=read_requirements("requirements.txt"),
-    # extras_require={"dev": read_requirements("dev-requirements.txt")},
     install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt_dlp"],
     extras_require={"test": ["pytest", "pytest-cov", "pytest-html", "pytest-metadata"]},
     classifiers=[
diff --git a/tests/cli.py b/tests/cli.py
index dd58f5e..ea7f399 100644
--- a/tests/cli.py
+++ b/tests/cli.py
@@ -1,8 +1,15 @@
+import os
+from pathlib import Path
+
 import pytest
 
-from tiktok_hashtag_analysis.cli import create_parser
+from tiktok_hashtag_analysis.cli import (
+    create_parser,
+    process_output_dir,
+    DEFAULT_OUTPUT_DIR,
+)
 
-ARGUMENTS = [
+PARSER_ARGUMENTS = [
     ("file", "hashtags.txt", "--file"),
     ("download", True, "--download"),
     ("download", True, "-d"),
@@ -17,7 +24,7 @@ ARGUMENTS = [
 ]
 
 
-@pytest.mark.parametrize("attribute,value,flag", ARGUMENTS)
+@pytest.mark.parametrize("attribute,value,flag", PARSER_ARGUMENTS)
 def test_parser(hashtags, attribute, value, flag):
     argument_list = [*hashtags, flag]
 
@@ -29,3 +36,48 @@ def test_parser(hashtags, attribute, value, flag):
 
     assert args.get(attribute) == value
     assert args.get("hashtags") == hashtags
+
+
+def test_process_output_dir(monkeypatch, tmp_path):
+
+    home_dir = Path.home().resolve()
+
+    # Specified nonexistent output directory without write permissions
+    parser = create_parser()
+    specified_output_dir = home_dir.parent / "test"
+    with pytest.raises(SystemExit) as system_exit:
+        result = process_output_dir(
+            specified_output_dir=specified_output_dir, parser=parser
+        )
+    assert system_exit.type == SystemExit
+
+    # Specified existing output directory without write permissions
+    parser = create_parser()
+    specified_output_dir = home_dir.parent
+    with pytest.raises(SystemExit) as system_exit:
+        result = process_output_dir(
+            specified_output_dir=specified_output_dir, parser=parser
+        )
+    assert system_exit.type == SystemExit
+
+    # Unspecified, in current directory without write permissions
+    cwd = os.getcwd()
+    monkeypatch.chdir(specified_output_dir)
+    result = process_output_dir(specified_output_dir=None, parser=parser)
+    monkeypatch.chdir(cwd)
+    assert result == DEFAULT_OUTPUT_DIR
+
+    # Specified nonexistent output directory with write permissions
+    parser = create_parser()
+    specified_output_dir = tmp_path / "test" / "tiktok"
+    result = process_output_dir(
+        specified_output_dir=specified_output_dir, parser=parser
+    )
+    assert result == specified_output_dir
+
+    # Unspecified, in current directory with write permissions
+    cwd = os.getcwd()
+    monkeypatch.chdir(specified_output_dir)
+    result = process_output_dir(specified_output_dir=None, parser=parser)
+    monkeypatch.chdir(cwd)
+    assert result == DEFAULT_OUTPUT_DIR
diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py
index 545e2ce..3255ad9 100644
--- a/tiktok_hashtag_analysis/auth.py
+++ b/tiktok_hashtag_analysis/auth.py
@@ -15,7 +15,6 @@ class Authorization:
             self.config_file = Path.home() / ".tiktok"
 
         self.section = "TikTok"
-        self.ms_token = None
 
     def get_token(self) -> str:
         """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
@@ -64,7 +63,7 @@ class Authorization:
         """Allow user to manually enter the token in the terminal."""
 
         print(
-            "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. See [THIS VIDEO] for more information.\n"
+            "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. For more information, watch the video: https://tinyurl.com/tiktok-mstoken\n"
         )
 
         ms_token = input("msToken: ")
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index d7a9e9e..694b82a 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -7,7 +7,7 @@ import warnings
 import asyncio
 import logging
 import re
-from typing import List, Dict
+from typing import List, Dict, Optional
 
 import yt_dlp
 import requests
@@ -101,7 +101,9 @@ def aggregate_cooccurring_hashtags(hashtag_file: Path) -> Counter:
 class TikTokDownloader:
     """Main class for scraping data from TikTok."""
 
-    def __init__(self, hashtags: List[str], data_dir: str, config_file: str = None):
+    def __init__(
+        self, hashtags: List[str], data_dir: Path, config_file: Optional[str] = None
+    ):
         self.hashtags = process_hashtag_list(hashtags)
         logging.info(f"Hashtags to scrape: {hashtags}")
 
@@ -146,7 +148,8 @@ class TikTokDownloader:
         json_dump(file_path=hashtag_file, data=all_fetched_data)
         logging.info(
             f"Scraped {len(new_fetched_data)} new posts containing the hashtag "
-            f"'{hashtag}', with {len(already_fetched_data)} posts previously scraped"
+            f"'{hashtag}' to output directory {self.data_dir}, with "
+            f"{len(already_fetched_data)} posts previously scraped"
         )
 
     def get_hashtag_videos(self, hashtag: str):
diff --git a/tiktok_hashtag_analysis/cli.py b/tiktok_hashtag_analysis/cli.py
index 3c3bbfd..333ed49 100644
--- a/tiktok_hashtag_analysis/cli.py
+++ b/tiktok_hashtag_analysis/cli.py
@@ -1,9 +1,12 @@
+import os
 import logging
 import argparse
 from pathlib import Path
-
+from typing import Optional
 from .base import TikTokDownloader, load_hashtags_from_file
 
+DEFAULT_OUTPUT_DIR = Path.home() / "tiktok_hashtag_data"
+
 
 def create_parser():
     """Create parser tp parse input command-line arguments."""
@@ -51,7 +54,7 @@ def create_parser():
         "--output-dir",
         type=str,
         help="Directory to save scraped data and visualizations to",
-        default=Path(".").resolve().parent / "data",
+        default=None,
     )
     parser.add_argument(
         "--config",
@@ -64,6 +67,29 @@ def create_parser():
     return parser
 
 
+def process_output_dir(
+    specified_output_dir: Optional[str], parser: argparse.ArgumentParser
+) -> Path:
+    """Make sure the output directory can be created or has write permissions."""
+
+    error_message = (
+        lambda _output_dir: f"You don't have write permissions for the specified output directory (`{_output_dir}`). Please specify an output directory that you have write access to."
+    )
+
+    if specified_output_dir is None:
+        return DEFAULT_OUTPUT_DIR
+    else:
+        _output_dir = Path(specified_output_dir).resolve()
+        try:
+            os.makedirs(_output_dir, exist_ok=True)
+            if not os.access(path=_output_dir, mode=os.W_OK):
+                parser.error(error_message(_output_dir))
+            else:
+                return _output_dir
+        except PermissionError:
+            parser.error(error_message(_output_dir))
+
+
 def main():
     """Parse and process command-line arguments, scrape specified hashtags, and perform specified analyses."""
 
@@ -89,8 +115,10 @@ def main():
     else:
         hashtags = args.hashtags
 
+    output_dir = process_output_dir(specified_output_dir=args.output_dir, parser=parser)
+
     downloader = TikTokDownloader(
-        hashtags=hashtags, data_dir=args.output_dir, config_file=args.config
+        hashtags=hashtags, data_dir=output_dir, config_file=args.config
     )
 
     downloader.run(