From b916512bde5f68072703eeb1813c667c261ec6a7 Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Mon, 11 Sep 2023 21:43:33 -0500 Subject: [PATCH] removed auth module and authorization, since msToken isnt actually required to run scraper --- README.md | 2 - tests/auth.py | 24 ----------- tiktok_hashtag_analysis/auth.py | 71 --------------------------------- tiktok_hashtag_analysis/base.py | 10 +---- 4 files changed, 2 insertions(+), 105 deletions(-) delete mode 100644 tests/auth.py delete mode 100644 tiktok_hashtag_analysis/auth.py diff --git a/README.md b/README.md index db0d6f3..fa210ec 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,6 @@ The tool helps to download posts and videos from TikTok for a given set of hasht You should now be ready to start using it. -The scraper this tool uses requires an `msToken` taken from the TikTok website on your browser. The first time you run the tool, it will ask for this token. You can see how to retrieve the token by accessing your browser's "Developer Tools", and how to input its value into the tool's command-line interface in [this video](https://github.com/bellingcat/tiktok-hashtag-analysis/assets/18430739/b9d40957-c59e-4b6d-a843-13d210f89055). - ## About the tool ### Command-line arguments ``` diff --git a/tests/auth.py b/tests/auth.py deleted file mode 100644 index 6d0c078..0000000 --- a/tests/auth.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tiktok_hashtag_analysis.auth import Authorization - -MS_TOKEN = "thisisafakemstokenfortiktok" - - -def test_auth_input(tmp_path, monkeypatch): - config_file = tmp_path / ".tiktok" - monkeypatch.setattr("builtins.input", lambda _: MS_TOKEN) - auth = Authorization(config_file=config_file) - auth.get_token() - - assert auth.ms_token == MS_TOKEN - - -def test_auth(tmp_path): - config_file = tmp_path / ".tiktok" - auth = Authorization(config_file=config_file) - - auth.dump_token(ms_token=MS_TOKEN) - auth.get_token() - - assert auth.ms_token == MS_TOKEN diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py deleted file mode 100644 index 16252ab..0000000 --- a/tiktok_hashtag_analysis/auth.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import configparser -from pathlib import Path -import logging -from typing import Optional - - -class Authorization: - """Handle authorization for TikTok, using the `msToken`.""" - - def __init__(self, config_file: Optional[str] = None): - if config_file: - self.config_file = Path(config_file) - else: - self.config_file = Path.home() / ".tiktok" - - self.section = "TikTok" - - def get_token(self) -> str: - """Load the "msToken" cookie taken from TikTok, which the scraper requires.""" - - # Step 1: check if MS_TOKEN is defined as environment variable - if ms_token := os.environ.get("MS_TOKEN"): - self.ms_token = ms_token - logging.debug("Loaded token from environment variable") - - # Step 2: check if MS_TOKEN is defined in config file - elif self.config_file.is_file(): - if ms_token := self.load_token(): - self.ms_token = ms_token - logging.debug(f"Loaded token from config file: {self.config_file}") - - # Step 3: have user enter MS_TOKEN via terminal - else: - ms_token = self.input_token() - self.dump_token(ms_token=ms_token) - self.ms_token = ms_token - logging.debug( - f"Loaded token from user input and saved to config file: {self.config_file}" - ) - - return self.ms_token - - def load_token(self) -> Optional[str]: - """Parse a config file and extract the token.""" - - config = configparser.ConfigParser() - config.read(self.config_file) - return config.get(section=self.section, option="MS_TOKEN", fallback=None) - - def dump_token(self, ms_token: str): - """Write the token to a config file.""" - - config = configparser.ConfigParser() - config.read(self.config_file) - config.add_section(self.section) - config.set(section=self.section, option="MS_TOKEN", value=ms_token) - - with open(self.config_file, "w", encoding="utf-8") as f: - config.write(f) - - def input_token(self) -> str: - """Allow user to manually enter the token in the terminal.""" - - print( - "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. For more information, watch the video: https://tinyurl.com/tiktok-mstoken\n" - ) - - ms_token = input("msToken: ") - - return ms_token diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py index 71b01d4..71d7e63 100644 --- a/tiktok_hashtag_analysis/base.py +++ b/tiktok_hashtag_analysis/base.py @@ -26,7 +26,6 @@ from tenacity import ( from playwright._impl._api_types import Error from TikTokApi import TikTokApi -from .auth import Authorization warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font") sns.set_theme(style="darkgrid") @@ -53,7 +52,7 @@ def load_hashtags_from_file(file: str) -> List[str]: # Retry upon encountering transient playwright errors @retry(retry=retry_if_exception_type(Error), stop=stop_after_attempt(3)) -async def _fetch_hashtag_data(hashtag: str, ms_token: str, limit: int) -> List[Dict]: +async def _fetch_hashtag_data(hashtag: str, limit: int) -> List[Dict]: """Fetch data for videos containing a specified hashtag, asynchronously.""" data = [] async with TikTokApi() as api: @@ -148,9 +147,6 @@ class TikTokDownloader: logger.info(f"Hashtags to scrape: {self.hashtags}") logger.info(f"Writing data to directory: {self.data_dir}") - self.auth = Authorization(config_file=config_file) - self.ms_token = self.auth.get_token() - def prioritize_hashtags(self): """Order hashtags based on whether they've been scraped before, and the time they were most recently scraped""" @@ -177,9 +173,7 @@ class TikTokDownloader: already_fetched_ids = set(video["id"] for video in already_fetched_data) # Scrape posts that use the specified hashtag - fetched_data = asyncio.run( - _fetch_hashtag_data(hashtag=hashtag, ms_token=self.ms_token, limit=limit) - ) + fetched_data = asyncio.run(_fetch_hashtag_data(hashtag=hashtag, limit=limit)) fetched_ids = set(video["id"] for video in fetched_data) if len(fetched_data) == 0: