diff --git a/README.md b/README.md index 166550c..db0d6f3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The tool helps to download posts and videos from TikTok for a given set of hasht ## Pre-requisites 1. Make sure you have Python 3.9 or a later version installed 2. Install the tool with pip: `pip install tiktok-hashtag-analysis` - 1. or directly from the repo version: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis` + - Alternatively you can install directly from the latest version on GitHub: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis` You should now be ready to start using it. @@ -150,6 +150,3 @@ pytest ``` This repo uses [black](https://github.com/psf/black) to format source code and [mypy](https://mypy.readthedocs.io/en/stable/) for static type checking. Before submitting a pull request, please run both tools on the source code. - -- yt-dlp warning: (unable to find video in feed) -https://www.tiktok.com/@sa_diya_34/video/7261180335763754242 diff --git a/tests/cli.py b/tests/cli.py index 58999cb..cf40f5a 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -42,7 +42,6 @@ def test_parser(hashtags, attribute, value, flag): def test_process_output_dir(monkeypatch, tmp_path): - home_dir = Path.home().resolve() # Specified nonexistent output directory without write permissions diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py index 92d6d1e..71b01d4 100644 --- a/tiktok_hashtag_analysis/base.py +++ b/tiktok_hashtag_analysis/base.py @@ -31,6 +31,8 @@ from .auth import Authorization warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font") sns.set_theme(style="darkgrid") +logger = logging.getLogger(__name__) + def process_hashtag_list(hashtags: List[str]) -> List[str]: """Convert a list of hashtags to a standard form (remove whitespace, make @@ -95,7 +97,7 @@ def download_file_and_save(url: str, filepath: Path): path_with_ext = filepath.with_suffix(f".{ext}") with open(path_with_ext, "wb") as f: f.write(r.content) - logging.debug(f"Saved file to: {path_with_ext}") + logger.debug(f"Saved file to: {path_with_ext}") def download_gallery(video_data: Dict, video_dir: Path): @@ -143,8 +145,8 @@ class TikTokDownloader: os.makedirs(self.data_dir, exist_ok=True) self.prioritize_hashtags() - logging.info(f"Hashtags to scrape: {self.hashtags}") - logging.info(f"Writing data to directory: {self.data_dir}") + logger.info(f"Hashtags to scrape: {self.hashtags}") + logger.info(f"Writing data to directory: {self.data_dir}") self.auth = Authorization(config_file=config_file) self.ms_token = self.auth.get_token() @@ -181,7 +183,7 @@ class TikTokDownloader: fetched_ids = set(video["id"] for video in fetched_data) if len(fetched_data) == 0: - logging.warning(f"No posts were found for the hashtag: {hashtag}") + logger.warning(f"No posts were found for the hashtag: {hashtag}") # Determine which newly scraped posts haven't been scraped before old_fetched_data = [ @@ -193,7 +195,7 @@ class TikTokDownloader: # Merge new and old data and write to file all_fetched_data = old_fetched_data + fetched_data json_dump(file_path=hashtag_file, data=all_fetched_data) - logging.info( + logger.info( f"Scraped {new_post_count} new posts containing the hashtag " f"'{hashtag}', with {old_post_count} posts previously scraped" ) @@ -232,25 +234,27 @@ class TikTokDownloader: # Download audio and image files for all image gallery posts if len(galleries_to_download) > 0: - logging.info(f"Downloading image galleries for hashtag {hashtag}") + logger.info(f"Downloading image galleries for hashtag {hashtag}") for video in galleries_to_download: - logging.debug(f"Downloading image gallery for video: {video['id']}") + logger.debug(f"Downloading image gallery for video: {video['id']}") download_gallery(video_data=video, video_dir=video_dir) # Download video files for all video posts if len(urls_to_download) > 0: - logging.info(f"Downloading media for hashtag {hashtag}") + logger.info(f"Downloading media for hashtag {hashtag}") + ydl_opts = { "outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"), "ignore_errors": True, + "quiet": logger.getEffectiveLevel() > logging.DEBUG, } with yt_dlp.YoutubeDL(ydl_opts) as ydl: for url in urls_to_download: try: ydl.download([url]) except (HTTPError, TypeError, ExtractorError, DownloadError) as e: - # catch urllib and yt-dlp errors when video not found - logging.warning( + # Catch urllib and yt-dlp errors when video not found + logger.warning( f"Encountered error {e} when attempting to download url: {url}" ) @@ -303,7 +307,7 @@ class TikTokDownloader: plot_file = self.data_dir / hashtag / "plots" / f"{hashtag}__{current_time}.png" plot_file.parent.mkdir(exist_ok=True, parents=True) plt.savefig(plot_file, bbox_inches="tight", facecolor="white", dpi=300) - logging.info(f"Plot saved to file: {plot_file}") + logger.info(f"Plot saved to file: {plot_file}") def run(self, limit: int, download: bool, plot: bool, table: bool, number: int): """Execute the specified operations on all specified hashtags.""" diff --git a/tiktok_hashtag_analysis/cli.py b/tiktok_hashtag_analysis/cli.py index 10818fb..37141d5 100644 --- a/tiktok_hashtag_analysis/cli.py +++ b/tiktok_hashtag_analysis/cli.py @@ -7,6 +7,8 @@ from .base import TikTokDownloader, load_hashtags_from_file DEFAULT_OUTPUT_DIR = Path.home() / "tiktok_hashtag_data" +logger = logging.getLogger(__name__) + def create_parser(): """Create parser tp parse input command-line arguments."""