configured verbosity argument with logging level

2026-06-07 19:08:32 +03:00 · 2023-09-11 21:29:37 -05:00
parent 6fa1e5026c
commit 92861e0e5d
4 changed files with 18 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ The tool helps to download posts and videos from TikTok for a given set of hasht
 ## Pre-requisites
 1. Make sure you have Python 3.9 or a later version installed
 2. Install the tool with pip: `pip install tiktok-hashtag-analysis`
-   1. or directly from the repo version: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis`
+   - Alternatively, you can install the latest version directly from GitHub: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis`

 You should now be ready to start using it.

@@ -150,6 +150,3 @@ pytest
 ```

 This repo uses [black](https://github.com/psf/black) to format source code and [mypy](https://mypy.readthedocs.io/en/stable/) for static type checking. Before submitting a pull request, please run both tools on the source code.
-
- yt-dlp warning: (unable to find video in feed)
-https://www.tiktok.com/@sa_diya_34/video/7261180335763754242
--- a/tests/cli.py
+++ b/tests/cli.py
@@ -42,7 +42,6 @@ def test_parser(hashtags, attribute, value, flag):


 def test_process_output_dir(monkeypatch, tmp_path):
-
    home_dir = Path.home().resolve()

    # Specified nonexistent output directory without write permissions
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -31,6 +31,8 @@ from .auth import Authorization
 warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font")
 sns.set_theme(style="darkgrid")

+logger = logging.getLogger(__name__)
+

 def process_hashtag_list(hashtags: List[str]) -> List[str]:
    """Convert a list of hashtags to a standard form (remove whitespace, make
@@ -95,7 +97,7 @@ def download_file_and_save(url: str, filepath: Path):
    path_with_ext = filepath.with_suffix(f".{ext}")
    with open(path_with_ext, "wb") as f:
        f.write(r.content)
-        logging.debug(f"Saved file to: {path_with_ext}")
+        logger.debug(f"Saved file to: {path_with_ext}")


 def download_gallery(video_data: Dict, video_dir: Path):
@@ -143,8 +145,8 @@ class TikTokDownloader:
        os.makedirs(self.data_dir, exist_ok=True)

        self.prioritize_hashtags()
-        logging.info(f"Hashtags to scrape: {self.hashtags}")
-        logging.info(f"Writing data to directory: {self.data_dir}")
+        logger.info(f"Hashtags to scrape: {self.hashtags}")
+        logger.info(f"Writing data to directory: {self.data_dir}")

        self.auth = Authorization(config_file=config_file)
        self.ms_token = self.auth.get_token()
@@ -181,7 +183,7 @@ class TikTokDownloader:
        fetched_ids = set(video["id"] for video in fetched_data)

        if len(fetched_data) == 0:
-            logging.warning(f"No posts were found for the hashtag: {hashtag}")
+            logger.warning(f"No posts were found for the hashtag: {hashtag}")

        # Determine which newly scraped posts haven't been scraped before
        old_fetched_data = [
@@ -193,7 +195,7 @@ class TikTokDownloader:
        # Merge new and old data and write to file
        all_fetched_data = old_fetched_data + fetched_data
        json_dump(file_path=hashtag_file, data=all_fetched_data)
-        logging.info(
+        logger.info(
            f"Scraped {new_post_count} new posts containing the hashtag "
            f"'{hashtag}', with {old_post_count} posts previously scraped"
        )
@@ -232,25 +234,27 @@ class TikTokDownloader:

        # Download audio and image files for all image gallery posts
        if len(galleries_to_download) > 0:
-            logging.info(f"Downloading image galleries for hashtag {hashtag}")
+            logger.info(f"Downloading image galleries for hashtag {hashtag}")
        for video in galleries_to_download:
-            logging.debug(f"Downloading image gallery for video: {video['id']}")
+            logger.debug(f"Downloading image gallery for video: {video['id']}")
            download_gallery(video_data=video, video_dir=video_dir)

        # Download video files for all video posts
        if len(urls_to_download) > 0:
-            logging.info(f"Downloading media for hashtag {hashtag}")
+            logger.info(f"Downloading media for hashtag {hashtag}")
+
        ydl_opts = {
            "outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"),
            "ignore_errors": True,
+            "quiet": logger.getEffectiveLevel() > logging.DEBUG,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            for url in urls_to_download:
                try:
                    ydl.download([url])
                except (HTTPError, TypeError, ExtractorError, DownloadError) as e:
-                    # catch urllib and yt-dlp errors when video not found
-                    logging.warning(
+                    # Catch urllib and yt-dlp errors when video not found
+                    logger.warning(
                        f"Encountered error {e} when attempting to download url: {url}"
                    )

@@ -303,7 +307,7 @@ class TikTokDownloader:
        plot_file = self.data_dir / hashtag / "plots" / f"{hashtag}__{current_time}.png"
        plot_file.parent.mkdir(exist_ok=True, parents=True)
        plt.savefig(plot_file, bbox_inches="tight", facecolor="white", dpi=300)
-        logging.info(f"Plot saved to file: {plot_file}")
+        logger.info(f"Plot saved to file: {plot_file}")

    def run(self, limit: int, download: bool, plot: bool, table: bool, number: int):
        """Execute the specified operations on all specified hashtags."""
--- a/tiktok_hashtag_analysis/cli.py
+++ b/tiktok_hashtag_analysis/cli.py
@@ -7,6 +7,8 @@ from .base import TikTokDownloader, load_hashtags_from_file

 DEFAULT_OUTPUT_DIR = Path.home() / "tiktok_hashtag_data"

+logger = logging.getLogger(__name__)
+

 def create_parser():
    """Create parser to parse input command-line arguments."""