Pass the --filepath argument to tiktok-scraper to avoid os.chdir calls

2026-06-08 03:18:31 +03:00 · 2022-05-06 02:36:21 -05:00
parent 0cb9d4b1b9
commit 595a6e6535
2 changed files with 20 additions and 23 deletions
--- a/tiktok_downloader/file_methods.py
+++ b/tiktok_downloader/file_methods.py
@@ -42,7 +42,7 @@ def check_file(file_path: str, file_type: str):
        create_file(file_path, file_type)


-def download_posts(settings: Dict, tag: str):
+def download_posts(settings: Dict, tag: str, output_dir: Any):
    """Run the tiktok-scraper command to download posts for a given hashtag.

    Returns the path to the downloaded file of posts. If no file was downloaded,
@@ -52,18 +52,16 @@ def download_posts(settings: Dict, tag: str):
    reused to return to the original folder of execution of run_downloader script.
    """
    path = os.path.join(settings["data"], tag, settings["posts"])
-    os.chdir(path)
-    tiktok_command = f"tiktok-scraper hashtag {tag} -t 'json'"
+    os.makedirs(path, exist_ok=True)
+    tiktok_command = f"tiktok-scraper hashtag {tag} -t 'json' --filepath {output_dir}"
    output = subprocess.check_output(tiktok_command, shell=True, encoding="utf-8")
    new_file = output.split()[-1]
    if "json" in new_file:
-        os.chdir("../../../tiktok_downloader")
        return new_file
    else:
        logger.warn(
            f"Something's wrong with what is returned by tiktok-scraper for the hashtag {tag} - *{new_file}* is not a json file.\n\ntiktok-scraper returned {output}"
        )
-        os.chdir("../../../tiktok_downloader")


 def download_videos(settings: Dict, tag: str):
@@ -78,21 +76,19 @@ def download_videos(settings: Dict, tag: str):
    reused to return to the original folder of execution of run_downloader script.
    """
    path = os.path.join(settings["data"], tag, settings["videos"])
-    os.chdir(path)
-    tiktok_command = f"tiktok-scraper hashtag {tag} -d"
+    os.makedirs(path, exist_ok=True)
+    tiktok_command = f"tiktok-scraper hashtag {tag} -d --filepath {path}"
    result = subprocess.check_output(tiktok_command, shell=True)
-    downloaded_list_tmp = os.listdir(f"./#{tag}")
+    downloaded_list_tmp = os.listdir(os.path.join(path, f"#{tag}"))
    if downloaded_list_tmp:
        downloaded_list = []
        for file in downloaded_list_tmp:
            file = file.split(".")[0]
            downloaded_list.append(file)

-        os.chdir("../../../tiktok_downloader")
        return downloaded_list
    else:
        logger.warn(f"No video files were downloaded for the hashtag {tag}.")
-        os.chdir("../../../tiktok_downloader")
        shutil.rmtree(settings["videos_delete"])


--- a/tiktok_downloader/run_downloader.py
+++ b/tiktok_downloader/run_downloader.py
@@ -12,6 +12,7 @@ import time
 import argparse
 import logging, logging.config
 from typing import List, Tuple, Dict, Any, Optional
+from tempfile import TemporaryDirectory

 import global_data
 import file_methods
@@ -77,19 +78,19 @@ def get_posts(settings: dict, tag: str) -> Optional[Tuple[str, int]]:
    3. Calls `data_methods.update_posts` to update the ID list with the IDs of
    newly downloaded posts.
    """
-    file_path = file_methods.download_posts(settings, tag)
-    number_scraped = None
-    if file_path:
-        new_data = data_methods.extract_posts(settings, file_path, tag)
-        if new_data:
-            data_file = os.path.join(
-                settings["data"], tag, settings["posts"], settings["data_file"]
-            )
-            data_methods.update_posts(data_file, "file", new_data[1])
-            number_scraped = data_methods.update_posts(
-                settings["post_ids"], "file", new_data[0], tag
-            )
-        file_methods.delete_file(file_path, "file")
+    with TemporaryDirectory() as temp_dir:
+        file_path = file_methods.download_posts(settings, tag, temp_dir)
+        number_scraped = None
+        if file_path:
+            new_data = data_methods.extract_posts(settings, file_path, tag)
+            if new_data:
+                data_file = os.path.join(
+                    settings["data"], tag, settings["posts"], settings["data_file"]
+                )
+                data_methods.update_posts(data_file, "file", new_data[1])
+                number_scraped = data_methods.update_posts(
+                    settings["post_ids"], "file", new_data[0], tag
+                )

    return number_scraped