diff --git a/tiktok_downloader/run_downloader.py b/tiktok_downloader/run_downloader.py index d2e53bf..d921bad 100644 --- a/tiktok_downloader/run_downloader.py +++ b/tiktok_downloader/run_downloader.py @@ -1,7 +1,7 @@ import os, sys import time import json -import argparse, importlib +import argparse import global_data import file_methods @@ -13,11 +13,11 @@ The run_downloader.py dowloads data using the tiktok-scraper (https://github.com 1. "-p" option is used by the user to download posts only 2. "-v" option is use to download videos only 3. "-p -v" is used to download posts and videos -4. "--h" is used to specify a list of hashtags as arguments +4. "-t" is used to specify a list of hashtags as arguments 5. "-f" option is used to read the list of hashtags from the user specified file -Example: - 1. The command "python3 run_downloader.py --h london paris newyork -p" will download posts for hashtags london, paris and newyork. +Example: + 1. The command "python3 run_downloader.py -t london paris newyork -p" will download posts for hashtags london, paris and newyork. 2. The command "python3 run_downloader.py -f hashtag_list -p -v" will download posts and videos for hashtags in the file hashtag_list. 
@@ -42,17 +42,15 @@ hashtag_list - this file contains the list of hashtags that the user wants to do """ -command = "python3 post_downloader.py " def get_hashtag_list(file_name): try: - f = importlib.import_module(file_name) # exec(f"from {file_name} import hashtag_list") - print(f.hashtag_list) - return f.hashtag_list - except ImportError as error: - print("ImportError: " + str(error)) - print(f"Please provide at least one hashtag either by entering as an argument or by adding hashtags to the variable hashtag_list in the file {file_name}") - sys.exit() + with open(file_name) as f: + gn = (line.strip() for line in f if not line.startswith("#")) + tags = [line for line in gn if line] + return tags + except IOError as error: + print(error) def create_parser(): @@ -60,8 +58,7 @@ def create_parser(): parser = argparse.ArgumentParser(description="Download the tiktoks for the requested hashtags") # Adding the arguments - #parser.add_argument("--h", type=str, nargs="*", required=True, help="List of hashtags") - parser.add_argument("--h", type=str, nargs="*", help="List of hashtags") + parser.add_argument("-t", type=str, nargs="*", help="List of hashtags") parser.add_argument("-f", type=str, help="File name with the list of hashtags") parser.add_argument("-p", action="store_true", help="Download posts") parser.add_argument("-v", action="store_true", help="Download videos") @@ -71,7 +68,7 @@ def create_parser(): def set_download_settings(download_data_type): """ - Loads the constants from global_data into the dict called settings and returns it. + Loads the constants from global_data into the dict called settings and returns it. Purpose - easy access to global constants by various functions. 
""" settings = {} @@ -83,25 +80,16 @@ def set_download_settings(download_data_type): settings["scraper"] = global_data.PARAMETERS["scraper_attempts"] file_methods.check_file(f"{settings['data']}/{settings['ids']}", "dir") file_methods.check_file(f"{settings['data']}/{settings['log']}", "dir") - if download_data_type == "posts": + if download_data_type["posts"]: settings["posts"] = global_data.FILES["posts"] settings["post_ids"] = global_data.FILES["post_ids"] settings["data_file"] = global_data.FILES["data_file"] - return settings - elif download_data_type == "videos": + + if download_data_type["videos"]: settings["videos"] = global_data.FILES["videos"] settings["video_ids"] = global_data.FILES["video_ids"] - return settings - elif download_data_type == "posts-videos": - settings["posts"] = global_data.FILES["posts"] - settings["post_ids"] = global_data.FILES["post_ids"] - settings["data_file"] = global_data.FILES["data_file"] - settings["videos"] = global_data.FILES["videos"] - settings["video_ids"] = global_data.FILES["video_ids"] - return settings - else: - print(f"ERROR: The download_data_type must be either posts, videos or posts-videos.") - sys.exit() + + return settings @@ -120,17 +108,17 @@ def get_posts(settings, tag): data_methods.update_posts(data_file, "file", new_data[1]) log = data_methods.update_posts(settings["post_ids"], "file", new_data[0], tag) file_methods.delete_file(file_path, "file") - + return log -def get_videos(settings, tag): +def get_videos(settings, tag): """ 1. calls download_videos in file_methods.py to get the videos for a given hashtag 2. calls extract_videos from data_methods.py to extract new videos if any 3. calls update_videos from data_methods.py to update the id-list with the ids of newly downloaded videos. - 4. the clean_video_files function deletes the residual video folder after the data processing + 4. 
the clean_video_files function deletes the residual video folder after the data processing """ log = () download_list = file_methods.download_videos(settings, tag) @@ -148,14 +136,14 @@ def get_videos(settings, tag): def get_data(hashtags, download_data_type): """ The function checks for the user option "-p", "-v" or both and then - triggers the functions get_posts, get_videos or both, respectively. + triggers the functions get_posts, get_videos or both, respectively. """ counter = 0 total_hashtags = len(hashtags) total_hashtags_offset = total_hashtags - 1 log_data = [] - - if download_data_type == "posts": + + if download_data_type["posts"]: settings = set_download_settings(download_data_type) while counter < total_hashtags: tag = hashtags[counter] @@ -165,12 +153,13 @@ def get_data(hashtags, download_data_type): if res: log = ( res[0], ( "posts", res[1] ) ) log_data.append(log) - data_methods.print_total(settings["post_ids"], tag, download_data_type) - + data_methods.print_total(settings["post_ids"], tag, "posts") + counter += 1 if counter < total_hashtags_offset: time.sleep(settings["sleep"]) - elif download_data_type == "videos": + counter = 0 + if download_data_type["videos"]: settings = set_download_settings(download_data_type) while counter < total_hashtags: tag = hashtags[counter] @@ -181,43 +170,16 @@ def get_data(hashtags, download_data_type): if res: res = ( res[0], ( "videos", res[1])) log_data.append(res) - data_methods.print_total(settings["video_ids"], tag, download_data_type) - + data_methods.print_total(settings["video_ids"], tag, "videos") + counter += 1 if counter < total_hashtags_offset: time.sleep(settings["sleep"]) - elif download_data_type == "posts-videos": - settings = set_download_settings(download_data_type) - while counter < total_hashtags: - tag = hashtags[counter] - file_methods.check_file(os.path.join(settings["data"], tag, settings["posts"]), "dir") - file_methods.check_file(os.path.join(settings["data"], tag, settings["posts"], 
settings["data_file"]), "file") - file_methods.check_file(os.path.join(settings["data"], tag, settings["videos"]), "dir") - settings["videos_delete"] = settings['data'] + f"/{tag}/videos/#{tag}" - settings["videos_to"] = settings['data'] + f"/{tag}/videos" - requests = [("posts", "post_ids", get_posts), ("videos", "video_ids", get_videos)] - total_reqs_offset = len(requests) - 1 - req_counter = 0 - for req in requests: - res = req[2](settings, tag) - if res: - res = ( res[0], (req[0], res[1]) ) - log_data.append(res) - data_methods.print_total(settings[req[1]], tag, req[0]) - if req_counter < total_reqs_offset: - time.sleep(settings["sleep"]) - req_counter += 1 - - counter += 1 - if counter < total_hashtags_offset: - time.sleep(settings["sleep"]) - else: - print(f"ERROR: The download_data_type must be either posts, videos or posts-videos.") - sys.exit() return log_data + def get_hashtags(file_name, hashtag_list): """ Loads and returns the list of hashtags from user specified file. @@ -225,42 +187,47 @@ def get_hashtags(file_name, hashtag_list): try: from hashtag_list import hashtag_list return hashtag_list - except: - print(f"ERROR: something went wrong while reading the file {file_name}!") - raise + except ImportError: + raise ImportError(f"ERROR: something went wrong while reading the file {file_name}!") if __name__ == "__main__": parser = create_parser() args = parser.parse_args() - if not (args.h or args.f): + if not (args.t or args.f): - parser.error("No hashtags were given, please use either --h option or -f to provide hashtags.") + parser.error("No hashtags were given, please use either -t option or -f to provide hashtags.") - sys.exit() - + if not (args.p or args.v): parser.error("No argument given, please specify either -p for posts or -v videos or both.") - sys.exit() - if args.h: - hashtags = args.h + if args.t: + hashtags = args.t elif args.f: file_name = args.f hashtags = get_hashtag_list(file_name) print(hashtags) if not hashtags: - print("No hashtags were given, please use either --h option or -f to provide hashtags.") - sys.exit(0) + raise 
Exception("No hashtags were given, please use either -t option or -f to provide hashtags.") if (args.p and args.v): - download_data_type = "posts-videos" + download_data_type = { + "posts": True, + "videos": True + } elif args.p: - download_data_type = "posts" + download_data_type = { + "posts": True, + "videos": False + } else: - download_data_type = "videos" - - try: + download_data_type = { + "posts": False, + "videos": True + } + + try: log_data = get_data(hashtags, download_data_type) if log_data: file_methods.log_writer(log_data)