From 717ec518904c3b5fad990a73faaf5efba2783a25 Mon Sep 17 00:00:00 2001 From: X Date: Sat, 12 Feb 2022 21:12:49 +0100 Subject: [PATCH] second attempt resolve merge conflicts --- analytics/hashtag_frequencies.py | 12 ---------- tiktok_downloader/data_methods.py | 2 ++ tiktok_downloader/file_methods.py | 1 - tiktok_downloader/global_data.py | 10 +++------ tiktok_downloader/hashtag_list.py | 3 --- tiktok_downloader/run_downloader.py | 35 +++-------------------------- 6 files changed, 8 insertions(+), 55 deletions(-) diff --git a/analytics/hashtag_frequencies.py b/analytics/hashtag_frequencies.py index 83e368e..79350db 100644 --- a/analytics/hashtag_frequencies.py +++ b/analytics/hashtag_frequencies.py @@ -55,15 +55,6 @@ def plot(n, length, k, v, img_folder): plt.ylabel(f'Number of occurrences') save_plot(plt, img_folder) plt.show(block=None) - - -def plot(n, length, k, v): - plt.scatter(k, v) - plt.tight_layout() - plt.title(f'Hashtag Distribution') - plt.xlabel(f'Top {n} hashtags from {length} posts.') - plt.ylabel(f'Number of occurrences') - plt.show() return @@ -94,8 +85,6 @@ def save_plot(plt, img_folder): if __name__ == "__main__": img_folder = global_data.IMAGES file_methods.check_file(img_folder, "dir") - -if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("input_file", help="The json hashtag file name") parser.add_argument("n", help="The number of top n occurrences", type=int) @@ -112,7 +101,6 @@ if __name__ == "__main__": if args.plot: length, keys, values = get_occurrences(args.input_file, args.n) plot(args.n, length, keys, values, img_folder) - plot(args.n, length, keys, values) else: length, keys, values = get_occurrences(args.input_file, args.n) print_occurrences(length, keys, values) diff --git a/tiktok_downloader/data_methods.py b/tiktok_downloader/data_methods.py index c35e2a4..e99c79e 100644 --- a/tiktok_downloader/data_methods.py +++ b/tiktok_downloader/data_methods.py @@ -121,3 +121,5 @@ def print_total(file_path, tag, data_type): else: print(f"WARNING: out of total {data_type} for the hashtag {tag} {total.total}, only {total.unique} are unique. Something is going wrong...") return + + diff --git a/tiktok_downloader/file_methods.py b/tiktok_downloader/file_methods.py index 9a98127..915fb31 100644 --- a/tiktok_downloader/file_methods.py +++ b/tiktok_downloader/file_methods.py @@ -66,7 +66,6 @@ def download_videos(settings, tag): try: # tiktok_command = f"tiktok-scraper hashtag {tag} -n {settings['number_of_videos']} -d" tiktok_command = f"tiktok-scraper hashtag {tag} -d" - tiktok_command = f"tiktok-scraper hashtag {tag} -n {settings['number_of_videos']} -d" result = subprocess.run([tiktok_command], capture_output=True, shell=True) if result.stdout: downloaded_list_tmp = os.listdir(f"./#{tag}") diff --git a/tiktok_downloader/global_data.py b/tiktok_downloader/global_data.py index a485c8e..8c8bd51 100644 --- a/tiktok_downloader/global_data.py +++ b/tiktok_downloader/global_data.py @@ -33,12 +33,8 @@ FILES = { tag = "" PARAMETERS = { - "scraper_attempts" : 3, +# "scraper_attempts" : 3, # "number_of_videos" : 3, # Number of videos to be downloaded by tiktok-scraper. -} -COMMANDS = { - "number_of_videos" : 3, # Number of videos to be downloaded by tiktok-scraper. - "post_download" : f"tiktok-scraper hashtag {tag} -t 'json'", - "video_download" : f"tiktok-scraper hashtag {tag} -d", - "sleep" : 8 + "sleep" : 8 } + diff --git a/tiktok_downloader/hashtag_list.py b/tiktok_downloader/hashtag_list.py index bd44d1c..1bc6098 100644 --- a/tiktok_downloader/hashtag_list.py +++ b/tiktok_downloader/hashtag_list.py @@ -1,11 +1,9 @@ hashtag_list = [ # This is a sample hashtag list. Please enter your hashtag list (without the comment). -<<<<<<< HEAD "london", "paris", "newyork", "tokyo" -======= # "london", # "paris", # "newyork", @@ -40,5 +38,4 @@ hashtag_list = [ "xinjiangdance", "westernmedia", "uyghurgenocide" ->>>>>>> bfa90676f121dd88e070dc134791a596a104e784 ] diff --git a/tiktok_downloader/run_downloader.py b/tiktok_downloader/run_downloader.py index fb9cc24..494aa9f 100644 --- a/tiktok_downloader/run_downloader.py +++ b/tiktok_downloader/run_downloader.py @@ -2,7 +2,6 @@ import os, sys import time import json import argparse, importlib -import argparse import global_data import file_methods @@ -20,13 +19,6 @@ def get_hashtag_list(file_name): except ImportError as error: print("ImportError: " + str(error)) print(f"Please provide at least one hashtag either by entering as an argument or by adding hashtags to the variable hashtag_list in the file {file_name}") -def get_hashtag_list(): - try: - from hashtag_list import hashtag_list - return hashtag_list - except ImportError as error: - print("ImportError: " + str(error)) - print(f"Please provide at least one hashtag either by entering as an argument or by adding hashtags to the list hashtag_list in the file hashtag_list.py") sys.exit() @@ -52,20 +44,17 @@ def set_download_settings(download_data_type): settings["logger"] = global_data.FILES["logger"] settings["sleep"] = global_data.PARAMETERS["sleep"] settings["scraper"] = global_data.PARAMETERS["scraper_attempts"] - settings["sleep"] = global_data.COMMANDS["sleep"] file_methods.check_file(f"{settings['data']}/{settings['ids']}", "dir") file_methods.check_file(f"{settings['data']}/{settings['log']}", "dir") if download_data_type == "posts": settings["posts"] = global_data.FILES["posts"] settings["post_ids"] = global_data.FILES["post_ids"] - settings["post_download"] = global_data.COMMANDS["post_download"] settings["data_file"] = global_data.FILES["data_file"] return settings elif download_data_type == "videos": settings["videos"] = global_data.FILES["videos"] settings["video_ids"] = global_data.FILES["video_ids"] - settings["video_download"] = global_data.COMMANDS["video_download"] - settings["number_of_videos"] = global_data.COMMANDS["number_of_videos"] + settings["number_of_videos"] = global_data.PARAMETERS["number_of_videos"] return settings elif download_data_type == "posts-videos": settings["posts"] = global_data.FILES["posts"] @@ -73,11 +62,7 @@ def set_download_settings(download_data_type): settings["data_file"] = global_data.FILES["data_file"] settings["videos"] = global_data.FILES["videos"] settings["video_ids"] = global_data.FILES["video_ids"] - settings["post_download"] = global_data.COMMANDS["post_download"] - settings["videos"] = global_data.FILES["videos"] - settings["video_ids"] = global_data.FILES["video_ids"] - settings["video_download"] = global_data.COMMANDS["video_download"] - settings["number_of_videos"] = global_data.COMMANDS["number_of_videos"] + settings["number_of_videos"] = global_data.PARAMETERS["number_of_videos"] return settings else: print(f"ERROR: The download_data_type must be either posts, videos or posts-videos.") @@ -109,6 +94,7 @@ def get_videos(settings, tag): log = data_methods.update_videos(settings, new_data, tag) else: file_methods.clean_video_files(settings, tag) + return log @@ -213,21 +199,6 @@ if __name__ == "__main__": if not hashtags: print("No hashtags were given, please use either --h option or -f to provide hashtags.") sys.exit(0) - if not (args.p or args.v): - parser.error("No argument given, please specify either -p for posts or -v videos or both.") - sys.exit() - - if args.h: - hashtags = args.h - else: - hashtags = get_hashtags("hashtag_list", "hashtag_list") - - print(hashtags) - if not hashtags: - hashtags = get_hashtag_list() - if not hashtags: - print(f"ERROR: No hashtags found. Please re-run the script with at least one hashtag!!!") - sys.exit(0) if (args.p and args.v): download_data_type = "posts-videos"