mirror of
https://github.com/bellingcat/tiktok-hashtag-analysis.git
synced 2026-06-08 03:18:31 +03:00
resolve merge conflicts
This commit is contained in:
@@ -2,13 +2,10 @@ import os, sys
|
||||
import csv, json
|
||||
import argparse
|
||||
import matplotlib.pyplot as plt
|
||||
<<<<<<< HEAD
|
||||
from datetime import datetime
|
||||
|
||||
sys.path.insert(0, '../tiktok_downloader')
|
||||
import file_methods, global_data
|
||||
=======
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
|
||||
|
||||
|
||||
@@ -49,7 +46,6 @@ def get_occurrences(filename, n=1 , sort=True):
|
||||
|
||||
|
||||
|
||||
<<<<<<< HEAD
|
||||
def plot(n, length, k, v, img_folder):
|
||||
plt.scatter(k, v)
|
||||
plt.tight_layout()
|
||||
@@ -59,7 +55,8 @@ def plot(n, length, k, v, img_folder):
|
||||
plt.ylabel(f'Number of occurrences')
|
||||
save_plot(plt, img_folder)
|
||||
plt.show(block=None)
|
||||
=======
|
||||
|
||||
|
||||
def plot(n, length, k, v):
|
||||
plt.scatter(k, v)
|
||||
plt.tight_layout()
|
||||
@@ -67,7 +64,6 @@ def plot(n, length, k, v):
|
||||
plt.xlabel(f'Top {n} hashtags from {length} posts.')
|
||||
plt.ylabel(f'Number of occurrences')
|
||||
plt.show()
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
return
|
||||
|
||||
|
||||
@@ -84,7 +80,6 @@ def print_occurrences(l, k, v):
|
||||
return
|
||||
|
||||
|
||||
<<<<<<< HEAD
|
||||
def save_plot(plt, img_folder):
|
||||
try:
|
||||
now = datetime.now()
|
||||
@@ -99,10 +94,8 @@ def save_plot(plt, img_folder):
|
||||
if __name__ == "__main__":
|
||||
img_folder = global_data.IMAGES
|
||||
file_methods.check_file(img_folder, "dir")
|
||||
=======
|
||||
|
||||
if __name__ == "__main__":
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("input_file", help="The json hashtag file name")
|
||||
parser.add_argument("n", help="The number of top n occurrences", type=int)
|
||||
@@ -118,11 +111,8 @@ if __name__ == "__main__":
|
||||
path = f"./{base}_sorted_hashtags.csv"
|
||||
if args.plot:
|
||||
length, keys, values = get_occurrences(args.input_file, args.n)
|
||||
<<<<<<< HEAD
|
||||
plot(args.n, length, keys, values, img_folder)
|
||||
=======
|
||||
plot(args.n, length, keys, values)
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
else:
|
||||
length, keys, values = get_occurrences(args.input_file, args.n)
|
||||
print_occurrences(length, keys, values)
|
||||
|
||||
@@ -64,12 +64,9 @@ def download_videos(settings, tag):
|
||||
path = os.path.join(settings["data"], tag, settings["videos"])
|
||||
os.chdir(path)
|
||||
try:
|
||||
<<<<<<< HEAD
|
||||
# tiktok_command = f"tiktok-scraper hashtag {tag} -n {settings['number_of_videos']} -d"
|
||||
tiktok_command = f"tiktok-scraper hashtag {tag} -d"
|
||||
=======
|
||||
tiktok_command = f"tiktok-scraper hashtag {tag} -n {settings['number_of_videos']} -d"
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
result = subprocess.run([tiktok_command], capture_output=True, shell=True)
|
||||
if result.stdout:
|
||||
downloaded_list_tmp = os.listdir(f"./#{tag}")
|
||||
|
||||
@@ -4,10 +4,7 @@ IDS = "ids"
|
||||
LOG = "log"
|
||||
POSTS = "posts"
|
||||
VIDEOS = "videos"
|
||||
<<<<<<< HEAD
|
||||
IMAGES = f"{DATA}/img"
|
||||
=======
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
|
||||
# Files
|
||||
POST_IDS = "post_ids.json"
|
||||
@@ -22,10 +19,7 @@ FILES = {
|
||||
"log" : LOG,
|
||||
"posts" : POSTS,
|
||||
"videos" : VIDEOS,
|
||||
<<<<<<< HEAD
|
||||
"images" : IMAGES,
|
||||
=======
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
"post_ids" : f"{DATA}/{IDS}/{POST_IDS}",
|
||||
"video_ids" : f"{DATA}/{IDS}/{VIDEO_IDS}",
|
||||
"data_file" : f"{DATA_FILE}",
|
||||
@@ -38,15 +32,13 @@ FILES = {
|
||||
# Commands
|
||||
tag = ""
|
||||
|
||||
<<<<<<< HEAD
|
||||
PARAMETERS = {
|
||||
"scraper_attempts" : 3,
|
||||
# "number_of_videos" : 3, # Number of videos to be downloaded by tiktok-scraper.
|
||||
=======
|
||||
}
|
||||
COMMANDS = {
|
||||
"number_of_videos" : 3, # Number of videos to be downloaded by tiktok-scraper.
|
||||
"post_download" : f"tiktok-scraper hashtag {tag} -t 'json'",
|
||||
"video_download" : f"tiktok-scraper hashtag {tag} -d",
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
"sleep" : 8
|
||||
"number_of_videos" : 3, # Number of videos to be downloaded by tiktok-scraper.
|
||||
"post_download" : f"tiktok-scraper hashtag {tag} -t 'json'",
|
||||
"video_download" : f"tiktok-scraper hashtag {tag} -d",
|
||||
"sleep" : 8
|
||||
}
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import os, sys
|
||||
import time
|
||||
import json
|
||||
<<<<<<< HEAD
|
||||
import argparse, importlib
|
||||
=======
|
||||
import argparse
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
|
||||
import global_data
|
||||
import file_methods
|
||||
@@ -15,7 +12,6 @@ import data_methods
|
||||
|
||||
command = "python3 post_downloader.py "
|
||||
|
||||
<<<<<<< HEAD
|
||||
def get_hashtag_list(file_name):
|
||||
try:
|
||||
f = importlib.import_module(file_name) # exec(f"from {file_name} import hashtag_list")
|
||||
@@ -24,7 +20,6 @@ def get_hashtag_list(file_name):
|
||||
except ImportError as error:
|
||||
print("ImportError: " + str(error))
|
||||
print(f"Please provide at least one hashtag either by entering as an argument or by adding hashtags to the variable hashtag_list in the file {file_name}")
|
||||
=======
|
||||
def get_hashtag_list():
|
||||
try:
|
||||
from hashtag_list import hashtag_list
|
||||
@@ -32,7 +27,6 @@ def get_hashtag_list():
|
||||
except ImportError as error:
|
||||
print("ImportError: " + str(error))
|
||||
print(f"Please provide at least one hashtag either by entering as an argument or by adding hashtags to the list hashtag_list in the file hashtag_list.py")
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
sys.exit()
|
||||
|
||||
|
||||
@@ -43,10 +37,7 @@ def create_parser():
|
||||
# Adding the arguments
|
||||
#parser.add_argument("--h", type=str, nargs="*", required=True, help="List of hashtags")
|
||||
parser.add_argument("--h", type=str, nargs="*", help="List of hashtags")
|
||||
<<<<<<< HEAD
|
||||
parser.add_argument("-f", type=str, help="File name with the list of hashtags")
|
||||
=======
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
parser.add_argument("-p", action="store_true", help="Download posts")
|
||||
parser.add_argument("-v", action="store_true", help="Download videos")
|
||||
|
||||
@@ -59,46 +50,34 @@ def set_download_settings(download_data_type):
|
||||
settings["ids"] = global_data.FILES["ids"]
|
||||
settings["log"] = global_data.FILES["log"]
|
||||
settings["logger"] = global_data.FILES["logger"]
|
||||
<<<<<<< HEAD
|
||||
settings["sleep"] = global_data.PARAMETERS["sleep"]
|
||||
settings["scraper"] = global_data.PARAMETERS["scraper_attempts"]
|
||||
=======
|
||||
settings["sleep"] = global_data.COMMANDS["sleep"]
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
file_methods.check_file(f"{settings['data']}/{settings['ids']}", "dir")
|
||||
file_methods.check_file(f"{settings['data']}/{settings['log']}", "dir")
|
||||
if download_data_type == "posts":
|
||||
settings["posts"] = global_data.FILES["posts"]
|
||||
settings["post_ids"] = global_data.FILES["post_ids"]
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
settings["post_download"] = global_data.COMMANDS["post_download"]
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
settings["data_file"] = global_data.FILES["data_file"]
|
||||
return settings
|
||||
elif download_data_type == "videos":
|
||||
settings["videos"] = global_data.FILES["videos"]
|
||||
settings["video_ids"] = global_data.FILES["video_ids"]
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
settings["video_download"] = global_data.COMMANDS["video_download"]
|
||||
settings["number_of_videos"] = global_data.COMMANDS["number_of_videos"]
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
return settings
|
||||
elif download_data_type == "posts-videos":
|
||||
settings["posts"] = global_data.FILES["posts"]
|
||||
settings["post_ids"] = global_data.FILES["post_ids"]
|
||||
settings["data_file"] = global_data.FILES["data_file"]
|
||||
<<<<<<< HEAD
|
||||
settings["videos"] = global_data.FILES["videos"]
|
||||
settings["video_ids"] = global_data.FILES["video_ids"]
|
||||
=======
|
||||
settings["post_download"] = global_data.COMMANDS["post_download"]
|
||||
settings["videos"] = global_data.FILES["videos"]
|
||||
settings["video_ids"] = global_data.FILES["video_ids"]
|
||||
settings["video_download"] = global_data.COMMANDS["video_download"]
|
||||
settings["number_of_videos"] = global_data.COMMANDS["number_of_videos"]
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
return settings
|
||||
else:
|
||||
print(f"ERROR: The download_data_type must be either posts, videos or posts-videos.")
|
||||
@@ -130,10 +109,6 @@ def get_videos(settings, tag):
|
||||
log = data_methods.update_videos(settings, new_data, tag)
|
||||
else:
|
||||
file_methods.clean_video_files(settings, tag)
|
||||
<<<<<<< HEAD
|
||||
|
||||
=======
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
return log
|
||||
|
||||
|
||||
@@ -220,7 +195,6 @@ if __name__ == "__main__":
|
||||
parser = create_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
<<<<<<< HEAD
|
||||
if not (args.h or args.f):
|
||||
parser.error("No hashtags were given, please use either --h option or -f to provide hashtags.")
|
||||
sys.exit()
|
||||
@@ -239,7 +213,6 @@ if __name__ == "__main__":
|
||||
if not hashtags:
|
||||
print("No hashtags were given, please use either --h option or -f to provide hashtags.")
|
||||
sys.exit(0)
|
||||
=======
|
||||
if not (args.p or args.v):
|
||||
parser.error("No argument given, please specify either -p for posts or -v videos or both.")
|
||||
sys.exit()
|
||||
@@ -255,7 +228,6 @@ if __name__ == "__main__":
|
||||
if not hashtags:
|
||||
print(f"ERROR: No hashtags found. Please re-run the script with at least one hashtag!!!")
|
||||
sys.exit(0)
|
||||
>>>>>>> bfa90676f121dd88e070dc134791a596a104e784
|
||||
|
||||
if (args.p and args.v):
|
||||
download_data_type = "posts-videos"
|
||||
|
||||
Reference in New Issue
Block a user