# File: analytics/hashtag_frequencies.py
"""
Plots the frequency of hashtags appearing in the set of given posts.
"""

import os
import sys
import csv
import json
import argparse
from collections import Counter
from datetime import datetime

import matplotlib.pyplot as plt

# NOTE(review): this path is resolved against the current working directory,
# so the script presumably has to be started from inside analytics/ - confirm.
sys.path.insert(0, '../tiktok_downloader')
import file_methods
import global_data


def get_hashtags(obj):
    """
    Counts in how many posts each hashtag appears.

    A hashtag is counted at most once per post, even if the post lists it
    several times.

    obj: sequence of post dicts. Each post is expected to carry a "hashtags"
         list whose entries are dicts with a "name" key; posts without
         hashtags are skipped.

    Returns a list of (hashtag, count) tuples sorted by count, highest
    first. Hashtags with equal counts keep the order in which they were
    first seen. An empty list is returned (after printing an error) when
    obj is empty.
    """
    if not obj:
        print('ERROR: Empty item, no hashtags to be extracted.')
        # Return an empty list rather than None so callers can slice it.
        return []

    counts = Counter()
    for post in obj:
        names = [tag['name'] for tag in post.get('hashtags') or []]
        # dict.fromkeys drops duplicates within one post while keeping the
        # order of appearance, which keeps tie-breaking deterministic.
        counts.update(dict.fromkeys(names, 1))
    return counts.most_common()


def get_occurrences(filename, n=1, sort=True):
    """
    Takes the json file containing posts and returns a dictionary:
    occs = {
        "total": total posts in the file,
        "top_n": [[top n hashtags], [frequencies of corresponding hashtags]]
    }

    filename: path of the json file holding the list of posts.
    n:        how many of the most frequent hashtags to return.
    sort:     currently unused, kept for backward compatibility; the
              hashtags are always sorted by frequency.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(filename, encoding='utf-8') as f:
        posts = json.load(f)

    top = get_hashtags(posts)[:n]
    return {
        "total": len(posts),
        "top_n": [
            [name for name, _ in top],
            [count for _, count in top],
        ],
    }


def plot(n, occs, img_folder):
    """
    Draws a scatter plot of the top n hashtags against their number of
    occurrences, saves it into img_folder and shows it.

    n:          number of hashtags requested (used for the axis label only).
    occs:       dictionary as returned by get_occurrences.
    img_folder: folder in which the png file is stored.
    """
    plt.scatter(occs["top_n"][0], occs["top_n"][1])
    plt.xticks(rotation=45)
    plt.title('Hashtag Distribution')
    plt.xlabel(f'Top {n} hashtags from {occs["total"]} posts.')
    plt.ylabel('Number of occurrences')
    # tight_layout only accounts for what exists when it is called, so it
    # has to run after the rotated ticks, the title and the labels are set.
    plt.tight_layout()
    save_plot(img_folder)
    plt.show(block=None)


def print_occurrences(occs):
    """
    Prints the top n hashtags with their frequencies and the ratio of
    occurrences and total posts, all to the shell.

    occs: dictionary as returned by get_occurrences.
    """
    total_posts = occs["total"]
    row_format = "{:<8} {:<15} {:<15} {:<15}"
    print(row_format.format(
        "Rank", 'Hashtag', 'Occurrences',
        f'Frequency (Occurrences/Total-Posts({total_posts}))'))
    # Ranks are shown to the user, so they start at 1.
    ranked = enumerate(zip(occs["top_n"][0], occs["top_n"][1]), start=1)
    for rank, (tag, count) in ranked:
        print(row_format.format(rank, tag, count, count / total_posts))


def save_plot(img_folder):
    """
    Saves the current plot to a png file in img_folder. The file is named
    after the current local time (YYYY_mm_dd_HH_MM_SS.png).
    """
    current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    plt.savefig(f"{img_folder}/{current_time}.png")


def main():
    """
    Command line entry point.

    Argument "n" specifies how many hashtags the user wants to see.
    "-d" option prints the hashtag frequencies on the shell.
    "-p" option plots the hashtag frequencies and saves the plot as a png
    file in the images folder (global_data.IMAGES).
    When neither option is given the frequencies are printed.

    The function get_occurrences is triggered to compute and return the
    top n hashtags and their occurrences.
    """
    img_folder = global_data.IMAGES
    file_methods.check_file(img_folder, "dir")

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="The json hashtag file name")
    parser.add_argument("n", help="The number of top n occurrences", type=int)
    parser.add_argument("-p", "--plot", help="Plot the occurrences", action="store_true")
    parser.add_argument("-d", "--print", help="List top n hashtags", action="store_true")
    args = parser.parse_args()

    if args.n < 1:
        # parser.error prints the usage and exits with a non-zero status.
        parser.error("Please make sure the number of top occurrences is a positive integer.")

    occs = get_occurrences(args.input_file, args.n)
    # Print first: showing the plot may block until the window is closed.
    if args.print or not args.plot:
        print_occurrences(occs)
    if args.plot:
        plot(args.n, occs, img_folder)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# File: analytics/logging_analytics.py (second file of the original diff)
#
# """
# Yet to be written ...
# """