add comments

This commit is contained in:
X
2022-02-25 16:55:02 +01:00
parent d3edf604a9
commit b958ee52fe
5 changed files with 180 additions and 27 deletions

View File

@@ -4,6 +4,11 @@ import argparse
import matplotlib.pyplot as plt
from datetime import datetime
"""
Plots the frequency of hashtags appearing in the set of given posts.
"""
sys.path.insert(0, '../tiktok_downloader')
import file_methods, global_data
@@ -26,6 +31,12 @@ def get_hashtags(obj):
def get_occurrences(filename, n=1 , sort=True):
"""
Takes the json file containing posts and returns the triplet:
l : total posts in the file
k : list of top n hashtags
v_total : frequency of top n hashtags in l
"""
with open(filename) as f:
obj = json.load(f)
l = len(obj)
@@ -34,7 +45,7 @@ def get_occurrences(filename, n=1 , sort=True):
if not sort:
k = list(tags.keys())
v = list(tags.values())
return obj, k, v
return obj, k, v
else:
sorted_tags = {k: v for k,v in sorted(tags.items(), key=lambda item: item[1], reverse=True)}
k = list(sorted_tags.keys())
@@ -59,12 +70,15 @@ def plot(n, length, k, v, img_folder):
def print_occurrences(l, k, v):
"""
Prints the top n hashtags to the shell, each with its number of occurrences and the ratio of occurrences to total posts.
"""
row_number = 0
total_posts = l
print ("{:<8} {:<15} {:<15} {:<15}".format("Rank", 'Hashtag','Occurrences',f'Frequency (Occurrences/Total-Posts({l}))'))
#print(f'Hashtag Occurrences Frequency(Occurances/Total-Posts)')
for key,value in zip(k, v):
ratio = value/total_posts
ratio = value/total_posts
print ("{:<8} {:<15} {:<15} {:<15}".format(row_number, key, value, ratio))
#print(f'{row_number}\t{key}\t\t{value}\t\t{ratio:.3f}')
row_number += 1
@@ -72,6 +86,9 @@ def print_occurrences(l, k, v):
def save_plot(plt, img_folder):
"""
Saves the plot to a png file in the folder /data/imgs/
"""
try:
now = datetime.now()
current_time = now.strftime("%Y_%m_%d_%H_%M_%S")
@@ -83,6 +100,13 @@ def save_plot(plt, img_folder):
if __name__ == "__main__":
"""
Option "n" specifies how many hashtags the user wants to plot.
"-d" option prints the hashtag frequencies on the shell
"-p" option plots the hashtag frequencies and saves as a png file in the folder /data/imgs/
The function get_occurrences is called to compute and return the top n hashtags and their occurrences.
"""
img_folder = global_data.IMAGES
file_methods.check_file(img_folder, "dir")
parser = argparse.ArgumentParser()