mirror of
https://github.com/bellingcat/tiktok-hashtag-analysis.git
synced 2026-06-12 05:18:30 +03:00
add comments
This commit is contained in:
@@ -4,6 +4,11 @@ import argparse
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
|
||||
"""
|
||||
Plots the frequency of hashtags appearing in the set of given posts.
|
||||
"""
|
||||
|
||||
|
||||
sys.path.insert(0, '../tiktok_downloader')
|
||||
import file_methods, global_data
|
||||
|
||||
@@ -26,6 +31,12 @@ def get_hashtags(obj):
|
||||
|
||||
|
||||
def get_occurrences(filename, n=1 , sort=True):
|
||||
"""
|
||||
Takes the json file containing posts and returns the triplet:
|
||||
l : total posts in the file
|
||||
k : list of top n hashtags
|
||||
v_total : frequency of top n hashtags in l
|
||||
"""
|
||||
with open(filename) as f:
|
||||
obj = json.load(f)
|
||||
l = len(obj)
|
||||
@@ -34,7 +45,7 @@ def get_occurrences(filename, n=1 , sort=True):
|
||||
if not sort:
|
||||
k = list(tags.keys())
|
||||
v = list(tags.values())
|
||||
return obj, k, v
|
||||
return obj, k, v
|
||||
else:
|
||||
sorted_tags = {k: v for k,v in sorted(tags.items(), key=lambda item: item[1], reverse=True)}
|
||||
k = list(sorted_tags.keys())
|
||||
@@ -59,12 +70,15 @@ def plot(n, length, k, v, img_folder):
|
||||
|
||||
|
||||
def print_occurrences(l, k, v):
|
||||
"""
|
||||
Prints the top n hashtags with their occurrence counts and the ratio of occurrences to total posts, all to the shell.
|
||||
"""
|
||||
row_number = 0
|
||||
total_posts = l
|
||||
print ("{:<8} {:<15} {:<15} {:<15}".format("Rank", 'Hashtag','Occurrences',f'Frequency (Occurrences/Total-Posts({l}))'))
|
||||
#print(f'Hashtag Occurrences Frequency(Occurances/Total-Posts)')
|
||||
for key,value in zip(k, v):
|
||||
ratio = value/total_posts
|
||||
ratio = value/total_posts
|
||||
print ("{:<8} {:<15} {:<15} {:<15}".format(row_number, key, value, ratio))
|
||||
#print(f'{row_number}\t{key}\t\t{value}\t\t{ratio:.3f}')
|
||||
row_number += 1
|
||||
@@ -72,6 +86,9 @@ def print_occurrences(l, k, v):
|
||||
|
||||
|
||||
def save_plot(plt, img_folder):
|
||||
"""
|
||||
Saves the plot to a png file in the folder /data/imgs/
|
||||
"""
|
||||
try:
|
||||
now = datetime.now()
|
||||
current_time = now.strftime("%Y_%m_%d_%H_%M_%S")
|
||||
@@ -83,6 +100,13 @@ def save_plot(plt, img_folder):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
Option "n" specifies how many hashtags the user wants to plot.
|
||||
"-d" option prints the hashtag frequencies on the shell
|
||||
"-p" option plots the hashtag frequencies and saves as a png file in the folder /data/imgs/
|
||||
|
||||
The function get_occurrences is called to compute and return the top n hashtags and their occurrences.
|
||||
"""
|
||||
img_folder = global_data.IMAGES
|
||||
file_methods.check_file(img_folder, "dir")
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
Reference in New Issue
Block a user