mirror of
https://github.com/bellingcat/tiktok-hashtag-analysis.git
synced 2026-06-08 03:18:31 +03:00
57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
import json
|
|
import datetime
|
|
import collections
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.dates as mdates
|
|
|
|
|
|
# The script needs two positional arguments: the path to the JSON data file
# (sys.argv[1]) and the hashtag label used in the plot title (sys.argv[2]).
if len(sys.argv) < 3:
    # Report the usage error on stderr and exit with a non-zero status so
    # that shells and calling scripts can tell the run failed.
    print('ERROR: Please make sure the command line has the following format: python3 extract_date.py hashtag_data.json hashtag', file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
def list_to_frequency(li):
    """Count how often each element occurs in a list.

    Args:
        li: A non-empty list of hashable items to tally.

    Returns:
        A ``collections.Counter`` mapping each distinct item to its number
        of occurrences. If ``li`` is empty or is not a list, an error
        message is printed and an empty ``Counter`` is returned, so callers
        can always treat the result as a mapping.
    """
    if li and isinstance(li, list):
        return collections.Counter(li)

    print(f"ERROR: either {li} is empty or not a list.")
    # Return an empty mapping rather than falling through to an implicit
    # None, which would make e.g. dict(list_to_frequency(x)) raise TypeError.
    return collections.Counter()
|
|
|
|
|
|
def eligibility_check(obj):
    """Tell whether ``obj`` is an integer value.

    Args:
        obj: The value to validate.

    Returns:
        ``True`` when ``obj`` is an integer (including ``0``); ``False``
        otherwise. On rejection an error message is printed: falsy values
        (``None``, ``""``, ``False``, ...) are reported as empty, everything
        else as not being an integer.
    """
    # bool is a subclass of int, but True/False are not meaningful integer
    # values here, so they are rejected explicitly.
    if isinstance(obj, int) and not isinstance(obj, bool):
        # Checked before the emptiness test so that 0 is accepted as a
        # valid integer instead of being reported as "empty".
        return True

    if not obj:
        print(f'ERROR: {obj} is empty!')
    else:
        print(f'ERROR: {obj} is not an integer as is expected!')
    return False
|
|
|
|
# Load the post records from the JSON file named on the command line.
# JSON text is UTF-8 by specification, so do not rely on the platform's
# default encoding.
with open(sys.argv[1], encoding='utf-8') as file:
    posts = json.load(file)

total_posts = len(posts)

# Collect the calendar date of every post that carries a usable timestamp.
date_list = []
for post in posts:
    # .get() yields None when a post has no "createTime" entry, so such a
    # post is reported and skipped below instead of raising KeyError.
    created = post.get("createTime")
    if eligibility_check(created):
        # NOTE(review): presumably createTime is a Unix timestamp in seconds;
        # fromtimestamp() converts it using the local time zone - confirm
        # that local dates (rather than UTC dates) are what is wanted.
        dt_obj = datetime.datetime.fromtimestamp(created)
        date_list.append(dt_obj.date())
    else:
        print(f'ERROR: Some error occured. Check {created}.')

# Without at least one valid date there is nothing to plot.
if not date_list:
    print(f'ERROR: No valid createTime values found in {sys.argv[1]}.', file=sys.stderr)
    sys.exit(1)

# Number of posts per calendar date.
posts_per_date = dict(list_to_frequency(date_list))
dates = list(posts_per_date)
total_dates = len(dates)
frequency = list(posts_per_date.values())

# One point per date: x = date, y = number of posts created on that date.
plt.scatter(dates, frequency)
plt.gcf().autofmt_xdate()
date_format = mdates.DateFormatter('%d-%m-%Y')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title(f'Hashtag Lifecyle - #{sys.argv[2]}')
plt.xlabel(f'Dates ({total_dates} dates out of {total_posts} posts)')
plt.ylabel('Posts')
# Run tight_layout() only after the title and axis labels exist, so that
# they are taken into account when the layout is computed.
plt.tight_layout()
plt.show()
|