mirror of
https://github.com/bellingcat/tiktok-hashtag-analysis.git
synced 2026-06-12 21:38:30 +03:00
incorporate Tristan's suggestions and further fix logging
This commit is contained in:
36
logging.config
Normal file
36
logging.config
Normal file
@@ -0,0 +1,36 @@
|
||||
[loggers]
|
||||
keys=root,Logger
|
||||
|
||||
[handlers]
|
||||
keys=consoleHandler,fileHandler
|
||||
|
||||
[formatters]
|
||||
keys=consoleFormatter,fileFormatter
|
||||
|
||||
[logger_root]
|
||||
level=DEBUG
|
||||
handlers=consoleHandler
|
||||
|
||||
[logger_Logger]
|
||||
level=DEBUG
|
||||
handlers=consoleHandler,fileHandler
|
||||
qualname=Logger
|
||||
propagate=0
|
||||
|
||||
[handler_consoleHandler]
|
||||
class=StreamHandler
|
||||
level=INFO
|
||||
formatter=consoleFormatter
|
||||
args=(sys.stdout,)
|
||||
|
||||
[handler_fileHandler]
|
||||
class=FileHandler
|
||||
level=WARNING
|
||||
formatter=fileFormatter
|
||||
args=("../logfile.log",)
|
||||
|
||||
[formatter_consoleFormatter]
|
||||
format=%(message)s
|
||||
|
||||
[formatter_fileFormatter]
|
||||
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
|
||||
@@ -1,10 +1,10 @@
|
||||
from collections import namedtuple
|
||||
import warnings
|
||||
import logging
|
||||
import logging, logging.config
|
||||
|
||||
import file_methods
|
||||
|
||||
logger = logging.getLogger()
|
||||
logging.config.fileConfig("../logging.config")
|
||||
logger = logging.getLogger("Logger")
|
||||
|
||||
|
||||
"""
|
||||
@@ -58,7 +58,7 @@ def extract_posts(settings, file_name, tag):
|
||||
ids.append(post["id"])
|
||||
|
||||
if not ids:
|
||||
warnings.warn(f"No posts were found for {tag} in the file - {file_name}")
|
||||
logger.warn(f"No posts were found for the hashtag: {tag}")
|
||||
|
||||
status = file_methods.check_existence(settings["post_ids"], "file")
|
||||
if not status:
|
||||
@@ -67,7 +67,7 @@ def extract_posts(settings, file_name, tag):
|
||||
else:
|
||||
new_ids = get_difference(tag, settings["post_ids"], ids)
|
||||
if not new_ids:
|
||||
warnings.warn(f"No new posts were found in the downloaded file - {file_name}")
|
||||
logger.warn(f"No new posts were found for the hashtag: {tag}")
|
||||
elif new_ids.filter_posts:
|
||||
new_posts = [post for post in posts if post['id'] in new_ids.ids]
|
||||
new_data = (new_ids.ids, new_posts)
|
||||
@@ -88,7 +88,7 @@ def extract_videos(settings, tag, download_list):
|
||||
else:
|
||||
new_videos = get_difference(tag, settings["video_ids"], download_list)
|
||||
if not new_videos:
|
||||
warnings.warn(f"No new videos were found for the {tag} in the downloaded folder.")
|
||||
logger.warn(f"No new videos were found for the {tag} in the downloaded folder.")
|
||||
return None
|
||||
else:
|
||||
return new_videos.ids
|
||||
@@ -140,4 +140,4 @@ def print_total(file_path, tag, data_type):
|
||||
if (total.total == total.unique):
|
||||
logger.info(f"Scraped {total.total} {data_type} containing the hashtag '{tag}'")
|
||||
else:
|
||||
warnings.warn(f"Out of total {data_type} for the hashtag {tag} {total.total}, only {total.unique} are unique. Something is going wrong...")
|
||||
logger.warn(f"Out of total {data_type} for the hashtag {tag} {total.total}, only {total.unique} are unique. Something is going wrong...")
|
||||
|
||||
@@ -3,14 +3,11 @@ import json
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
import shutil
|
||||
import warnings
|
||||
|
||||
import logging
|
||||
import logging, logging.config
|
||||
|
||||
logging.basicConfig(
|
||||
level = logging.INFO,
|
||||
format = '%(message)s')
|
||||
logger = logging.getLogger()
|
||||
logging.config.fileConfig("../logging.config")
|
||||
logger = logging.getLogger("Logger")
|
||||
|
||||
"""
|
||||
This file contains the functions that operate on files, such as writing to or reading from files.
|
||||
@@ -65,11 +62,10 @@ def download_posts(settings, tag):
|
||||
os.chdir("../../../tiktok_downloader")
|
||||
return new_file
|
||||
else:
|
||||
warnings.warn(f"Something's wrong with what is returned by tiktok-scraper for the hashtag {tag} - *{new_file}* is not a json file.\n\ntiktok-scraper returned {output}")
|
||||
logger.warn(f"Something's wrong with what is returned by tiktok-scraper for the hashtag {tag} - *{new_file}* is not a json file.\n\ntiktok-scraper returned {output}")
|
||||
os.chdir("../../../tiktok_downloader")
|
||||
|
||||
|
||||
|
||||
def download_videos(settings, tag):
|
||||
"""
|
||||
Runs the tiktok-scraper command to download videos for a given hashtag. Note that all the videos returned by the TikTok API are downloaded; as a result, it's a time- and data-consuming process.
|
||||
@@ -90,7 +86,7 @@ def download_videos(settings, tag):
|
||||
os.chdir("../../../tiktok_downloader")
|
||||
return downloaded_list
|
||||
else:
|
||||
warnings.warn(f"No video files were downloaded for the hashtag {tag}.")
|
||||
logger.warn(f"No video files were downloaded for the hashtag {tag}.")
|
||||
os.chdir("../../../tiktok_downloader")
|
||||
shutil.rmtree(settings['videos_delete'])
|
||||
|
||||
@@ -134,7 +130,7 @@ def log_writer(log_data):
|
||||
now_str = now.strftime("%d-%m-%Y %H:%M:%S")
|
||||
data = { now_str : scraped_summary_dict }
|
||||
|
||||
logger.debug(f"Logged post data: {data}")
|
||||
logger.warn(f"Logged post data: {data}")
|
||||
logger.info(f"Successfully scraped {total} total entries")
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
import os
|
||||
import time
|
||||
import argparse
|
||||
import logging
|
||||
import logging, logging.config
|
||||
|
||||
import global_data
|
||||
import file_methods
|
||||
import data_methods
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
logging.config.fileConfig("../logging.config")
|
||||
logger = logging.getLogger("Logger")
|
||||
|
||||
|
||||
def get_hashtag_list(file_name):
|
||||
if not file_methods.check_existence(file_name, 'file'):
|
||||
|
||||
Reference in New Issue
Block a user