Made downloading more robust against transient and permanent errors; fixed an issue where media file URLs weren't being updated after scraping.

This commit is contained in:
Tristan Lee
2023-09-09 00:42:56 -05:00
parent 1f4b956ce9
commit 6fa1e5026c
7 changed files with 124 additions and 50 deletions

View File

@@ -3,7 +3,7 @@ from tiktok_hashtag_analysis.base import TikTokDownloader, load_hashtags_from_fi
def test_scrape(tmp_path, hashtags):
downloader = TikTokDownloader(hashtags=hashtags[:1], data_dir=tmp_path)
downloader.run(download=True, plot=True, table=True, number=20)
downloader.run(limit=1000, download=True, plot=True, table=True, number=20)
def test_load_hashtags_from_file(tmp_path, hashtags):

View File

@@ -13,11 +13,14 @@ PARSER_ARGUMENTS = [
("file", "hashtags.txt", "--file"),
("download", True, "--download"),
("download", True, "-d"),
("limit", 1000, "--limit"),
("number", 20, "--number"),
("plot", True, "--plot"),
("plot", True, "-p"),
("table", True, "--table"),
("table", True, "-t"),
("verbose", True, "--verbose"),
("verbose", True, "-v"),
("output_dir", "/tmp/tiktok_download", "--output-dir"),
("config", "~/.tiktok", "--config"),
("log", "../logfile.log", "--log"),