From a7bd023c213e0091055f8537bea9f8f5262c3bd9 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Fri, 1 Sep 2023 17:05:13 -0500
Subject: [PATCH 1/6] simplified downloading logic (methods for keeping track
 of files less necessary since scraping can be done in Python), added
 functionality to use yt-dlp to download videos, added functionality to
 download TikTok image galleries

---
 Pipfile                                       |  13 -
 Pipfile.lock                                  | 416 ------------------
 README.md                                     |  44 +-
 requirements.txt                              |   2 -
 scripts/release.sh                            |   2 +-
 setup.py                                      |  21 +-
 tiktok_hashtag_analysis/__init__.py           |   1 +
 tiktok_hashtag_analysis/__main__.py           | 139 +++---
 tiktok_hashtag_analysis/base.py               | 259 +++++++++++
 tiktok_hashtag_analysis/data_methods.py       | 161 -------
 tiktok_hashtag_analysis/file_methods.py       | 216 ---------
 tiktok_hashtag_analysis/global_data.py        |  32 --
 .../hashtag_frequencies.py                    |  99 -----
 tiktok_hashtag_analysis/hashtag_list.txt      |   5 -
 tiktok_hashtag_analysis/logging.config        |  36 --
 tiktok_hashtag_analysis/run_downloader.py     | 150 -------
 tiktok_hashtag_analysis/version.py            |  12 -
 17 files changed, 364 insertions(+), 1244 deletions(-)
 delete mode 100644 Pipfile
 delete mode 100644 Pipfile.lock
 delete mode 100644 requirements.txt
 create mode 100644 tiktok_hashtag_analysis/base.py
 delete mode 100644 tiktok_hashtag_analysis/data_methods.py
 delete mode 100644 tiktok_hashtag_analysis/file_methods.py
 delete mode 100644 tiktok_hashtag_analysis/global_data.py
 delete mode 100644 tiktok_hashtag_analysis/hashtag_frequencies.py
 delete mode 100644 tiktok_hashtag_analysis/hashtag_list.txt
 delete mode 100644 tiktok_hashtag_analysis/logging.config
 delete mode 100644 tiktok_hashtag_analysis/run_downloader.py
 delete mode 100644 tiktok_hashtag_analysis/version.py

diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index 2fe1f28..0000000
--- a/Pipfile
+++ /dev/null
@@ -1,13 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-matplotlib = "*"
-seaborn = "*"
-
-[dev-packages]
-
-[requires]
-python_version = "3.10"
diff --git a/Pipfile.lock b/Pipfile.lock
deleted file mode 100644
index 1518e4e..0000000
--- a/Pipfile.lock
+++ /dev/null
@@ -1,416 +0,0 @@
-{
-    "_meta": {
-        "hash": {
-            "sha256": "97c5ef0126b17f586b5fa1d518cf359b7e984e48f8fc2310e9aa79bd384c2374"
-        },
-        "pipfile-spec": 6,
-        "requires": {
-            "python_version": "3.10"
-        },
-        "sources": [
-            {
-                "name": "pypi",
-                "url": "https://pypi.org/simple",
-                "verify_ssl": true
-            }
-        ]
-    },
-    "default": {
-        "contourpy": {
-            "hashes": [
-                "sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98",
-                "sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772",
-                "sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2",
-                "sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc",
-                "sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803",
-                "sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051",
-                "sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc",
-                "sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4",
-                "sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436",
-                "sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5",
-                "sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5",
-                "sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3",
-                "sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80",
-                "sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1",
-                "sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0",
-                "sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae",
-                "sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556",
-                "sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02",
-                "sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566",
-                "sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350",
-                "sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967",
-                "sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4",
-                "sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66",
-                "sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69",
-                "sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd",
-                "sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2",
-                "sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810",
-                "sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50",
-                "sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc",
-                "sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2",
-                "sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0",
-                "sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3",
-                "sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6",
-                "sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac",
-                "sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d",
-                "sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6",
-                "sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f",
-                "sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd",
-                "sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566",
-                "sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa",
-                "sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414",
-                "sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a",
-                "sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c",
-                "sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693",
-                "sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d",
-                "sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161",
-                "sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e",
-                "sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2",
-                "sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f",
-                "sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71",
-                "sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd",
-                "sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9",
-                "sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8",
-                "sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab",
-                "sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad"
-            ],
-            "markers": "python_version >= '3.8'",
-            "version": "==1.0.7"
-        },
-        "cycler": {
-            "hashes": [
-                "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3",
-                "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==0.11.0"
-        },
-        "fonttools": {
-            "hashes": [
-                "sha256:2bb244009f9bf3fa100fc3ead6aeb99febe5985fa20afbfbaa2f8946c2fbdaf1",
-                "sha256:820466f43c8be8c3009aef8b87e785014133508f0de64ec469e4efb643ae54fb"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==4.38.0"
-        },
-        "kiwisolver": {
-            "hashes": [
-                "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b",
-                "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166",
-                "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c",
-                "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c",
-                "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0",
-                "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4",
-                "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9",
-                "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286",
-                "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767",
-                "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c",
-                "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6",
-                "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b",
-                "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004",
-                "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf",
-                "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494",
-                "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac",
-                "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626",
-                "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766",
-                "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514",
-                "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6",
-                "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f",
-                "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d",
-                "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191",
-                "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d",
-                "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51",
-                "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f",
-                "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8",
-                "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454",
-                "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb",
-                "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da",
-                "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8",
-                "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de",
-                "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a",
-                "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9",
-                "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008",
-                "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3",
-                "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32",
-                "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938",
-                "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1",
-                "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9",
-                "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d",
-                "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824",
-                "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b",
-                "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd",
-                "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2",
-                "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5",
-                "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69",
-                "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3",
-                "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae",
-                "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597",
-                "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e",
-                "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955",
-                "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca",
-                "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a",
-                "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea",
-                "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede",
-                "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4",
-                "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6",
-                "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686",
-                "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408",
-                "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871",
-                "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29",
-                "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750",
-                "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897",
-                "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0",
-                "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2",
-                "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09",
-                "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==1.4.4"
-        },
-        "matplotlib": {
-            "hashes": [
-                "sha256:01b7f521a9a73c383825813af255f8c4485d1706e4f3e2ed5ae771e4403a40ab",
-                "sha256:11011c97d62c1db7bc20509572557842dbb8c2a2ddd3dd7f20501aa1cde3e54e",
-                "sha256:1183877d008c752d7d535396096c910f4663e4b74a18313adee1213328388e1e",
-                "sha256:12f999661589981e74d793ee2f41b924b3b87d65fd929f6153bf0f30675c59b1",
-                "sha256:1c235bf9be052347373f589e018988cad177abb3f997ab1a2e2210c41562cc0c",
-                "sha256:1f4d69707b1677560cd952544ee4962f68ff07952fb9069ff8c12b56353cb8c9",
-                "sha256:1fcc4cad498533d3c393a160975acc9b36ffa224d15a6b90ae579eacee5d8579",
-                "sha256:2787a16df07370dcba385fe20cdd0cc3cfaabd3c873ddabca78c10514c799721",
-                "sha256:29f17b7f2e068dc346687cbdf80b430580bab42346625821c2d3abf3a1ec5417",
-                "sha256:38d38cb1ea1d80ee0f6351b65c6f76cad6060bbbead015720ba001348ae90f0c",
-                "sha256:3f56a7252eee8f3438447f75f5e1148a1896a2756a92285fe5d73bed6deebff4",
-                "sha256:5223affa21050fb6118353c1380c15e23aedfb436bf3e162c26dc950617a7519",
-                "sha256:57ad1aee29043163374bfa8990e1a2a10ff72c9a1bfaa92e9c46f6ea59269121",
-                "sha256:59400cc9451094b7f08cc3f321972e6e1db4cd37a978d4e8a12824bf7fd2f03b",
-                "sha256:68d94a436f62b8a861bf3ace82067a71bafb724b4e4f9133521e4d8012420dd7",
-                "sha256:6adc441b5b2098a4b904bbf9d9e92fb816fef50c55aa2ea6a823fc89b94bb838",
-                "sha256:6d81b11ede69e3a751424b98dc869c96c10256b2206bfdf41f9c720eee86844c",
-                "sha256:73b93af33634ed919e72811c9703e1105185cd3fb46d76f30b7f4cfbbd063f89",
-                "sha256:77b384cee7ab8cf75ffccbfea351a09b97564fc62d149827a5e864bec81526e5",
-                "sha256:79e501eb847f4a489eb7065bb8d3187117f65a4c02d12ea3a19d6c5bef173bcc",
-                "sha256:809119d1cba3ece3c9742eb01827fe7a0e781ea3c5d89534655a75e07979344f",
-                "sha256:80c166a0e28512e26755f69040e6bf2f946a02ffdb7c00bf6158cca3d2b146e6",
-                "sha256:81b409b2790cf8d7c1ef35920f01676d2ae7afa8241844e7aa5484fdf493a9a0",
-                "sha256:994637e2995b0342699b396a320698b07cd148bbcf2dd2fa2daba73f34dd19f2",
-                "sha256:9ceebaf73f1a3444fa11014f38b9da37ff7ea328d6efa1652241fe3777bfdab9",
-                "sha256:9fb8fb19d03abf3c5dab89a8677e62c4023632f919a62b6dd1d6d2dbf42cd9f5",
-                "sha256:acc3b1a4bddbf56fe461e36fb9ef94c2cb607fc90d24ccc650040bfcc7610de4",
-                "sha256:bbddfeb1495484351fb5b30cf5bdf06b3de0bc4626a707d29e43dfd61af2a780",
-                "sha256:bbf269e1d24bc25247095d71c7a969813f7080e2a7c6fa28931a603f747ab012",
-                "sha256:bebcff4c3ed02c6399d47329f3554193abd824d3d53b5ca02cf583bcd94470e2",
-                "sha256:c3f08df2ac4636249b8bc7a85b8b82c983bef1441595936f62c2918370ca7e1d",
-                "sha256:ca94f0362f6b6f424b555b956971dcb94b12d0368a6c3e07dc7a40d32d6d873d",
-                "sha256:d00c248ab6b92bea3f8148714837937053a083ff03b4c5e30ed37e28fc0e7e56",
-                "sha256:d2cfaa7fd62294d945b8843ea24228a27c8e7c5b48fa634f3c168153b825a21b",
-                "sha256:d5f18430f5cfa5571ab8f4c72c89af52aa0618e864c60028f11a857d62200cba",
-                "sha256:debeab8e2ab07e5e3dac33e12456da79c7e104270d2b2d1df92b9e40347cca75",
-                "sha256:dfba7057609ca9567b9704626756f0142e97ec8c5ba2c70c6e7bd1c25ef99f06",
-                "sha256:e0a64d7cc336b52e90f59e6d638ae847b966f68582a7af041e063d568e814740",
-                "sha256:eb9421c403ffd387fbe729de6d9a03005bf42faba5e8432f4e51e703215b49fc",
-                "sha256:faff486b36530a836a6b4395850322e74211cd81fc17f28b4904e1bd53668e3e",
-                "sha256:ff2aa84e74f80891e6bcf292ebb1dd57714ffbe13177642d65fee25384a30894"
-            ],
-            "index": "pypi",
-            "version": "==3.6.3"
-        },
-        "numpy": {
-            "hashes": [
-                "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22",
-                "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f",
-                "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9",
-                "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96",
-                "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0",
-                "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a",
-                "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281",
-                "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04",
-                "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468",
-                "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253",
-                "sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756",
-                "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a",
-                "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb",
-                "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d",
-                "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0",
-                "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910",
-                "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978",
-                "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5",
-                "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f",
-                "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a",
-                "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5",
-                "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2",
-                "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d",
-                "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95",
-                "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5",
-                "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d",
-                "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780",
-                "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"
-            ],
-            "markers": "python_version >= '3.8'",
-            "version": "==1.24.2"
-        },
-        "packaging": {
-            "hashes": [
-                "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
-                "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==23.0"
-        },
-        "pandas": {
-            "hashes": [
-                "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813",
-                "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792",
-                "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406",
-                "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373",
-                "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328",
-                "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996",
-                "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf",
-                "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6",
-                "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7",
-                "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc",
-                "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1",
-                "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23",
-                "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a",
-                "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51",
-                "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572",
-                "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31",
-                "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5",
-                "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a",
-                "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003",
-                "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d",
-                "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354",
-                "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee",
-                "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa",
-                "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0",
-                "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9",
-                "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae",
-                "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"
-            ],
-            "markers": "python_version >= '3.8'",
-            "version": "==1.5.3"
-        },
-        "pillow": {
-            "hashes": [
-                "sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33",
-                "sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b",
-                "sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e",
-                "sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35",
-                "sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153",
-                "sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9",
-                "sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569",
-                "sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57",
-                "sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8",
-                "sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1",
-                "sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264",
-                "sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157",
-                "sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9",
-                "sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133",
-                "sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9",
-                "sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab",
-                "sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6",
-                "sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5",
-                "sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df",
-                "sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503",
-                "sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b",
-                "sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa",
-                "sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327",
-                "sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493",
-                "sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d",
-                "sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4",
-                "sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4",
-                "sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35",
-                "sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2",
-                "sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c",
-                "sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011",
-                "sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a",
-                "sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e",
-                "sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f",
-                "sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848",
-                "sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57",
-                "sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f",
-                "sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c",
-                "sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9",
-                "sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5",
-                "sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9",
-                "sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d",
-                "sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0",
-                "sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1",
-                "sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e",
-                "sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815",
-                "sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0",
-                "sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b",
-                "sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd",
-                "sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c",
-                "sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3",
-                "sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab",
-                "sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858",
-                "sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5",
-                "sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee",
-                "sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343",
-                "sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb",
-                "sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47",
-                "sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed",
-                "sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837",
-                "sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286",
-                "sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28",
-                "sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628",
-                "sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df",
-                "sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d",
-                "sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d",
-                "sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a",
-                "sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6",
-                "sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336",
-                "sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132",
-                "sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070",
-                "sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe",
-                "sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a",
-                "sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd",
-                "sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391",
-                "sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a",
-                "sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==9.4.0"
-        },
-        "pyparsing": {
-            "hashes": [
-                "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb",
-                "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"
-            ],
-            "markers": "python_full_version >= '3.6.8'",
-            "version": "==3.0.9"
-        },
-        "python-dateutil": {
-            "hashes": [
-                "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
-                "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
-            "version": "==2.8.2"
-        },
-        "pytz": {
-            "hashes": [
-                "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0",
-                "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"
-            ],
-            "version": "==2022.7.1"
-        },
-        "seaborn": {
-            "hashes": [
-                "sha256:374645f36509d0dcab895cba5b47daf0586f77bfe3b36c97c607db7da5be0139",
-                "sha256:ebf15355a4dba46037dfd65b7350f014ceb1f13c05e814eda2c9f5fd731afc08"
-            ],
-            "index": "pypi",
-            "version": "==0.12.2"
-        },
-        "six": {
-            "hashes": [
-                "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
-                "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
-            "version": "==1.16.0"
-        }
-    },
-    "develop": {}
-}
diff --git a/README.md b/README.md
index 2cbd3ad..b0e3f25 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,12 @@
 # TikTok hashtag analysis toolset 
 
-> IMPORTANT NOTE: this tool relies on [drawrowfly/tiktok-scraper](https://github.com/drawrowfly/tiktok-scraper) which seems to be broken at time of writing and without updates for some time with several open issues ([796](https://github.com/drawrowfly/tiktok-scraper/issues/796) [#799](https://github.com/drawrowfly/tiktok-scraper/issues/799)) that need to be fixed before this library can work smoothly :/
-
-The tool helps to download posts and videos from TikTok for a given set of hashtags over a period of time. Users can create a growing database of posts for specific hashtags which can then be used for further hashtag analysis. It uses the [tiktok-scraper](https://github.com/drawrowfly/tiktok-scraper) Node package  to download the posts and videos.
+The tool helps to download posts and videos from TikTok for a given set of hashtags over a period of time. Users can create a growing database of posts for specific hashtags which can then be used for further hashtag analysis. It uses the [TikTokApi](https://github.com/davidteather/TikTok-Api) Python package  to download the posts and uses [yt-dlp](https://github.com/yt-dlp/yt-dlp) to download the videos.
 
 [![PyPI version](https://badge.fury.io/py/tiktok-hashtag-analysis.svg)](https://badge.fury.io/py/tiktok-hashtag-analysis)
 
 ## Pre-requisites
-1. Make sure you have Python 3.6 or a later version installed
-2. And, you need to have node version 16. On Mac, do `brew install node` followed by `npm install -g n` and then `n 16`
-4. Download and install TikTok scraper: https://github.com/drawrowfly/tiktok-scraper 
-5. Install the tool with pip: `pip install tiktok-hashtag-analysis`
+1. Make sure you have Python 3.9 or a later version installed
+2. Install the tool with pip: `pip install tiktok-hashtag-analysis`
    1. or directly from the repo version: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis`
 
 You should now be ready to start using it.
@@ -19,27 +15,23 @@ You should now be ready to start using it.
 ## About the tool
 ### Command-line arguments
 ```
-tiktok-hashtag-analysis --help
-usage: tiktok-hashtag-analysis [-h] [-t [T ...]] [-f F] [-p] [-v] [-ht HASHTAG] [-n NUMBER] [-plt] [-d] {download,frequencies}
+usage: tiktok-hashtag-analysis [-h] [--file FILE] [-d] [--number NUMBER] [-p] [-t] [--output-dir OUTPUT_DIR] [--log LOG] [hashtags ...]
 
 Analyze hashtags within posts scraped from TikTok.
 
 positional arguments:
-  {download,frequencies}
-                        command to initialize
+  hashtags              List of hashtags to scrape
 
-options:
+optional arguments:
   -h, --help            show this help message and exit
-  -t [T ...]            List of hashtags to scrape (module: run_downloader)
-  -f F                  File name containing list of hashtags to scrape (module: run_downloader)
-  -p                    Download post data (module: run_downloader)
-  -v                    Download video files (module: run_downloader)
-  -ht HASHTAG, --hashtag HASHTAG
-                        The hashtag of scraped posts to analyze (module: hashtag_frequencies)
-  -n NUMBER, --number NUMBER
-                        The number of top n occurrences (module: hashtag_frequencies)
-  -plt, --plot          Plot the occurrences (module: hashtag_frequencies)
-  -d, --print           List top n hashtags (module: hashtag_frequencies)
+  --file FILE           File name containing list of hashtags to scrape
+  -d, --download        Download video files corresponding to scraped posts
+  --number NUMBER       The number of co-occurring hashtags to analyze
+  -p, --plot            Plot the most common co-occurring hashtags
+  -t, --table           Print a table of the most common co-occurring hashtags
+  --output-dir OUTPUT_DIR
+                        Directory to save scraped data and visualizations to
+  --log LOG             File to write logs to
 ```
 
 ### Structure of output data
@@ -67,9 +59,9 @@ The `data` folder contains all the downloaded data as shown in the tree diagram
 
 ## How to use
 ### Post downloading
-Running the `tiktok-hashtag-analysis download` command with the following options will scrape posts containing the hashtags `#london`, `#paris`, or `#newyork`:
+Running the `tiktok-hashtag-analysis` command with the following options will scrape posts containing the hashtags `#london`, `#paris`, or `#newyork`:
 
-    tiktok-hashtag-analysis download -t london paris newyork -p
+    tiktok-hashtag-analysis london paris newyork
 
 and will produce an output similar to the following log:
 
@@ -100,7 +92,7 @@ Assume we want to analyze the 20 most frequently occurring hashtags in the downl
 
 - The results can be plotted and saved as a PNG file by executing the following command: 
 
-    `tiktok-hashtag-analysis frequencies london 20 -p`
+    `tiktok-hashtag-analysis london --number 20 --plot`
     
     which will produce a figure similar to that shown below:
     <p align="center">
@@ -111,7 +103,7 @@ Assume we want to analyze the 20 most frequently occurring hashtags in the downl
 
 - The results can be displayed in tabular form by executing the following command:
 
-    `tiktok-hashtag-analysis frequencies london 20 -d`
+    `tiktok-hashtag-analysis london --number 20 --table`
 
     which will produce a terminal output similar to the following:
     ```
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 9a8d369..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-matplotlib
-seaborn
\ No newline at end of file
diff --git a/scripts/release.sh b/scripts/release.sh
index 6789652..c96718c 100644
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -3,7 +3,7 @@
 
 set -e
 
-TAG=$(python -c 'from tiktok_hashtag_analysis.version import __version__; print("v" + __version__)')
+TAG=$(python -c 'from tiktok_hashtag_analysis import __version__; print("v" + __version__)')
 
 read -p "Creating new release for $TAG. Do you want to continue? [Y/n] " prompt
 
diff --git a/setup.py b/setup.py
index 8a347af..bd6119e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
-from setuptools import setup, find_packages
-from tiktok_hashtag_analysis.version import __version__
+from setuptools import setup
+from tiktok_hashtag_analysis import __version__
 
 with open("README.md", "r", encoding="utf-8") as file:
     long_description = file.read()
@@ -10,23 +10,18 @@ setup(
     author="Bellingcat",
     author_email="tech@bellingcat.com",
     packages=["tiktok_hashtag_analysis"],
-    package_data={
-        "tiktok_hashtag_analysis": [
-            "logging.config",
-        ]
-    },
     description="Analyze hashtags within posts scraped from TikTok",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/bellingcat/tiktok-hashtag-analysis",
     license="MIT License",
-    install_requires=["seaborn", "matplotlib"],
+    install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt-dlp"],
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Information Technology',
-        'License :: OSI Approved :: MIT License',
-        'Natural Language :: English',
-        'Programming Language :: Python :: 3'
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Information Technology",
+        "License :: OSI Approved :: MIT License",
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3",
     ],
     entry_points={
         "console_scripts": [
diff --git a/tiktok_hashtag_analysis/__init__.py b/tiktok_hashtag_analysis/__init__.py
index e69de29..8c0d5d5 100644
--- a/tiktok_hashtag_analysis/__init__.py
+++ b/tiktok_hashtag_analysis/__init__.py
@@ -0,0 +1 @@
+__version__ = "2.0.0"
diff --git a/tiktok_hashtag_analysis/__main__.py b/tiktok_hashtag_analysis/__main__.py
index c5b1525..8e7dce2 100644
--- a/tiktok_hashtag_analysis/__main__.py
+++ b/tiktok_hashtag_analysis/__main__.py
@@ -1,76 +1,91 @@
-import logging, argparse
-from .file_methods import log_writer
-from .run_downloader import * # Import everything from run_downloader.py
-from .hashtag_frequencies import * # Import everything from hashtag_frequencies.py
+import logging
+import argparse
+from pathlib import Path
+import sys
 
-logger = logging.getLogger()
+from .base import TikTokDownloader, load_hashtags_from_file
 
 
-def create_parser() -> argparse.ArgumentParser:
-    """Create the parser and the arguments for the user input."""
-    parser = argparse.ArgumentParser(description="Analyze hashtags within posts scraped from TikTok.")
-    parser.add_argument("command", help="command to initialize", choices=['download', 'frequencies'])
-    parser.add_argument("-t", type=str, nargs="*", help="List of hashtags to scrape (module: run_downloader)")
-    parser.add_argument("-f", type=str, help="File name containing list of hashtags to scrape (module: run_downloader)")
-    parser.add_argument("-p", action="store_true", help="Download post data (module: run_downloader)")
-    parser.add_argument("-v", action="store_true", help="Download video files (module: run_downloader)")
-    parser.add_argument("-ht", "--hashtag", type=str,
-                        help="The hashtag of scraped posts to analyze (module: hashtag_frequencies)", )
-    parser.add_argument("-n", "--number", type=int, help="The number of top n occurrences (module: hashtag_frequencies)")
-    parser.add_argument("-plt", "--plot", help="Plot the occurrences (module: hashtag_frequencies)", action="store_true")
-    parser.add_argument("-d", "--print", help="List top n hashtags (module: hashtag_frequencies)", action="store_true")
+def create_parser():
+    parser = argparse.ArgumentParser(
+        description="Analyze hashtags within posts scraped from TikTok."
+    )
+
+    parser.add_argument(
+        "hashtags",
+        type=str,
+        nargs="*",
+        help="List of hashtags to scrape",
+    )
+    parser.add_argument(
+        "--file",
+        type=str,
+        help="File name containing list of hashtags to scrape",
+    )
+    parser.add_argument(
+        "-d",
+        "--download",
+        action="store_true",
+        help="Download video files corresponding to scraped posts",
+    )
+    parser.add_argument(
+        "--number",
+        type=int,
+        help="The number of co-occurring hashtags to analyze",
+        default=20,
+    )
+    parser.add_argument(
+        "-p",
+        "--plot",
+        help="Plot the most common co-occurring hashtags",
+        action="store_true",
+    )
+    parser.add_argument(
+        "-t",
+        "--table",
+        help="Print a table of the most common co-occurring hashtags",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Directory to save scraped data and visualizations to",
+        default=Path(".").resolve().parent / "data",
+    )
+    parser.add_argument("--log", type=str, help="File to write logs to", default=None)
+
     return parser
 
 
 def main():
     parser = create_parser()
     args = parser.parse_args()
-    if args.command == "download":
-        if not (args.t or args.f):
-            parser.error(
-                "No hashtags were given, please use either the `-t` flag or the `-f` flag to specify one or more hashtags.")
 
-        if not (args.p or args.v):
+    logging.basicConfig(
+        level=logging.INFO,
+        filename=args.log,
+        format="%(asctime)s %(levelname)s | %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+    if len(args.hashtags) == 0:
+        if not args.file:
             parser.error(
-                "No argument given, please specify either the `-p` flag to download post data or the `-v` flag to download video files, or both."
+                "No hashtags were specified, please specify one or more hashtags "
+                "to scrape or use the `--file` flag to specify a text file containing "
+                "hashtags."
             )
-
-        if args.t:
-            hashtags = args.t
-        elif args.f:
-            file_name = args.f
-            hashtags = get_hashtag_list(file_name)
-
-        logger.info(f"Hashtags to scrape: {hashtags}")
-        if not hashtags:
-            raise ValueError(
-                "No hashtags were specified: please use either the `-t` flag to specify a sspace-separated list of one or more hashtags as a command-line argument, or use the `-f` flag to specify a text file of newline-separated hashtags.")
-
-        download_data_type = {"posts": args.p, "videos": args.v}
-
-        scraped_summary_list = get_data(hashtags, download_data_type)
-        if scraped_summary_list:
-            log_writer(scraped_summary_list)
-    elif args.command == "frequencies":
-        img_folder = IMAGES
-        check_file(img_folder, "dir")
-        if args.n < 1:
-            raise ValueError(
-                f"Specified argument `n` (the number of hashtags to analyze) must be greater than zero, not: {args.n}.")
-        input_file = data_file = os.path.join(
-            FILES["data"], args.hashtag, FILES["posts"], FILES["data_file"]
-        )
-        if not check_existence(input_file, "file"):
-            raise FileNotFoundError(
-                f"File ({input_file}) for specified argument `hashtag` ({args.hashtag}) does not exist.")
-
-        # base = os.path.splitext(input_file)[0]
-        # path = f"./{base}_sorted_hashtags.csv"
-        occs = get_occurrences(input_file, args.n)
-        if args.plot:
-            plot(occs, img_folder)
         else:
-            print_occurrences(occs)
+            hashtags = load_hashtags_from_file(file=args.file)
+    else:
+        hashtags = args.hashtags
 
-if __name__=="__main__":
-    main()
\ No newline at end of file
+    downloader = TikTokDownloader(hashtags=hashtags, data_dir=args.output_dir)
+
+    downloader.run(
+        download=args.download, plot=args.plot, table=args.table, number=args.number
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
new file mode 100644
index 0000000..63224ef
--- /dev/null
+++ b/tiktok_hashtag_analysis/base.py
@@ -0,0 +1,259 @@
+import os
+import json
+from pathlib import Path
+from collections import Counter
+from datetime import datetime
+import warnings
+import asyncio
+import logging
+import re
+from typing import List, Dict
+
+import yt_dlp
+import requests
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mtick
+import seaborn as sns
+
+from TikTokApi import TikTokApi
+
+warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font")
+sns.set_theme(style="darkgrid")
+
+
+def process_hashtag_list(hashtags: List[str]) -> List[str]:
+    """Convert a list of hashtags to a standard form (remove whitespace, make
+    lowercase, etc.)."""
+    return list(
+        filter(None, (hashtag.strip().strip("#").lower() for hashtag in hashtags))
+    )
+
+
+def load_hashtags_from_file(file: str) -> List[str]:
+    """Read and process hashtags specified in a text file."""
+    if not os.path.isfile(file):
+        raise OSError(f"{file} does not exist")
+    with open(file, "r", encoding="utf-8") as f:
+        hashtags = re.split(r"\n|,", f.read())
+    return process_hashtag_list(hashtags=hashtags)
+
+
+async def _fetch_hashtag_data(hashtag: str) -> List[Dict]:
+    """Asynchronously collect the raw dict of each video returned for `hashtag`."""
+    data = []
+    async with TikTokApi() as api:
+        await api.create_sessions(  # MS_TOKEN env var must be set, else KeyError
+            ms_tokens=[os.environ["MS_TOKEN"]], num_sessions=1, sleep_after=3
+        )
+        async for video in api.hashtag(name=hashtag).videos(count=1000):
+            data.append(video.as_dict)
+    return data
+
+
+def json_load(file_path: Path) -> List:
+    """Read a JSON file and return the read data."""
+    with open(file_path, "r", encoding="utf-8") as f:
+        data = json.load(fp=f)
+    return data
+
+
+def json_dump(file_path: Path, data: List):
+    """Write data to a JSON file."""
+    with open(file_path, "w", encoding="utf-8") as f:
+        json.dump(obj=data, fp=f)
+
+
+def download_gallery(video_data: Dict, video_dir: Path, timeout: float = 30):
+    """Save an image-gallery post's audio and images to `video_dir`, since yt-dlp
+    doesn't seem to support them; a quick fix that may fail on edge cases."""
+
+    video_id = video_data["id"]
+    if play_url := video_data["music"]["playUrl"]:
+        r = requests.get(play_url, timeout=timeout)  # else may hang forever
+        with open(video_dir / f"{video_id}.mp3", "wb") as f:
+            f.write(r.content)
+
+    for i, image in enumerate(video_data["imagePost"]["images"]):
+        r = requests.get(image["imageURL"]["urlList"][0], timeout=timeout)
+        # Drop media-type parameters, e.g. "image/jpeg; charset=binary" -> "jpeg"
+        ext = r.headers["Content-Type"].split(";")[0].split("/")[-1].strip()
+        with open(video_dir / f"{video_id}_{i:02d}.{ext}", "wb") as f:
+            f.write(r.content)
+
+
+def aggregate_cooccurring_hashtags(hashtag_file: Path) -> Counter:
+    """Count, for each hashtag, how many posts in `hashtag_file` (a JSON list of
+    raw TikTok post API responses) use it; each post counts a hashtag once."""
+    videos = json_load(file_path=hashtag_file)
+
+    all_hashtags: List[str] = []  # flat list of hashtag names across all posts
+    for video in videos:
+        video_hashtags = set(  # a set, so repeats within one post count once
+            hashtag["hashtagName"]
+            for hashtag in video.get("textExtra", [])
+            if hashtag.get("hashtagName")
+        )
+        all_hashtags.extend(video_hashtags)
+
+    return Counter(all_hashtags)
+
+
+class TikTokDownloader:
+    """Main class for scraping data from TikTok."""
+
+    def __init__(self, hashtags: List[str], data_dir: str):
+        """Normalize `hashtags` and create `data_dir` if it doesn't exist."""
+        self.hashtags = process_hashtag_list(hashtags)
+        logging.info(f"Hashtags to scrape: {self.hashtags}")  # normalized list
+        self.data_dir = Path(data_dir)
+        os.makedirs(self.data_dir, exist_ok=True)
+
+    def get_hashtag_posts(self, hashtag: str):
+        """Fetch data about posts that used a specified hashtag and merge with
+        existing data, if it exists."""
+
+        # Define file to store hashtags in and create parent directory
+        hashtag_file = self.data_dir / hashtag / "posts.json"
+        hashtag_file.parent.mkdir(exist_ok=True, parents=True)
+
+        # If there are previously scraped posts, load them
+        if hashtag_file.is_file():
+            already_fetched_data = json_load(file_path=hashtag_file)
+            already_fetched_ids = set(video["id"] for video in already_fetched_data)
+        else:
+            already_fetched_ids = set()
+            already_fetched_data = []
+
+        # Scrape posts that use the specified hashtag
+        fetched_data = asyncio.run(_fetch_hashtag_data(hashtag=hashtag))
+        if len(fetched_data) == 0:
+            logging.warning(f"No posts were found for the hashtag: {hashtag}")
+
+        # Determine which newly scraped posts haven't been scraped before
+        new_fetched_data = [
+            video for video in fetched_data if video["id"] not in already_fetched_ids
+        ]
+        if len(new_fetched_data) == 0:
+            logging.warning(f"No new posts were found for the hashtag: {hashtag}")
+
+        # Merge new and old data and write to file
+        all_fetched_data = already_fetched_data + new_fetched_data
+        json_dump(file_path=hashtag_file, data=all_fetched_data)
+        logging.info(
+            f"Scraped {len(new_fetched_data)} new posts containing the hashtag "
+            f"'{hashtag}', with {len(already_fetched_data)} posts previously scraped"
+        )
+
+    def get_hashtag_videos(self, hashtag: str):
+        """Download videos and other media corresponding to posts that used a
+        specified hashtag, skipping posts whose media is already in `videos/`."""
+
+        # Define file containing post data and directory to save videos to
+        hashtag_file = self.data_dir / hashtag / "posts.json"
+        video_dir = self.data_dir / hashtag / "videos"
+        video_dir.mkdir(exist_ok=True)
+
+        # Post IDs with media already on disk (files: `<id>.<ext>`, `<id>_<NN>.<ext>`)
+        already_downloaded_ids = set(
+            file.split(".")[0].split("_")[0] for file in os.listdir(video_dir)
+        )
+        # Get list of posts that have been scraped but not had their media downloaded
+        video_list = json_load(file_path=hashtag_file)
+        new_video_list = [
+            video for video in video_list if video["id"] not in already_downloaded_ids
+        ]
+        if len(new_video_list) == 0:
+            logging.warning(
+                f"No new videos to be downloaded for the hashtag: {hashtag}"
+            )
+
+        # Populate list of URLs to download using yt-dlp, and list of image
+        # galleries to download using the `download_gallery` function
+        urls_to_download = []
+        galleries_to_download = []
+        for video in new_video_list:
+            if video.get("imagePost") is None:
+                url = f"https://www.tiktok.com/@{video['author']['uniqueId']}/video/{video['id']}"
+                urls_to_download.append(url)
+            else:
+                galleries_to_download.append(video)
+
+        # Download audio and image files for all image gallery posts
+        if len(galleries_to_download) > 0:
+            logging.info(f"Downloading image galleries for hashtag {hashtag}")
+        for video in galleries_to_download:
+            download_gallery(video_data=video, video_dir=video_dir)
+
+        # Download video files for all video posts
+        if len(urls_to_download) > 0:
+            logging.info(f"Downloading videos for hashtag {hashtag}")
+        ydl_opts = {"outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s")}
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download(urls_to_download)
+
+    def frequency_table(self, hashtag: str, number: int):
+        """Print the `number` most common co-occurring hashtags for a source
+        hashtag as a table; prints an empty table if no hashtags were found."""
+
+        # Load video data file and extract co-occurring hashtag frequency information
+        hashtag_file = self.data_dir / hashtag / "posts.json"
+        frequencies = aggregate_cooccurring_hashtags(hashtag_file=hashtag_file)
+
+        # The top count stands in for the post total; default avoids max() on empty
+        total_posts = max(frequencies.values(), default=0)
+        print(f"\nCo-occurring hashtags for #{hashtag} posts")
+        print(f"{'Rank':<8} {'Hashtag':<30} {'Occurrences':<15} {'Frequency':<15}")
+        for row, (tag, frequency) in enumerate(frequencies.most_common(number)):
+            ratio = frequency / total_posts
+            print(f"{row:<8} {tag:<30} {frequency:<15} {ratio:.4f}")
+        print(f"Total posts: {total_posts}\n\n")
+
+    def plot(self, hashtag: str, number: int):
+        """Create plot of `number`-most commonly co-occurring hashtags for a
+        specified source hashtag; logs a warning and skips if there are none."""
+        # Load video data file and extract co-occurring hashtag frequency information
+        hashtag_file = self.data_dir / hashtag / "posts.json"
+        frequencies = aggregate_cooccurring_hashtags(hashtag_file=hashtag_file)
+        sorted_frequencies = frequencies.most_common(number)
+        if len(sorted_frequencies) < 2:  # max()/min() below would fail on empty data
+            logging.warning(f"No co-occurring hashtags to plot for: {hashtag}")
+            return
+        # Define labels and other fields used in plot (top entry is left out of bars)
+        total_posts = max(frequencies.values())
+        labels = [label for label, _ in sorted_frequencies[1:]]
+        ratios = [freq / total_posts * 100 for _, freq in sorted_frequencies[1:]]
+        y_pos = list(reversed(range(len(sorted_frequencies) - 1)))
+        # Visualize data in bar chart
+        fig, ax = plt.subplots(figsize=(5, 6.66))
+        ax.barh(y_pos, ratios)
+        ax.set_yticks(y_pos)
+        ax.set_yticklabels(labels)
+        ax.grid(axis="y")
+        ax.set_xlabel("Percent of posts with co-occurring hashtag")
+        ax.set_ylim(min(y_pos) - 1, max(y_pos) + 1)
+        ax.set_title(f"Co-occurring hashtags for #{hashtag} posts")
+        ax.xaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))
+        # Write image of plot to file, then close the figure so figures don't pile up
+        current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+        plot_file = self.data_dir / hashtag / "plots" / f"{hashtag}__{current_time}.png"
+        plot_file.parent.mkdir(exist_ok=True, parents=True)
+        fig.savefig(plot_file, bbox_inches="tight", facecolor="white", dpi=300)
+        plt.close(fig)
+        logging.info(f"Plot saved to file: {plot_file}")
+
+    def run(self, download: bool, plot: bool, table: bool, number: int):
+        """Execute the specified operations on all specified hashtags."""
+
+        # Scrape all specified hashtags and perform analyses, depending on if 
+        # `--table` and `--plot` flags are used in the command
+        for hashtag in self.hashtags:
+            self.get_hashtag_posts(hashtag=hashtag)
+            if plot:
+                self.plot(hashtag=hashtag, number=number)
+            if table:
+                self.frequency_table(hashtag=hashtag, number=number)
+
+        # Download media for all hashtags if `--download` flag is used in the command
+        for hashtag in self.hashtags:
+            if download:
+                self.get_hashtag_videos(hashtag=hashtag)
diff --git a/tiktok_hashtag_analysis/data_methods.py b/tiktok_hashtag_analysis/data_methods.py
deleted file mode 100644
index 24078af..0000000
--- a/tiktok_hashtag_analysis/data_methods.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""Utility functions that perform data processing related tasks.
-"""
-
-from typing import NamedTuple, List, Tuple, Set, Optional, Dict, Any
-import logging
-
-from . import file_methods
-
-logger = logging.getLogger()
-
-
-class Diff(NamedTuple):
-    """Keep track of scraped post IDs and whether previously-scraped posts have been filtered."""
-
-    ids: Set[str]
-    filter_posts: bool
-
-
-class Total(NamedTuple):
-    """Keep track of number of total and number of unique scraped posts."""
-
-    total: int
-    unique: int
-
-
-def get_difference(tag: str, file_name: str, ids: List[str]) -> Optional[Diff]:
-    """Find TikTok post IDs that haven't previously been scraped.
-
-    Filter out the new posts for the hashtag `tag` by comparing the list of
-    post IDs contained in `filename` to the list of newly downloaded IDs
-    contained in `ids`.
-    """
-    filter_posts = False
-    current_id_data = file_methods.get_data(file_name)
-    if tag in current_id_data:
-        current_ids = current_id_data[tag]
-        set_current_ids = set(current_ids)
-        total_current_ids = len(set_current_ids)
-        set_ids = set(ids)
-        new_ids = set_ids.difference(set_current_ids)
-        if not new_ids:
-            return None
-        else:
-            total_new_ids = len(new_ids)
-            if total_new_ids == total_current_ids:
-                new_data = Diff(new_ids, filter_posts)
-            else:
-                new_data = Diff(new_ids, filter_posts)
-            return new_data
-    else:
-        filter_posts = True
-        new_data = Diff(set(ids), filter_posts)
-        return new_data
-
-
-def extract_posts(
-    settings: Dict[Any, Any], file_name: str, tag: str
-) -> Optional[Tuple[List[str], List[Dict]]]:
-    """Find TikTok posts that haven't previously been scraped.
-
-    Compares the file downloaded by tiktok-scraper to the list of
-    previously-scraped posts (from the file ids/post_ids.json).
-    """
-    ids = []
-    posts = []
-
-    posts = file_methods.get_data(file_name)
-    for post in posts:
-        ids.append(post["id"])
-
-    if not ids:
-        logger.warn(f"No posts were found for the hashtag: {tag}")
-        return None
-
-    status = file_methods.check_existence(settings["post_ids"], "file")
-    if not status:
-        new_data = (ids, posts)
-        return new_data
-    else:
-        new_ids = get_difference(tag, settings["post_ids"], ids)
-        if not new_ids:
-            logger.warn(f"No new posts were found for the hashtag: {tag}")
-            return None
-        elif new_ids.filter_posts:
-            new_posts = [post for post in posts if post["id"] in new_ids.ids]
-            return (list(new_ids.ids), new_posts)
-        else:
-            return (list(new_ids.ids), posts)
-
-
-def extract_videos(settings: dict, tag: str, download_list: List[str]) -> List[str]:
-    """Find TikTok videos that haven't previously been scraped.
-
-    Compares the file downloaded by tiktok-scraper to the list of
-    previously-scraped videos (from the file ids/video_ids.json).
-    """
-    status = file_methods.check_existence(settings["video_ids"], "file")
-    if not status:
-        new_data = download_list
-        return new_data
-    else:
-        new_videos = get_difference(tag, settings["video_ids"], download_list)
-        if not new_videos:
-            logger.warn(
-                f"No new videos were found for the {tag} in the downloaded folder."
-            )
-            return []
-        else:
-            return list(new_videos.ids)
-
-
-def update_posts(
-    file_path: str, file_type: str, new_data: List[Any], tag: str = None
-) -> Optional[Tuple[str, int]]:
-    """Update the file containing scraped post IDs (`ids/post_ids.json`) with
-    the IDs of the recently scraped posts.
-    """
-    status = file_methods.check_existence(file_path, file_type)
-    if not tag:
-        file_methods.post_writer(file_path, new_data, status)
-        return None
-    else:
-        scraped_data = file_methods.id_writer(file_path, new_data, tag, status)
-        return scraped_data
-
-
-def update_videos(
-    settings: Dict[str, Any], new_data: List[str], tag: str
-) -> Tuple[str, int]:
-    """Update the file containing video IDs (`ids/video_ids.json`) with the IDs
-    of the recently scraped videos.
-    """
-    file_path = settings["video_ids"]
-    file_methods.check_file(file_path, "file")
-    number_scraped = file_methods.id_writer(file_path, new_data, tag, True)
-    file_methods.clean_video_files(settings, tag, new_data)
-    return number_scraped
-
-
-def get_total_posts(file_path: str, tag: str) -> Total:
-    """Count number of total scraped posts and number of unique scraped posts."""
-    status = file_methods.check_existence(file_path, "file")
-    if not status:
-        raise OSError(f"{file_path} not found!")
-    else:
-        data = file_methods.get_data(file_path)
-        total_posts = len(data[tag])
-        unique = len(set(data[tag]))
-        t = Total(total_posts, unique)
-        return t
-
-
-def print_total(file_path: str, tag: str, data_type: str):
-    """Print number of total and unique scraped posts, warn if any non-unique posts."""
-    total = get_total_posts(file_path, tag)
-    if total.total == total.unique:
-        logger.info(f"Scraped {total.total} {data_type} containing the hashtag '{tag}'")
-    else:
-        logger.warn(
-            f"Out of total {data_type} for the hashtag {tag} {total.total}, only {total.unique} are unique. Something is going wrong..."
-        )
diff --git a/tiktok_hashtag_analysis/file_methods.py b/tiktok_hashtag_analysis/file_methods.py
deleted file mode 100644
index 024eadc..0000000
--- a/tiktok_hashtag_analysis/file_methods.py
+++ /dev/null
@@ -1,216 +0,0 @@
-"""Utility functions that operate on files, such as writing to reading from a file.
-"""
-
-import os
-import json
-import subprocess
-from os import path
-from datetime import datetime
-import shutil
-from typing import Tuple, List, Optional, Dict, Any
-
-import logging, logging.config
-
-logging.config.fileConfig(path.join(path.dirname(path.abspath(__file__)), 'logging.config'))
-logger = logging.getLogger("Logger")
-
-
-def create_file(name: str, file_type: str):
-    """Create a file or directory."""
-    if file_type == "dir":
-        os.makedirs(name, mode=0o777)
-    elif file_type == "file":
-        with open(name, "w"):
-            pass
-    else:
-        raise ValueError(f"{file_type} has to be either 'dir' or 'file'")
-
-
-def check_existence(file_path: str, file_type: str):
-    """Check if a file or a directory exists."""
-    if file_type == "file":
-        return os.path.isfile(file_path)
-    elif file_type == "dir":
-        return os.path.isdir(file_path)
-    else:
-        raise ValueError(f"{file_type} has to be either 'dir' or 'file'")
-
-
-def check_file(file_path: str, file_type: str):
-    """If path does not exist, creates a file or directory."""
-    status = check_existence(file_path, file_type)
-    if not status:
-        create_file(file_path, file_type)
-
-
-def download_posts(settings: Dict, tag: str, output_dir: Any):
-    """Run the tiktok-scraper command to download posts for a given hashtag.
-
-    Returns the path to the downloaded file of posts. If no file was downloaded,
-    prints the error and returns nothing in order to move on.
-
-    os.chdir is used to execute shell commands in the correct folder and then
-    reused to return to the original folder of execution of run_downloader script.
-    """
-    path = os.path.join(settings["data"], tag, settings["posts"])
-    os.makedirs(path, exist_ok=True)
-    tiktok_command = f"tiktok-scraper hashtag {tag} -t 'json' --filepath {output_dir}"
-    output = subprocess.check_output(tiktok_command, shell=True, encoding="utf-8")
-    new_file = output.split()[-1]
-    if "json" in new_file:
-        return new_file
-    else:
-        logger.warn(
-            f"Something's wrong with what is returned by tiktok-scraper for the hashtag {tag} - *{new_file}* is not a json file.\n\ntiktok-scraper returned {output}"
-        )
-
-
-def download_videos(settings: Dict, tag: str):
-    """Run the tiktok-scraper command to download videos for a given hashtag.
-
-    Note that all the videos are downloaded that are returned by the TikTok API,
-    making this a time- and data-intensive process.
-    The list of downloaded video IDs is constucted and returned if the
-    downloaded folder contains at least 1 video.
-
-    os.chdir is used to execute shell commands in the correct folder and then
-    reused to return to the original folder of execution of run_downloader script.
-    """
-    path = os.path.join(settings["data"], tag, settings["videos"])
-    os.makedirs(path, exist_ok=True)
-    tiktok_command = f"tiktok-scraper hashtag {tag} -d --filepath {path}"
-    result = subprocess.check_output(tiktok_command, shell=True)
-    downloaded_list_tmp = os.listdir(os.path.join(path, f"#{tag}"))
-    if downloaded_list_tmp:
-        downloaded_list = []
-        for file in downloaded_list_tmp:
-            file = file.split(".")[0]
-            downloaded_list.append(file)
-
-        return downloaded_list
-    else:
-        logger.warn(f"No video files were downloaded for the hashtag {tag}.")
-        shutil.rmtree(settings["videos_delete"])
-
-
-def get_data(file_path: str) -> Any:
-    """Read a JSON file and return the read data."""
-    with open(file_path, "r", encoding="utf-8") as f:
-        data = json.load(f)
-    return data
-
-
-def dump_data(file_path: str, data: Any):
-    """Write data to a JSON file."""
-    with open(file_path, "w", encoding="utf-8") as f:
-        json.dump(data, f)
-
-
-def log_writer(log_data: List[Tuple[str, Tuple[str, int]]]):
-    """Create the dictionary of total downloads (posts and videos) per hashtag.
-
-    Example : {
-        timetamp : {
-            hashtag : {
-                videos : number_of_new_videos ,
-                posts : number_of_new_posts
-            }
-        }
-    }
-
-    Writes the dictionary to the log file (`logs/log.json`).
-    """
-
-    total = 0
-    scraped_summary_dict = {}  # type: Dict[str, Dict[str, int]]
-    for hashtag, (data_type, count) in log_data:
-        if hashtag in scraped_summary_dict:
-            if data_type in scraped_summary_dict[hashtag]:
-                scraped_summary_dict[hashtag][data_type] += count
-            else:
-                scraped_summary_dict[hashtag][data_type] = count
-            total += count
-        else:
-            scraped_summary_dict[hashtag] = {data_type: count}
-            total += count
-
-    now_str = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
-    data = {now_str: scraped_summary_dict}
-
-    logger.debug(f"Logged post data: {data}")
-    logger.info(f"Successfully scraped {total} total entries")
-
-
-def id_writer(
-    file_path: str, new_data: List[str], tag: str, status: bool
-) -> Tuple[str, int]:
-    """Write the list of new ids to the post_ids or video_ids file."""
-
-    total = len(new_data)
-    if status:
-        try:
-            data = get_data(file_path)
-            if tag in data:
-                data[tag] += new_data
-            else:
-                data[tag] = new_data
-            dump_data(file_path, data)
-        except json.decoder.JSONDecodeError:
-            data = {tag: new_data}
-            dump_data(file_path, data)
-    else:
-        data = {tag: new_data}
-        dump_data(file_path, data)
-    logger.debug(f"SUCCESS - {total} entries added to {file_path}")
-    number_scraped = (tag, total)
-    return number_scraped
-
-
-def post_writer(file_path: str, new_data: List[Dict], status: bool):
-    """Write the new posts in the post file of the given hashtag
-    (`/data/{hashtag}/posts/data.json`).
-    """
-    total = len(new_data)
-    if status:
-        try:
-            data = get_data(file_path)
-            data += new_data
-            dump_data(file_path, data)
-        except json.decoder.JSONDecodeError:
-            data = new_data
-            dump_data(file_path, data)
-    else:
-        data = new_data
-        dump_data(file_path, data)
-    logger.debug(f"SUCCESS - {total} entries added to {file_path}")
-
-
-def delete_file(file_path: str, file_type: str):
-    """Delete a directory or file."""
-    if not check_existence(file_path, file_type):
-        raise OSError(f"Attempt to delete file failed: {file_path} does not exist")
-    elif file_type == "file":
-        os.remove(file_path)
-        logger.debug(f"Successfully deleted {file_path}")
-    elif file_type == "dir":
-        os.rmdir(file_path)
-        logger.debug(f"Successfully deleted {file_path}")
-    else:
-        raise OSError("{file_type} needs to be either 'file' or 'dir'")
-
-
-def clean_video_files(settings: dict, tag: str, new_data: Optional[List[str]] = None):
-    """Move the new videos from the tiktok-scraper video folder to `/data/{hashtag}/videos/`.
-    Deletes the residual tiktok-scraper video folder.
-    """
-    if new_data:
-        for file in new_data:
-            settings["videos_from"] = (
-                settings["data"] + f"/{tag}/videos/#{tag}/{file}.mp4"
-            )
-            shutil.move(settings["videos_from"], settings["videos_to"])
-
-    shutil.rmtree(settings["videos_delete"])
-    logger.debug(
-        f"Successfully deleted the folder {settings['videos_delete']} folder of videos."
-    )
diff --git a/tiktok_hashtag_analysis/global_data.py b/tiktok_hashtag_analysis/global_data.py
deleted file mode 100644
index ed8c317..0000000
--- a/tiktok_hashtag_analysis/global_data.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""Specify global constants including file paths and scraping options.
-"""
-
-
-# Directories
-DATA = "../data"
-IDS = "ids"
-POSTS = "posts"
-VIDEOS = "videos"
-IMAGES = f"{DATA}/img"
-
-# Files
-POST_IDS = "post_ids.json"
-VIDEO_IDS = "video_ids.json"
-DATA_FILE = "data.json"
-
-FILES = {
-    "data": DATA,
-    "ids": IDS,
-    "posts": POSTS,
-    "videos": VIDEOS,
-    "images": IMAGES,
-    "post_ids": f"{DATA}/{IDS}/{POST_IDS}",
-    "video_ids": f"{DATA}/{IDS}/{VIDEO_IDS}",
-    "data_file": f"{DATA_FILE}",
-    "downloads": [],
-}
-
-PARAMETERS = {
-    "scraper_attempts": 3,
-    "sleep": 8,
-}
diff --git a/tiktok_hashtag_analysis/hashtag_frequencies.py b/tiktok_hashtag_analysis/hashtag_frequencies.py
deleted file mode 100644
index 204e6ee..0000000
--- a/tiktok_hashtag_analysis/hashtag_frequencies.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""Analyze the frequency of hashtags appearing in the set of given posts.
-
-- The "hashtag" positional argument specifies the hashtag of scraped posts to analyze
-- The "n" positional argument specifies how many hashtags does the user wants to analyze
-- Specifying the "-d" flag prints the hashtag frequencies on the shell
-- Specifying the "-p" flag plots the hashtag frequencies and saves as a png file
-"""
-import json
-from datetime import datetime
-import warnings
-import logging
-from typing import List, Tuple, Dict, Any
-import matplotlib.pyplot as plt
-import matplotlib.ticker as mtick
-import seaborn as sns
-
-warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font")
-sns.set_theme(style="darkgrid")
-
-
-def get_hashtags(obj: Dict) -> List[Tuple[str, int]]:
-    if not obj:
-        raise ValueError(f"Empty item, no hashtags could be extracted.")
-    else:
-        hashtags = {}
-        tags = [set([tag["name"] for tag in ele["hashtags"]]) for ele in obj]
-        {
-            tag: (
-                1
-                if tag not in hashtags and not hashtags.update({tag: 1})
-                else hashtags[tag] + 1 and not hashtags.update({tag: hashtags[tag] + 1})
-            )
-            for ele in tags
-            for tag in ele
-        }
-
-        return sorted(hashtags.items(), key=lambda e: e[1], reverse=True)
-
-
-def get_occurrences(filename: str, n: int = 1) -> Dict[str, Any]:
-    """Aggregate hashtag frequency information for a specified JSON file.
-
-    Example: {
-        "total": total posts in the file,
-        top_n: [[top n hashtags ], [frequencies of corresponding hashtags]]
-    }
-    """
-    with open(filename) as f:
-        obj = json.load(f)
-    l = len(obj)
-    tags = get_hashtags(obj)
-    occs = {"total": l, "top_n": []}
-    occs["top_n"] = [[ele[i] for ele in tags[0 : min(l, n)]] for i in range(2)]
-    return occs
-
-
-def plot(occs: dict, img_folder: str):
-    """Save plot of common hashtags as bar chart to file."""
-    y_pos = list(reversed(range(len(occs["top_n"][0]) - 1)))
-    max_count = occs["top_n"][1][0]
-    freqs = [count / max_count * 100 for count in occs["top_n"][1][1:]]
-    labels = occs["top_n"][0][1:]
-    hashtag = occs["top_n"][0][0]
-
-    fig, ax = plt.subplots(figsize=(5, 6.66))
-    ax.barh(y_pos, freqs)
-    ax.set_yticks(y_pos)
-    ax.set_yticklabels(labels)
-    ax.grid(axis="y")
-    ax.set_xlabel("Percent of posts with common hashtag")
-    ax.set_ylim(min(y_pos) - 1, max(y_pos) + 1)
-    ax.set_title(f"Common hashtags for #{hashtag} posts")
-    ax.xaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))
-    save_plot(img_folder, hashtag)
-
-
-def save_plot(img_folder, hashtag):
-    """Save the plot as a png file in the folder ../data/imgs/"""
-    now = datetime.now()
-    current_time = now.strftime("%Y_%m_%d_%H_%M_%S")
-    filename = f"{img_folder}/{hashtag}_{current_time}.png"
-    logging.info(f"Plot saved to file: {filename}")
-    plt.savefig(filename, bbox_inches="tight", facecolor="white", dpi=300)
-
-
-def print_occurrences(occs):
-    """Print information about the top n hashtags and their frequencies."""
-    row_number = 0
-    total_posts = occs["total"]
-    print(
-        "{:<8} {:<30} {:<15} {:<15}".format(
-            "Rank", "Hashtag", "Occurrences", "Frequency"
-        )
-    )
-    for key, value in zip(occs["top_n"][0], occs["top_n"][1]):
-        ratio = value / total_posts
-        print("{:<8} {:<30} {:<15} {:.4f}".format(row_number, key, value, ratio))
-        row_number += 1
-    print(f"Total posts: {total_posts}")
diff --git a/tiktok_hashtag_analysis/hashtag_list.txt b/tiktok_hashtag_analysis/hashtag_list.txt
deleted file mode 100644
index d2303f9..0000000
--- a/tiktok_hashtag_analysis/hashtag_list.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Enter a hashtag per line. Each line should contain only one word.
-london    
-paris
-tokyo
-newyork
diff --git a/tiktok_hashtag_analysis/logging.config b/tiktok_hashtag_analysis/logging.config
deleted file mode 100644
index faac2d3..0000000
--- a/tiktok_hashtag_analysis/logging.config
+++ /dev/null
@@ -1,36 +0,0 @@
-[loggers]
-keys=root,Logger
-
-[handlers]
-keys=consoleHandler,fileHandler
-
-[formatters]
-keys=consoleFormatter,fileFormatter
-
-[logger_root]
-level=DEBUG
-handlers=consoleHandler
-
-[logger_Logger]
-level=DEBUG
-handlers=consoleHandler,fileHandler
-qualname=Logger
-propagate=0
-
-[handler_consoleHandler]
-class=StreamHandler
-level=INFO
-formatter=consoleFormatter
-args=(sys.stdout,)
-
-[handler_fileHandler]
-class=FileHandler
-level=DEBUG
-formatter=fileFormatter
-args=("../logfile.log",)
-
-[formatter_consoleFormatter]
-format=%(message)s
-
-[formatter_fileFormatter]
-format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
diff --git a/tiktok_hashtag_analysis/run_downloader.py b/tiktok_hashtag_analysis/run_downloader.py
deleted file mode 100644
index a74825b..0000000
--- a/tiktok_hashtag_analysis/run_downloader.py
+++ /dev/null
@@ -1,150 +0,0 @@
-"""Download post data or videos from TikToks containing one or more specified hashtags.
-
-- The "-p" flag specifies that only data from posts is downloaded, no video files
-- The "-v" flag specifies that only video files are downloaded, no post data
-- Specifying both "-p" and "-v" flags downloads both post data and video files
-- The "-t" flag allows the user to specify a list of space-separated hashtags as an argument
-- The "-f" flag allows the user to specify the filename of a text file containing a list of newline-separated hashtags as an argument
-"""
-
-import os
-import time
-from typing import List, Tuple, Dict, Any, Optional
-from tempfile import TemporaryDirectory
-from tiktok_hashtag_analysis import global_data
-import tiktok_hashtag_analysis.file_methods as file_methods
-from tiktok_hashtag_analysis import data_methods
-
-
-def get_hashtag_list(file_name: str) -> List[str]:
-    """Extract list of newline-separated hashtags from text file."""
-    if not file_methods.check_existence(file_name, "file"):
-        raise OSError(f"{file_name} does not exist")
-    with open(file_name) as f:
-        tags = list(
-            filter(None, [line.strip() for line in f if not line.startswith("#")])
-        )
-        return tags
-
-
-def set_download_settings(download_data_type: Dict[str, bool]) -> Dict[str, Any]:
-    """Load the constants from global_data module into the `settings` dict."""
-    settings = {
-        "data": global_data.FILES["data"],
-        "ids": global_data.FILES["ids"],
-        "sleep": global_data.PARAMETERS["sleep"],
-        "scraper": global_data.PARAMETERS["scraper_attempts"],
-    }
-    file_methods.check_file(f"{settings['data']}/{settings['ids']}", "dir")
-    if download_data_type["posts"]:
-        settings["posts"] = global_data.FILES["posts"]
-        settings["post_ids"] = global_data.FILES["post_ids"]
-        settings["data_file"] = global_data.FILES["data_file"]
-
-    if download_data_type["videos"]:
-        settings["videos"] = global_data.FILES["videos"]
-        settings["video_ids"] = global_data.FILES["video_ids"]
-
-    return settings
-
-
-def get_posts(settings: dict, tag: str) -> Optional[Tuple[str, int]]:
-    """Scrape trending TikTok post data for the specified hashtag.
-
-    1. Calls `file_methods.download_posts` to scrape the post data for a given hashtag
-    2. Calls `data_methods.extract_posts` to determine which if any posts
-    haven't previously been downloaded.
-    3. Calls `data_methods.update_posts` to update the ID list with the IDs of
-    newly downloaded posts.
-    """
-    with TemporaryDirectory() as temp_dir:
-        file_path = file_methods.download_posts(settings, tag, temp_dir)
-        number_scraped = None
-        if file_path:
-            new_data = data_methods.extract_posts(settings, file_path, tag)
-            if new_data:
-                data_file = os.path.join(
-                    settings["data"], tag, settings["posts"], settings["data_file"]
-                )
-                data_methods.update_posts(data_file, "file", new_data[1])
-                number_scraped = data_methods.update_posts(
-                    settings["post_ids"], "file", new_data[0], tag
-                )
-
-    return number_scraped
-
-
-def get_videos(settings: dict, tag: str) -> Optional[Tuple[str, int]]:
-    """Scrape trending TikTok video files for the specified hashtag.
-
-    1. Calls `file_methods.download_videos` to download the video files for a given hashtag
-    2. Calls `data_methods.extract_videos` to determine which if any videos
-    haven't previouly been downloaded.
-    3. Calls `data_methods.update_videos` to update the ID list with the IDs of
-    newly downloaded videos.
-    4. Calls `clean_video_files` function to delete the residual video folder
-    after the data processing.
-    """
-    number_scraped = None
-    download_list = file_methods.download_videos(settings, tag)
-    if download_list:
-        new_data = data_methods.extract_videos(settings, tag, download_list)
-        if new_data:
-            number_scraped = data_methods.update_videos(settings, new_data, tag)
-        else:
-            file_methods.clean_video_files(settings, tag)
-
-    return number_scraped
-
-
-def get_data(
-        hashtags: list, download_data_type: Dict[str, bool]
-) -> List[Tuple[str, Tuple[str, int]]]:
-    """Check command-line arguments and scrape posts/videos for specified hashtags."""
-    counter = 0
-    total_hashtags = len(hashtags)
-    total_hashtags_offset = total_hashtags - 1
-    scraped_summary_list = []
-
-    if download_data_type["posts"]:
-        settings = set_download_settings(download_data_type)
-        while counter < total_hashtags:
-            tag = hashtags[counter]
-            file_methods.check_file(
-                os.path.join(settings["data"], tag, settings["posts"]), "dir"
-            )
-            file_methods.check_file(
-                os.path.join(
-                    settings["data"], tag, settings["posts"], settings["data_file"]
-                ),
-                "file",
-            )
-            res = get_posts(settings, tag)
-            if res:
-                number_scraped = (res[0], ("posts", res[1]))
-                scraped_summary_list.append(number_scraped)
-                data_methods.print_total(settings["post_ids"], tag, "posts")
-
-            counter += 1
-            if counter < total_hashtags_offset:
-                time.sleep(settings["sleep"])
-
-    if download_data_type["videos"]:
-        settings = set_download_settings(download_data_type)
-        while counter < total_hashtags:
-            tag = hashtags[counter]
-            file_methods.check_file(
-                os.path.join(settings["data"], tag, settings["videos"]), "dir"
-            )
-            settings["videos_delete"] = settings["data"] + f"/{tag}/videos/#{tag}"
-            settings["videos_to"] = settings["data"] + f"/{tag}/videos"
-            _res = get_videos(settings, tag)
-            if _res:
-                scraped_summary_list.append((_res[0], ("videos", _res[1])))
-                data_methods.print_total(settings["video_ids"], tag, "videos")
-
-            counter += 1
-            if counter < total_hashtags_offset:
-                time.sleep(settings["sleep"])
-
-    return scraped_summary_list
diff --git a/tiktok_hashtag_analysis/version.py b/tiktok_hashtag_analysis/version.py
deleted file mode 100644
index 0899cd6..0000000
--- a/tiktok_hashtag_analysis/version.py
+++ /dev/null
@@ -1,12 +0,0 @@
-
-_MAJOR = "1"
-_MINOR = "0"
-# On main and in a nightly release the patch should be one ahead of the last
-# released build.
-_PATCH = "4"
-# This is mainly for nightly builds which have the suffix ".dev$DATE". See
-# https://semver.org/#is-v123-a-semantic-version for the semantics.
-_SUFFIX = ""
-
-VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
-__version__ = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
\ No newline at end of file

From cf575e6cf60420587913a1cd64897dd76405f347 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Fri, 1 Sep 2023 18:33:32 -0500
Subject: [PATCH 2/6] updated README and added authorization

---
 .gitignore                          |  2 +
 README.md                           | 71 ++++++++++++++---------------
 tiktok_hashtag_analysis/__main__.py |  1 -
 tiktok_hashtag_analysis/auth.py     | 67 +++++++++++++++++++++++++++
 tiktok_hashtag_analysis/base.py     |  6 +--
 5 files changed, 107 insertions(+), 40 deletions(-)
 create mode 100644 tiktok_hashtag_analysis/auth.py

diff --git a/.gitignore b/.gitignore
index eca42b1..525e540 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 # Data directory
 data/
+build/
+*.egg-info/
 
 # Miscellaneous files
 **/.DS_Store
diff --git a/README.md b/README.md
index b0e3f25..75e5e26 100644
--- a/README.md
+++ b/README.md
@@ -59,40 +59,38 @@ The `data` folder contains all the downloaded data as shown in the tree diagram
 
 ## How to use
 ### Post downloading
-Running the `tiktok-hashtag-analysis` command with the following options will scrape posts containing the hashtags `#london`, `#paris`, or `#newyork`:
+Running the `tiktok-hashtag-analysis` command with the following options will scrape posts that contain the hashtags `#london`, `#paris`, or `#newyork`:
 
     tiktok-hashtag-analysis london paris newyork
 
 and will produce an output similar to the following log:
 
-    $ tiktok-hashtag-analysis download -t london paris newyork -p
+    $ tiktok-hashtag-analysis london paris newyork
     Hashtags to scrape: ['london', 'paris', 'newyork']
     Scraped 963 posts containing the hashtag 'london'
     Scraped 961 posts containing the hashtag 'paris'
     Scraped 940 posts containing the hashtag 'newyork'
     Successfully scraped 2864 total entries
 
-- The `-t` flag allows a space-separated list of hashtags to be specified as a command line argument
-- The `-p` flag specifies that posts, not videos, will be downloaded
+- The list of hashtags to scrape is specified as a positional argument
 
 ### Video downloading
-Running the `tiktok-hashtag-analysis download` script with the following options will scrape trending videos containing the hashtag `#london`:
-`tiktok-hashtag-analysis download -t london -v`
+Running the `tiktok-hashtag-analysis` script with the following options will scrape trending videos containing the hashtag `#london`:
+`tiktok-hashtag-analysis london --download`
 
-- The `-t` flag allows a space-separated list of hashtags to be specified as a command line argument
-- The `-v` flag specifies that videos, not posts, will be downloaded
+- The `--download` flag specifies that video files for scraped posts should be downloaded
 
-Note that video downloading is a time and data rate consuming task, as a result we recommend using one hashtag at a time when using the `-v` flag to avoid complications.
+Note that video downloading is time-consuming and bandwidth-intensive; as a result, we recommend using one hashtag at a time with the `--download` flag to avoid complications.
 
 ## Analyzing results 
-### Top n hashtag occurrences 
-The script `tiktok-hashtag-analysis frequencies` analyzes the frequencies of top occurring hashtags in a given set of posts.
+### Most common co-occurring hashtags
+In addition to scraping data and downloading videos, the `tiktok-hashtag-analysis` script can also analyze the frequencies of the most common co-occurring hashtags in a given set of posts.
 
-Assume we want to analyze the 20 most frequently occurring hashtags in the downloaded posts of the `#london` hashtag.
+Assume we want to analyze the 20 most frequently co-occurring hashtags in the downloaded posts of the `#london` hashtag.
 
 - The results can be plotted and saved as a PNG file by executing the following command: 
 
-    `tiktok-hashtag-analysis frequencies --hashtag london --number 20 --plot`
+    `tiktok-hashtag-analysis london --number 20 --plot`
     
     which will produce a figure similar to that shown below:
     <p align="center">
@@ -103,32 +101,33 @@ Assume we want to analyze the 20 most frequently occurring hashtags in the downl
 
 - The results can be displayed in tabular form by executing the following command:
 
-    `tiktok-hashtag-analysis frequencies --hashtag london --number 20 --print`
+    `tiktok-hashtag-analysis london --number 20 --table`
 
     which will produce a terminal output similar to the following:
     ```
-    Rank     Hashtag                        Occurrences     Frequency
-    0        london                         960             1.0000
-    1        fyp                            494             0.5146
-    2        uk                             238             0.2479
-    3        foryou                         221             0.2302
-    4        foryoupage                     184             0.1917
-    5        viral                          179             0.1865
-    6        fypシ                           84              0.0875
-    7        funny                          56              0.0583
-    8        xyzbca                         51              0.0531
-    9        british                        45              0.0469
-    10       england                        44              0.0458
-    11       trending                       40              0.0417
-    12       fy                             33              0.0344
-    13       comedy                         32              0.0333
-    14       roadman                        28              0.0292
-    15       4u                             27              0.0281
-    16       usa                            26              0.0271
-    17       tiktok                         26              0.0271
-    18       travel                         21              0.0219
-    19       america                        20              0.0208
-    Total posts: 960
+    Co-occurring hashtags for #london posts
+    Rank     Hashtag                        Occurrences     Frequency      
+    0        london                         881             1.0000
+    1        fyp                            399             0.4529
+    2        uk                             174             0.1975
+    3        foryou                         168             0.1907
+    4        viral                          152             0.1725
+    5        foryoupage                     137             0.1555
+    6        fypシ                           73              0.0829
+    7        funny                          54              0.0613
+    8        tiktok                         43              0.0488
+    9        trending                       43              0.0488
+    10       british                        41              0.0465
+    11       england                        38              0.0431
+    12       xyzbca                         34              0.0386
+    13       fy                             33              0.0375
+    14       usa                            33              0.0375
+    15       love                           29              0.0329
+    16       comedy                         25              0.0284
+    17       royalfamily                    23              0.0261
+    18       queen                          23              0.0261
+    19       queenelizabeth                 22              0.0250
+    Total posts: 881
     ```
 
     The `Frequency` column shows the ratio of the occurrence to the total number of downloaded posts.
diff --git a/tiktok_hashtag_analysis/__main__.py b/tiktok_hashtag_analysis/__main__.py
index 8e7dce2..8a9e5ee 100644
--- a/tiktok_hashtag_analysis/__main__.py
+++ b/tiktok_hashtag_analysis/__main__.py
@@ -1,7 +1,6 @@
 import logging
 import argparse
 from pathlib import Path
-import sys
 
 from .base import TikTokDownloader, load_hashtags_from_file
 
diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py
new file mode 100644
index 0000000..17b8f3c
--- /dev/null
+++ b/tiktok_hashtag_analysis/auth.py
@@ -0,0 +1,67 @@
+import os
+import configparser
+from pathlib import Path
+import logging
+
+
+class Authorization:
+    """Handle authorization for TikTok, using the `msToken`."""
+
+    def __init__(self):
+        self.config_file = Path.home() / ".tiktok"
+        self.section = "TikTok"
+        self.ms_token = None
+
+    def get_token(self):
+        """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
+
+        # Step 1: check if MS_TOKEN is defined as environment variable
+        if ms_token := os.environ.get("MS_TOKEN"):
+            self.ms_token = ms_token
+            logging.info("Loaded token from environment variable")
+
+        # Step 2: check if MS_TOKEN is defined in config file
+        elif self.config_file.is_file():
+            if ms_token := self.load_token():
+                self.ms_token = ms_token
+                logging.info(f"Loaded token from config file: {self.config_file}")
+
+        # Step 3: have user enter MS_TOKEN via terminal
+        else:
+            ms_token = self.input_token()
+            self.dump_token(ms_token=ms_token)
+            self.ms_token = ms_token
+            logging.info(
+                f"Loaded token from user input and saved to config file: {self.config_file}"
+            )
+
+        return self.ms_token
+
+    def load_token(self):
+        """Parse a config file and extract the token."""
+
+        config = configparser.ConfigParser()
+        config.read(self.config_file)
+        return config.get(section=self.section, option="MS_TOKEN", fallback=None)
+
+    def dump_token(self, ms_token):
+        """Write the token to a config file."""
+
+        config = configparser.ConfigParser()
+        config.read(self.config_file)
+        config.add_section(self.section)
+        config.set(section=self.section, option="MS_TOKEN", value=ms_token)
+
+        with open(self.config_file, "w") as f:
+            config.write(f)
+
+    def input_token(self):
+        """Allow user to manually enter the token in the terminal."""
+
+        print(
+            "\nPlease copy and paste your `msToken` cookie taken from your web browser when visiting the TikTok website. See [THIS VIDEO] for more information.\n"
+        )
+
+        ms_token = input("msToken: ")
+
+        return ms_token
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index 63224ef..74df81b 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -167,7 +167,7 @@ class TikTokDownloader:
                 f"No new videos to be downloaded for the hashtag: {hashtag}"
             )
 
-        # Populate list of URLs to download using yt-dlp, and list of image 
+        # Populate list of URLs to download using yt-dlp, and list of image
         # galleries to download using the `download_gallery` function
         urls_to_download = []
         galleries_to_download = []
@@ -233,7 +233,7 @@ class TikTokDownloader:
         ax.set_ylim(min(y_pos) - 1, max(y_pos) + 1)
         ax.set_title(f"Co-occurring hashtags for #{hashtag} posts")
         ax.xaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))
-        
+
         # Write image of plot to file
         current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
         plot_file = self.data_dir / hashtag / "plots" / f"{hashtag}__{current_time}.png"
@@ -244,7 +244,7 @@ class TikTokDownloader:
     def run(self, download: bool, plot: bool, table: bool, number: int):
         """Execute the specified operations on all specified hashtags."""
 
-        # Scrape all specified hashtags and perform analyses, depending on if 
+        # Scrape all specified hashtags and perform analyses, depending on if
         # `--table` and `--plot` flags are used in the command
         for hashtag in self.hashtags:
             self.get_hashtag_posts(hashtag=hashtag)

From 0f8e865bf3c958db567f80a056ee207f81094a15 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 4 Sep 2023 10:40:30 -0500
Subject: [PATCH 3/6] added type hints for auth, incorporated auth into base
 module

---
 tiktok_hashtag_analysis/__main__.py | 14 +++++++++++++-
 tiktok_hashtag_analysis/auth.py     | 23 ++++++++++++++---------
 tiktok_hashtag_analysis/base.py     | 17 +++++++++++------
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/tiktok_hashtag_analysis/__main__.py b/tiktok_hashtag_analysis/__main__.py
index 8a9e5ee..3c3bbfd 100644
--- a/tiktok_hashtag_analysis/__main__.py
+++ b/tiktok_hashtag_analysis/__main__.py
@@ -6,6 +6,8 @@ from .base import TikTokDownloader, load_hashtags_from_file
 
 
 def create_parser():
+    """Create parser to parse input command-line arguments."""
+
     parser = argparse.ArgumentParser(
         description="Analyze hashtags within posts scraped from TikTok."
     )
@@ -51,12 +53,20 @@ def create_parser():
         help="Directory to save scraped data and visualizations to",
         default=Path(".").resolve().parent / "data",
     )
+    parser.add_argument(
+        "--config",
+        type=str,
+        help="File name of configuration file to store TikTok credentials to",
+        default=None,
+    )
     parser.add_argument("--log", type=str, help="File to write logs to", default=None)
 
     return parser
 
 
 def main():
+    """Parse and process command-line arguments, scrape specified hashtags, and perform specified analyses."""
+
     parser = create_parser()
     args = parser.parse_args()
 
@@ -79,7 +89,9 @@ def main():
     else:
         hashtags = args.hashtags
 
-    downloader = TikTokDownloader(hashtags=hashtags, data_dir=args.output_dir)
+    downloader = TikTokDownloader(
+        hashtags=hashtags, data_dir=args.output_dir, config_file=args.config
+    )
 
     downloader.run(
         download=args.download, plot=args.plot, table=args.table, number=args.number
diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py
index 17b8f3c..5d5ac16 100644
--- a/tiktok_hashtag_analysis/auth.py
+++ b/tiktok_hashtag_analysis/auth.py
@@ -2,17 +2,22 @@ import os
 import configparser
 from pathlib import Path
 import logging
+from typing import Optional
 
 
 class Authorization:
     """Handle authorization for TikTok, using the `msToken`."""
 
-    def __init__(self):
-        self.config_file = Path.home() / ".tiktok"
-        self.section = "TikTok"
-        self.ms_token = None
+    def __init__(self, config_file: Optional[str] = None):
+        if config_file:
+            self.config_file = Path(config_file)
+        else:
+            self.config_file = Path.home() / ".tiktok"
 
-    def get_token(self):
+        self.section = "TikTok"
+        self.get_token()
+
+    def get_token(self) -> str:
         """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
 
         # Step 1: check if MS_TOKEN is defined as environment variable
@@ -37,14 +42,14 @@ class Authorization:
 
         return self.ms_token
 
-    def load_token(self):
+    def load_token(self) -> Optional[str]:
         """Parse a config file and extract the token."""
 
         config = configparser.ConfigParser()
         config.read(self.config_file)
         return config.get(section=self.section, option="MS_TOKEN", fallback=None)
 
-    def dump_token(self, ms_token):
+    def dump_token(self, ms_token: str):
         """Write the token to a config file."""
 
         config = configparser.ConfigParser()
@@ -52,10 +57,10 @@ class Authorization:
         config.add_section(self.section)
         config.set(section=self.section, option="MS_TOKEN", value=ms_token)
 
-        with open(self.config_file, "w") as f:
+        with open(self.config_file, "w", encoding="utf-8") as f:
             config.write(f)
 
-    def input_token(self):
+    def input_token(self) -> str:
         """Allow user to manually enter the token in the terminal."""
 
         print(
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index 74df81b..77623a3 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -17,6 +17,8 @@ import seaborn as sns
 
 from TikTokApi import TikTokApi
 
+from .auth import Authorization
+
 warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font")
 sns.set_theme(style="darkgrid")
 
@@ -38,13 +40,11 @@ def load_hashtags_from_file(file: str) -> List[str]:
     return process_hashtag_list(hashtags=hashtags)
 
 
-async def _fetch_hashtag_data(hashtag: str) -> List[Dict]:
+async def _fetch_hashtag_data(hashtag: str, ms_token: str) -> List[Dict]:
     """Fetch data for videos containing a specified hashtag, asynchronously."""
     data = []
     async with TikTokApi() as api:
-        await api.create_sessions(
-            ms_tokens=[os.environ["MS_TOKEN"]], num_sessions=1, sleep_after=3
-        )
+        await api.create_sessions(ms_tokens=[ms_token], num_sessions=1, sleep_after=3)
         async for video in api.hashtag(name=hashtag).videos(count=1000):
             data.append(video.as_dict)
     return data
@@ -101,13 +101,16 @@ def aggregate_cooccurring_hashtags(hashtag_file: Path) -> Counter:
 class TikTokDownloader:
     """Main class for scraping data from TikTok."""
 
-    def __init__(self, hashtags: List[str], data_dir: str):
+    def __init__(self, hashtags: List[str], data_dir: str, config_file: str = None):
         self.hashtags = process_hashtag_list(hashtags)
         logging.info(f"Hashtags to scrape: {hashtags}")
 
         self.data_dir = Path(data_dir)
         os.makedirs(self.data_dir, exist_ok=True)
 
+        self.auth = Authorization(config_file=config_file)
+        self.ms_token = self.auth.ms_token
+
     def get_hashtag_posts(self, hashtag: str):
         """Fetch data about posts that used a specified hashtag and merge with
         existing data, if it exists."""
@@ -125,7 +128,9 @@ class TikTokDownloader:
             already_fetched_data = []
 
         # Scrape posts that use the specified hashtag
-        fetched_data = asyncio.run(_fetch_hashtag_data(hashtag=hashtag))
+        fetched_data = asyncio.run(
+            _fetch_hashtag_data(hashtag=hashtag, ms_token=self.ms_token)
+        )
         if len(fetched_data) == 0:
             logging.warning(f"No posts were found for the hashtag: {hashtag}")
 

From 5ae962496826bed58b80336e91dda3c19d2db12d Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 4 Sep 2023 13:26:38 -0500
Subject: [PATCH 4/6] added tests, changed __main__ to cli

---
 pytest.ini                                    | 15 +++++++++
 setup.py                                      |  3 +-
 tests/__init__.py                             |  0
 tests/auth.py                                 | 24 ++++++++++++++
 tests/base.py                                 | 15 +++++++++
 tests/cli.py                                  | 31 +++++++++++++++++++
 tests/conftest.py                             | 11 +++++++
 tiktok_hashtag_analysis/__init__.py           |  2 ++
 tiktok_hashtag_analysis/auth.py               |  1 -
 tiktok_hashtag_analysis/base.py               |  2 +-
 .../{__main__.py => cli.py}                   |  0
 11 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 pytest.ini
 create mode 100644 tests/__init__.py
 create mode 100644 tests/auth.py
 create mode 100644 tests/base.py
 create mode 100644 tests/cli.py
 create mode 100644 tests/conftest.py
 rename tiktok_hashtag_analysis/{__main__.py => cli.py} (100%)

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..4004079
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,15 @@
+[pytest]
+minversion =
+  7.0.0
+testpaths =
+  tests/
+python_files =
+  *.py
+addopts =
+  -vvv
+  --cov='tiktok_hashtag_analysis'
+  --cov-report html:reports/coverage
+  --html='reports/tests.html'
+  --self-contained-html
+filterwarnings =
+    ignore:Glyph (.*) missing from current font
\ No newline at end of file
diff --git a/setup.py b/setup.py
index bd6119e..f5d5377 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,7 @@ setup(
     url="https://github.com/bellingcat/tiktok-hashtag-analysis",
     license="MIT License",
     install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt-dlp"],
+    extras_require={"test": ["pytest", "pytest-cov", "pytest-html", "pytest-metadata"]},
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Information Technology",
@@ -25,7 +26,7 @@ setup(
     ],
     entry_points={
         "console_scripts": [
-            "tiktok-hashtag-analysis=tiktok_hashtag_analysis.__main__:main",
+            "tiktok-hashtag-analysis=tiktok_hashtag_analysis.cli:main",
         ]
     },
 )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/auth.py b/tests/auth.py
new file mode 100644
index 0000000..6d0c078
--- /dev/null
+++ b/tests/auth.py
@@ -0,0 +1,24 @@
+import pytest
+
+from tiktok_hashtag_analysis.auth import Authorization
+
+MS_TOKEN = "thisisafakemstokenfortiktok"
+
+
+def test_auth_input(tmp_path, monkeypatch):
+    config_file = tmp_path / ".tiktok"
+    monkeypatch.setattr("builtins.input", lambda _: MS_TOKEN)
+    auth = Authorization(config_file=config_file)
+    auth.get_token()
+
+    assert auth.ms_token == MS_TOKEN
+
+
+def test_auth(tmp_path):
+    config_file = tmp_path / ".tiktok"
+    auth = Authorization(config_file=config_file)
+
+    auth.dump_token(ms_token=MS_TOKEN)
+    auth.get_token()
+
+    assert auth.ms_token == MS_TOKEN
diff --git a/tests/base.py b/tests/base.py
new file mode 100644
index 0000000..c0d2a07
--- /dev/null
+++ b/tests/base.py
@@ -0,0 +1,15 @@
+from tiktok_hashtag_analysis.base import TikTokDownloader, load_hashtags_from_file
+
+
+def test_scrape(tmp_path, hashtags):
+    downloader = TikTokDownloader(hashtags=hashtags[:1], data_dir=tmp_path)
+    downloader.run(download=True, plot=True, table=True, number=20)
+
+
+def test_load_hashtags_from_file(tmp_path, hashtags):
+    file = tmp_path / "hashtags.txt"
+    with open(file, "w", encoding="utf-8") as f:
+        f.write("\n".join(hashtags))
+
+    loaded_hashtags = load_hashtags_from_file(file=file)
+    assert loaded_hashtags == hashtags
diff --git a/tests/cli.py b/tests/cli.py
new file mode 100644
index 0000000..dd58f5e
--- /dev/null
+++ b/tests/cli.py
@@ -0,0 +1,31 @@
+import pytest
+
+from tiktok_hashtag_analysis.cli import create_parser
+
+ARGUMENTS = [
+    ("file", "hashtags.txt", "--file"),
+    ("download", True, "--download"),
+    ("download", True, "-d"),
+    ("number", 20, "--number"),
+    ("plot", True, "--plot"),
+    ("plot", True, "-p"),
+    ("table", True, "--table"),
+    ("table", True, "-t"),
+    ("output_dir", "/tmp/tiktok_download", "--output-dir"),
+    ("config", "~/.tiktok", "--config"),
+    ("log", "../logfile.log", "--log"),
+]
+
+
+@pytest.mark.parametrize("attribute,value,flag", ARGUMENTS)
+def test_parser(hashtags, attribute, value, flag):
+    argument_list = [*hashtags, flag]
+
+    if not isinstance(value, bool):
+        argument_list.append(str(value))
+
+    parser = create_parser()
+    args = vars(parser.parse_args(argument_list))
+
+    assert args.get(attribute) == value
+    assert args.get("hashtags") == hashtags
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..b5c096d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,11 @@
+import os
+import tempfile
+
+import pytest
+
+TEST_HASHTAGS = ["embraceeuropa", "francisparkeryockey"]
+
+
+@pytest.fixture(scope="package")
+def hashtags():
+    return TEST_HASHTAGS
diff --git a/tiktok_hashtag_analysis/__init__.py b/tiktok_hashtag_analysis/__init__.py
index 8c0d5d5..7a97c27 100644
--- a/tiktok_hashtag_analysis/__init__.py
+++ b/tiktok_hashtag_analysis/__init__.py
@@ -1 +1,3 @@
 __version__ = "2.0.0"
+
+from .base import TikTokDownloader
diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py
index 5d5ac16..25c2222 100644
--- a/tiktok_hashtag_analysis/auth.py
+++ b/tiktok_hashtag_analysis/auth.py
@@ -15,7 +15,6 @@ class Authorization:
             self.config_file = Path.home() / ".tiktok"
 
         self.section = "TikTok"
-        self.get_token()
 
     def get_token(self) -> str:
         """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index 77623a3..e059dbb 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -109,7 +109,7 @@ class TikTokDownloader:
         os.makedirs(self.data_dir, exist_ok=True)
 
         self.auth = Authorization(config_file=config_file)
-        self.ms_token = self.auth.ms_token
+        self.ms_token = self.auth.get_token()
 
     def get_hashtag_posts(self, hashtag: str):
         """Fetch data about posts that used a specified hashtag and merge with
diff --git a/tiktok_hashtag_analysis/__main__.py b/tiktok_hashtag_analysis/cli.py
similarity index 100%
rename from tiktok_hashtag_analysis/__main__.py
rename to tiktok_hashtag_analysis/cli.py

From 8c32a3cf1642998aaa4d9916ba1ec06a588dcaac Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 4 Sep 2023 13:51:28 -0500
Subject: [PATCH 5/6] updated README, made yt-dlp downloading more robust
 against errors, changed name of videos folder to media (since images and
 audio files are also downloaded now)

---
 README.md                       | 29 +++++++++++++++--------------
 tiktok_hashtag_analysis/base.py |  6 +++---
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 75e5e26..2c51e2e 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ You should now be ready to start using it.
 ## About the tool
 ### Command-line arguments
 ```
-usage: tiktok-hashtag-analysis [-h] [--file FILE] [-d] [--number NUMBER] [-p] [-t] [--output-dir OUTPUT_DIR] [--log LOG] [hashtags ...]
+usage: tiktok-hashtag-analysis [-h] [--file FILE] [-d] [--number NUMBER] [-p] [-t] [--output-dir OUTPUT_DIR] [--config CONFIG] [--log LOG] [hashtags ...]
 
 Analyze hashtags within posts scraped from TikTok.
 
@@ -31,6 +31,7 @@ optional arguments:
   -t, --table           Print a table of the most common co-occurring hashtags
   --output-dir OUTPUT_DIR
                         Directory to save scraped data and visualizations to
+  --config CONFIG       File name of configuration file to store TikTok credentials to
   --log LOG             File to write logs to
 ```
 
@@ -38,23 +39,23 @@ optional arguments:
 ```
 $ tree ../data
 ../data
-├── ids
-│   └── post_ids.json
 ├── london
-│   └── posts
-│       └── data.json
+│   ├── plots
+│   ├── posts.json
+│   └── media
 ├── newyork
-│   └── posts
-│       └── data.json
+│   ├── plots
+│   ├── posts.json
+│   └── media
 └── paris
-    └── posts
-        └── data.json
+    ├── plots
+    ├── posts.json
+    └── media
 ```
 
 
 The `data` folder contains all the downloaded data as shown in the tree diagram above. 
-- The `ids` folder contains two files `post_ids.json` and `video_ids.json` that record the ids of the downloaded posts and videos for each hashtag.
-- Each hashtag has a folder with two subfolders `posts` and `videos` that store posts and videos respectively. The posts are stored in the `data.json` file in the `posts` folder, and videos are stored as the `.mp4` files in the `videos` folder.
+- Each hashtag has a folder with two subfolders `plots` and `media` that store plots of the most common co-occurring hashtags, and media downloaded from the posts. The posts are stored in the `posts.json` file, and downloaded media is stored as `.mp4` files (for videos) or audio and image files (for image galleries) in the `media` folder.
 
 
 ## How to use
@@ -75,8 +76,8 @@ and will produce an output similar to the following log:
 - The list of hashtags to scrape is specified as a positional argument
 
 ### Video downloading
-Running the `tiktok-hashtag-analysis` script with the following options will scrape trending videos containing the hashtag `#london`:
-`tiktok-hashtag-analysis download london --download`
+Running the `tiktok-hashtag-analysis` script with the following options will scrape trending posts containing the hashtag `#london`:
+`tiktok-hashtag-analysis london --download`
 
 - The `--download` flag specifies that video files for scraped posts should be downloaded
 
@@ -84,7 +85,7 @@ Note that video downloading is a time and data rate consuming task, as a result
 
 ## Analyzing results 
 ### Most common co-occurring hashtags
-In addition to scraping data and downloading videos, the `tiktok-hashtag-analysis` script can also analyze the frequencies of the most common co-occurring hashtags in a given set of posts.
+In addition to scraping data and downloading media, the `tiktok-hashtag-analysis` script can also analyze the frequencies of the most common co-occurring hashtags in a given set of posts.
 
 Assume we want to analyze the 20 most frequently co-occurring hashtags in the downloaded posts of the `#london` hashtag.
 
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index e059dbb..c6aed7e 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -155,7 +155,7 @@ class TikTokDownloader:
 
         # Define file containing post data and directory to save videos to
         hashtag_file = self.data_dir / hashtag / "posts.json"
-        video_dir = self.data_dir / hashtag / "videos"
+        video_dir = self.data_dir / hashtag / "media"
         video_dir.mkdir(exist_ok=True)
 
         # Get list of post IDs that have previously had their media downloaded
@@ -191,8 +191,8 @@ class TikTokDownloader:
 
         # Download video files for all video posts
         if len(urls_to_download) > 0:
-            logging.info(f"Downloading videos for hashtag {hashtag}")
-        ydl_opts = {"outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s")}
+            logging.info(f"Downloading media for hashtag {hashtag}")
+        ydl_opts = {"outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"), "ignore_errors": True}
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download(urls_to_download)
 

From 10821e30f2f9dff488d3eedca99880fab7f8bb18 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 6 Sep 2023 09:51:31 -0500
Subject: [PATCH 6/6] preparing for publishing (removed pipenv commands from
 workflow, added Contributing section on README, added functionality to pin
 dependency versions with requirements.txt)

---
 .github/workflows/python-publish.yaml |  7 ++---
 .gitignore                            |  1 +
 README.md                             | 15 +++++++++++
 requirements.txt                      |  5 ++++
 setup.py                              | 38 ++++++++++++++++++++++++---
 tiktok_hashtag_analysis/__init__.py   |  2 --
 tiktok_hashtag_analysis/auth.py       |  1 +
 tiktok_hashtag_analysis/base.py       |  5 +++-
 tiktok_hashtag_analysis/version.py    | 11 ++++++++
 9 files changed, 74 insertions(+), 11 deletions(-)
 create mode 100644 requirements.txt
 create mode 100644 tiktok_hashtag_analysis/version.py

diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml
index 5ce8e63..83d16e0 100644
--- a/.github/workflows/python-publish.yaml
+++ b/.github/workflows/python-publish.yaml
@@ -33,15 +33,12 @@ jobs:
 
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine pipenv
+        python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine
         python -m pip install -e . --upgrade
-        python -m pipenv install --dev --python 3.10
-      env:
-        PIPENV_DEFAULT_PYTHON_VERSION: "3.10"
 
     - name: Build wheels
       run: |
-        python -m pipenv run python setup.py sdist bdist_wheel
+        python setup.py sdist bdist_wheel
 
     - name: Publish a Python distribution to PyPI
       uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
index 525e540..d5095d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 data/
 build/
 *.egg-info/
+dist/
 
 # Miscellaneous files
 **/.DS_Store
diff --git a/README.md b/README.md
index 2c51e2e..5f891ac 100644
--- a/README.md
+++ b/README.md
@@ -132,3 +132,18 @@ Assume we want to analyze the 20 most frequently co-occurring hashtags in the do
     ```
 
     The `Frequency` column shows the ratio of the occurrence to the total number of downloaded posts.
+
+### Contributing
+To run the built-in tests in the `tests/` directory, first install the test dependency packages:
+
+```
+pip install .[test]
+```
+
+and then run the tests using the following command:
+
+```
+pytest
+```
+
+This repo uses [black](https://github.com/psf/black) to format source code, please run the `black` command before submitting a PR. 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e4144ef
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+seaborn==0.12.2
+matplotlib==3.7.2
+yt-dlp==2023.7.6
+TikTokApi==6.1.1
+requests==2.31.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index f5d5377..5760f41 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,42 @@
 from setuptools import setup
-from tiktok_hashtag_analysis import __version__
+
+
+def read_requirements(filename: str):
+    with open(filename) as requirements_file:
+        import re
+
+        def fix_url_dependencies(req: str) -> str:
+            """Pip and setuptools disagree about how URL dependencies should be handled."""
+            m = re.match(
+                r"^(git\+)?(https|ssh)://(git@)?github\.com/([\w-]+)/(?P<name>[\w-]+)\.git",
+                req,
+            )
+            if m is None:
+                return req
+            else:
+                return f"{m.group('name')} @ {req}"
+
+        requirements = []
+        for line in requirements_file:
+            line = line.strip()
+            if line.startswith("#") or len(line) <= 0:
+                continue
+            requirements.append(fix_url_dependencies(line))
+    return requirements
+
 
 with open("README.md", "r", encoding="utf-8") as file:
     long_description = file.read()
 
+# version.py defines the VERSION and VERSION_SHORT variables.
+# We use exec here so we don't import tiktok_hashtag_analysis whilst setting up.
+VERSION = {}  # type: ignore
+with open("tiktok_hashtag_analysis/version.py", "r") as version_file:
+    exec(version_file.read(), VERSION)
+
 setup(
     name="tiktok-hashtag-analysis",
-    version=__version__,
+    version=VERSION["VERSION"],
     author="Bellingcat",
     author_email="tech@bellingcat.com",
     packages=["tiktok_hashtag_analysis"],
@@ -15,7 +45,9 @@ setup(
     long_description_content_type="text/markdown",
     url="https://github.com/bellingcat/tiktok-hashtag-analysis",
     license="MIT License",
-    install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt-dlp"],
+    # install_requires=read_requirements("requirements.txt"),
+    # extras_require={"dev": read_requirements("dev-requirements.txt")},
+    install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt_dlp"],
     extras_require={"test": ["pytest", "pytest-cov", "pytest-html", "pytest-metadata"]},
     classifiers=[
         "Development Status :: 5 - Production/Stable",
diff --git a/tiktok_hashtag_analysis/__init__.py b/tiktok_hashtag_analysis/__init__.py
index 7a97c27..eea2898 100644
--- a/tiktok_hashtag_analysis/__init__.py
+++ b/tiktok_hashtag_analysis/__init__.py
@@ -1,3 +1 @@
-__version__ = "2.0.0"
-
 from .base import TikTokDownloader
diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py
index 25c2222..545e2ce 100644
--- a/tiktok_hashtag_analysis/auth.py
+++ b/tiktok_hashtag_analysis/auth.py
@@ -15,6 +15,7 @@ class Authorization:
             self.config_file = Path.home() / ".tiktok"
 
         self.section = "TikTok"
+        self.ms_token = None
 
     def get_token(self) -> str:
         """Load the "msToken" cookie taken from TikTok, which the scraper requires."""
diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py
index c6aed7e..d7a9e9e 100644
--- a/tiktok_hashtag_analysis/base.py
+++ b/tiktok_hashtag_analysis/base.py
@@ -192,7 +192,10 @@ class TikTokDownloader:
         # Download video files for all video posts
         if len(urls_to_download) > 0:
             logging.info(f"Downloading media for hashtag {hashtag}")
-        ydl_opts = {"outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"), "ignore_errors": True}
+        ydl_opts = {
+            "outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"),
+            "ignoreerrors": True,  # yt-dlp API key has no underscore
+        }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download(urls_to_download)
 
diff --git a/tiktok_hashtag_analysis/version.py b/tiktok_hashtag_analysis/version.py
new file mode 100644
index 0000000..aba80f2
--- /dev/null
+++ b/tiktok_hashtag_analysis/version.py
@@ -0,0 +1,11 @@
+_MAJOR = "2"
+_MINOR = "0"
+# On main and in a nightly release the patch should be one ahead of the last
+# released build.
+_PATCH = "0"
+# This is mainly for nightly builds which have the suffix ".dev$DATE". See
+# https://semver.org/#is-v123-a-semantic-version for the semantics.
+_SUFFIX = ""
+
+VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
+VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)