Merge pull request #17 from rly0nheart/main

WIP
This commit is contained in:
Miguel Sozinho Ramalho
2023-02-13 16:49:02 +00:00
committed by GitHub
16 changed files with 662 additions and 159 deletions

53
.github/workflows/python-publish.yaml vendored Normal file
View File

@@ -0,0 +1,53 @@
# Builds the Python package and uploads it to PyPI.
# Triggers: a published GitHub release, any push to "main", and any push of a
# tag matching v*.*.* (see the `on:` section below).
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
name: Pypi

on:
  release:
    types: [published]
  push:
    branches: [ "main" ]
    tags: [ "v*.*.*" ]

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  deploy:
    name: Publish python package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine pipenv
          python -m pip install -e . --upgrade
          python -m pipenv install --dev --python 3.10
        env:
          PIPENV_DEFAULT_PYTHON_VERSION: "3.10"

      - name: Build wheels
        # Produces both an sdist and a wheel; the publish step uploads from dist/.
        run: |
          python -m pipenv run python setup.py sdist bdist_wheel

      - name: Publish a Python distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
          verbose: true
          # Kebab-case input names: the underscore spellings (skip_existing,
          # packages_dir) are deprecated aliases in this action.
          # NOTE(review): skip-existing presumably keeps runs for an
          # already-uploaded version from failing (this workflow also fires on
          # every push to main) - confirm against the action's documentation.
          skip-existing: true
          packages-dir: dist/

1
.gitignore vendored
View File

@@ -12,7 +12,6 @@ data/
service_account.json
.vscode/
*.log
*.lock
# Unit test / coverage reports
reports

11
Dockerfile Normal file
View File

@@ -0,0 +1,11 @@
# syntax=docker/dockerfile:1
# Image that builds the project wheel from the copied sources, installs it,
# and exposes the installed command as the container entrypoint.

# Pinned to the Python version the project declares in its Pipfile and CI
# workflow (3.10) instead of the moving "latest" tag, so rebuilds are
# reproducible.
FROM python:3.10

WORKDIR /app

# Copy the whole build context (sources and packaging metadata) into /app.
COPY . .

# Build a wheel from the sources and install it; kept in one layer.
RUN pip install --upgrade pip \
    && pip install build \
    && python -m build \
    && pip install dist/*.whl

# NOTE(review): the README invokes the tool as "tiktok-hashtag-analysis"
# (hyphens); confirm that setup.py really registers a console script named
# "tiktok_hashtag_analysis" (underscores).
ENTRYPOINT ["tiktok_hashtag_analysis"]

13
Pipfile Normal file
View File

@@ -0,0 +1,13 @@
# Pipenv manifest for the project. Pipfile.lock is generated from this file.
# Package index that dependencies are fetched from (TLS verification enabled).
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
# Runtime dependencies; "*" places no constraint on the version here.
[packages]
matplotlib = "*"
seaborn = "*"
# No development-only dependencies are declared.
[dev-packages]
# Python version the environment is created for.
[requires]
python_version = "3.10"

416
Pipfile.lock generated Normal file
View File

@@ -0,0 +1,416 @@
{
"_meta": {
"hash": {
"sha256": "97c5ef0126b17f586b5fa1d518cf359b7e984e48f8fc2310e9aa79bd384c2374"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.10"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"contourpy": {
"hashes": [
"sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98",
"sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772",
"sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2",
"sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc",
"sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803",
"sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051",
"sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc",
"sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4",
"sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436",
"sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5",
"sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5",
"sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3",
"sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80",
"sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1",
"sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0",
"sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae",
"sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556",
"sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02",
"sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566",
"sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350",
"sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967",
"sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4",
"sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66",
"sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69",
"sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd",
"sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2",
"sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810",
"sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50",
"sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc",
"sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2",
"sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0",
"sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3",
"sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6",
"sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac",
"sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d",
"sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6",
"sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f",
"sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd",
"sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566",
"sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa",
"sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414",
"sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a",
"sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c",
"sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693",
"sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d",
"sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161",
"sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e",
"sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2",
"sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f",
"sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71",
"sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd",
"sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9",
"sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8",
"sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab",
"sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad"
],
"markers": "python_version >= '3.8'",
"version": "==1.0.7"
},
"cycler": {
"hashes": [
"sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3",
"sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"
],
"markers": "python_version >= '3.6'",
"version": "==0.11.0"
},
"fonttools": {
"hashes": [
"sha256:2bb244009f9bf3fa100fc3ead6aeb99febe5985fa20afbfbaa2f8946c2fbdaf1",
"sha256:820466f43c8be8c3009aef8b87e785014133508f0de64ec469e4efb643ae54fb"
],
"markers": "python_version >= '3.7'",
"version": "==4.38.0"
},
"kiwisolver": {
"hashes": [
"sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b",
"sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166",
"sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c",
"sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c",
"sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0",
"sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4",
"sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9",
"sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286",
"sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767",
"sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c",
"sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6",
"sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b",
"sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004",
"sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf",
"sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494",
"sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac",
"sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626",
"sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766",
"sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514",
"sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6",
"sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f",
"sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d",
"sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191",
"sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d",
"sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51",
"sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f",
"sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8",
"sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454",
"sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb",
"sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da",
"sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8",
"sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de",
"sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a",
"sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9",
"sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008",
"sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3",
"sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32",
"sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938",
"sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1",
"sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9",
"sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d",
"sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824",
"sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b",
"sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd",
"sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2",
"sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5",
"sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69",
"sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3",
"sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae",
"sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597",
"sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e",
"sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955",
"sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca",
"sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a",
"sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea",
"sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede",
"sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4",
"sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6",
"sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686",
"sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408",
"sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871",
"sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29",
"sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750",
"sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897",
"sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0",
"sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2",
"sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09",
"sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"
],
"markers": "python_version >= '3.7'",
"version": "==1.4.4"
},
"matplotlib": {
"hashes": [
"sha256:01b7f521a9a73c383825813af255f8c4485d1706e4f3e2ed5ae771e4403a40ab",
"sha256:11011c97d62c1db7bc20509572557842dbb8c2a2ddd3dd7f20501aa1cde3e54e",
"sha256:1183877d008c752d7d535396096c910f4663e4b74a18313adee1213328388e1e",
"sha256:12f999661589981e74d793ee2f41b924b3b87d65fd929f6153bf0f30675c59b1",
"sha256:1c235bf9be052347373f589e018988cad177abb3f997ab1a2e2210c41562cc0c",
"sha256:1f4d69707b1677560cd952544ee4962f68ff07952fb9069ff8c12b56353cb8c9",
"sha256:1fcc4cad498533d3c393a160975acc9b36ffa224d15a6b90ae579eacee5d8579",
"sha256:2787a16df07370dcba385fe20cdd0cc3cfaabd3c873ddabca78c10514c799721",
"sha256:29f17b7f2e068dc346687cbdf80b430580bab42346625821c2d3abf3a1ec5417",
"sha256:38d38cb1ea1d80ee0f6351b65c6f76cad6060bbbead015720ba001348ae90f0c",
"sha256:3f56a7252eee8f3438447f75f5e1148a1896a2756a92285fe5d73bed6deebff4",
"sha256:5223affa21050fb6118353c1380c15e23aedfb436bf3e162c26dc950617a7519",
"sha256:57ad1aee29043163374bfa8990e1a2a10ff72c9a1bfaa92e9c46f6ea59269121",
"sha256:59400cc9451094b7f08cc3f321972e6e1db4cd37a978d4e8a12824bf7fd2f03b",
"sha256:68d94a436f62b8a861bf3ace82067a71bafb724b4e4f9133521e4d8012420dd7",
"sha256:6adc441b5b2098a4b904bbf9d9e92fb816fef50c55aa2ea6a823fc89b94bb838",
"sha256:6d81b11ede69e3a751424b98dc869c96c10256b2206bfdf41f9c720eee86844c",
"sha256:73b93af33634ed919e72811c9703e1105185cd3fb46d76f30b7f4cfbbd063f89",
"sha256:77b384cee7ab8cf75ffccbfea351a09b97564fc62d149827a5e864bec81526e5",
"sha256:79e501eb847f4a489eb7065bb8d3187117f65a4c02d12ea3a19d6c5bef173bcc",
"sha256:809119d1cba3ece3c9742eb01827fe7a0e781ea3c5d89534655a75e07979344f",
"sha256:80c166a0e28512e26755f69040e6bf2f946a02ffdb7c00bf6158cca3d2b146e6",
"sha256:81b409b2790cf8d7c1ef35920f01676d2ae7afa8241844e7aa5484fdf493a9a0",
"sha256:994637e2995b0342699b396a320698b07cd148bbcf2dd2fa2daba73f34dd19f2",
"sha256:9ceebaf73f1a3444fa11014f38b9da37ff7ea328d6efa1652241fe3777bfdab9",
"sha256:9fb8fb19d03abf3c5dab89a8677e62c4023632f919a62b6dd1d6d2dbf42cd9f5",
"sha256:acc3b1a4bddbf56fe461e36fb9ef94c2cb607fc90d24ccc650040bfcc7610de4",
"sha256:bbddfeb1495484351fb5b30cf5bdf06b3de0bc4626a707d29e43dfd61af2a780",
"sha256:bbf269e1d24bc25247095d71c7a969813f7080e2a7c6fa28931a603f747ab012",
"sha256:bebcff4c3ed02c6399d47329f3554193abd824d3d53b5ca02cf583bcd94470e2",
"sha256:c3f08df2ac4636249b8bc7a85b8b82c983bef1441595936f62c2918370ca7e1d",
"sha256:ca94f0362f6b6f424b555b956971dcb94b12d0368a6c3e07dc7a40d32d6d873d",
"sha256:d00c248ab6b92bea3f8148714837937053a083ff03b4c5e30ed37e28fc0e7e56",
"sha256:d2cfaa7fd62294d945b8843ea24228a27c8e7c5b48fa634f3c168153b825a21b",
"sha256:d5f18430f5cfa5571ab8f4c72c89af52aa0618e864c60028f11a857d62200cba",
"sha256:debeab8e2ab07e5e3dac33e12456da79c7e104270d2b2d1df92b9e40347cca75",
"sha256:dfba7057609ca9567b9704626756f0142e97ec8c5ba2c70c6e7bd1c25ef99f06",
"sha256:e0a64d7cc336b52e90f59e6d638ae847b966f68582a7af041e063d568e814740",
"sha256:eb9421c403ffd387fbe729de6d9a03005bf42faba5e8432f4e51e703215b49fc",
"sha256:faff486b36530a836a6b4395850322e74211cd81fc17f28b4904e1bd53668e3e",
"sha256:ff2aa84e74f80891e6bcf292ebb1dd57714ffbe13177642d65fee25384a30894"
],
"index": "pypi",
"version": "==3.6.3"
},
"numpy": {
"hashes": [
"sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22",
"sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f",
"sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9",
"sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96",
"sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0",
"sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a",
"sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281",
"sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04",
"sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468",
"sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253",
"sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756",
"sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a",
"sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb",
"sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d",
"sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0",
"sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910",
"sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978",
"sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5",
"sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f",
"sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a",
"sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5",
"sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2",
"sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d",
"sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95",
"sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5",
"sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d",
"sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780",
"sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"
],
"markers": "python_version >= '3.8'",
"version": "==1.24.2"
},
"packaging": {
"hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
],
"markers": "python_version >= '3.7'",
"version": "==23.0"
},
"pandas": {
"hashes": [
"sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813",
"sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792",
"sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406",
"sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373",
"sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328",
"sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996",
"sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf",
"sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6",
"sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7",
"sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc",
"sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1",
"sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23",
"sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a",
"sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51",
"sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572",
"sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31",
"sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5",
"sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a",
"sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003",
"sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d",
"sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354",
"sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee",
"sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa",
"sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0",
"sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9",
"sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae",
"sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"
],
"markers": "python_version >= '3.8'",
"version": "==1.5.3"
},
"pillow": {
"hashes": [
"sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33",
"sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b",
"sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e",
"sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35",
"sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153",
"sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9",
"sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569",
"sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57",
"sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8",
"sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1",
"sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264",
"sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157",
"sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9",
"sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133",
"sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9",
"sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab",
"sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6",
"sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5",
"sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df",
"sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503",
"sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b",
"sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa",
"sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327",
"sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493",
"sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d",
"sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4",
"sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4",
"sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35",
"sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2",
"sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c",
"sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011",
"sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a",
"sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e",
"sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f",
"sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848",
"sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57",
"sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f",
"sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c",
"sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9",
"sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5",
"sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9",
"sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d",
"sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0",
"sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1",
"sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e",
"sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815",
"sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0",
"sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b",
"sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd",
"sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c",
"sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3",
"sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab",
"sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858",
"sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5",
"sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee",
"sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343",
"sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb",
"sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47",
"sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed",
"sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837",
"sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286",
"sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28",
"sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628",
"sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df",
"sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d",
"sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d",
"sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a",
"sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6",
"sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336",
"sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132",
"sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070",
"sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe",
"sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a",
"sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd",
"sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391",
"sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a",
"sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12"
],
"markers": "python_version >= '3.7'",
"version": "==9.4.0"
},
"pyparsing": {
"hashes": [
"sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb",
"sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"
],
"markers": "python_full_version >= '3.6.8'",
"version": "==3.0.9"
},
"python-dateutil": {
"hashes": [
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"version": "==2.8.2"
},
"pytz": {
"hashes": [
"sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0",
"sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"
],
"version": "==2022.7.1"
},
"seaborn": {
"hashes": [
"sha256:374645f36509d0dcab895cba5b47daf0586f77bfe3b36c97c607db7da5be0139",
"sha256:ebf15355a4dba46037dfd65b7350f014ceb1f13c05e814eda2c9f5fd731afc08"
],
"index": "pypi",
"version": "==0.12.2"
},
"six": {
"hashes": [
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"version": "==1.16.0"
}
},
"develop": {}
}

View File

@@ -1,39 +1,42 @@
# TikTok hashtag analysis toolset
The tool helps to download posts and videos from TikTok for a given set of hashtags over a period of time. Users can create a growing database of posts for specific hashtags which can then be used for further hashtag analysis. It uses the [tiktok-scraper](https://github.com/drawrowfly/tiktok-scraper) Node package to download the posts and videos.
[![PyPI version](https://badge.fury.io/py/tiktok-hashtag-analysis.svg)](https://badge.fury.io/py/tiktok-hashtag-analysis)
## Pre-requisites
1. Make sure you have Python 3.6 or a later version installed
2. And, you need to have node version 16. On Mac, do `brew install node` followed by `npm install -g n` and then `n 16`
4. Download and install TikTok scraper: https://github.com/drawrowfly/tiktok-scraper
5. (Optional) create and activate a virtual environment for this tool, for example by executing the following command, which creates the `.env` virtual environment in the tool's root directory:
5. Install the tool with pip: `pip install tiktok-hashtag-analysis`
1. or directly from the repo version: `pip install git+https://github.com/bellingcat/tiktok-hashtag-analysis`
`python3 -m venv .env`
4. Start your virtual environment
- On Unix-like operating systems (macOS, Linux), this can be done using the command `source .env/bin/activate`
- On Windows, this can be done using the command `.env\Scripts\activate.bat`
5. Install the Python package dependencies for this tool by executing the command:
`pip install -r requirements.txt`
You should now be ready to start using the tool.
You should now be ready to start using it.
## About the tool
### Command-line arguments
```
python3 run_downloader.py --help
usage: run_downloader.py [-h] [-t [T [T ...]]] [-f F] [-p] [-v]
tiktok-hashtag-analysis --help
usage: tiktok-hashtag-analysis [-h] [-t [T ...]] [-f F] [-p] [-v] [-ht HASHTAG] [-n NUMBER] [-plt] [-d] {download,frequencies}
Download the tiktoks for the requested hashtags
Analyze hashtags within posts scraped from TikTok.
optional arguments:
-h, --help show this help message and exit
-t [T [T ...]] List of hashtags to scrape
-f F File name containing list of hashtags to scrape
-p Download post data
-v Download video files
positional arguments:
{download,frequencies}
command to initialize
options:
-h, --help show this help message and exit
-t [T ...] List of hashtags to scrape (module: run_downloader)
-f F File name containing list of hashtags to scrape (module: run_downloader)
-p Download post data (module: run_downloader)
-v Download video files (module: run_downloader)
-ht HASHTAG, --hashtag HASHTAG
The hashtag of scraped posts to analyze (module: hashtag_frequencies)
-n NUMBER, --number NUMBER
The number of top n occurrences (module: hashtag_frequencies)
-plt, --plot Plot the occurrences (module: hashtag_frequencies)
-d, --print List top n hashtags (module: hashtag_frequencies)
```
### Structure of output data
@@ -61,13 +64,13 @@ The `data` folder contains all the downloaded data as shown in the tree diagram
## How to use
### Post downloading
Running the `run_downloader.py` script with the following options will scrape posts containing the hashtags `#london`, `#paris`, or `#newyork`:
Running the `tiktok-hashtag-analysis download` command with the following options will scrape posts containing the hashtags `#london`, `#paris`, or `#newyork`:
python3 run_downloader.py -t london paris newyork -p
tiktok-hashtag-analysis download -t london paris newyork -p
and will produce an output similar to the following log:
$ python3 run_downloader.py -t london paris newyork -p
$ tiktok-hashtag-analysis download -t london paris newyork -p
Hashtags to scrape: ['london', 'paris', 'newyork']
Scraped 963 posts containing the hashtag 'london'
Scraped 961 posts containing the hashtag 'paris'
@@ -78,8 +81,8 @@ and will produce an output similar to the following log:
- The `-p` flag specifies that posts, not videos, will be downloaded
### Video downloading
Running the `run_downloader.py` script with the following options will scrape trending videos containing the hashtag `#london`:
` python3 run_downloader.py -t london -v`
Running the `tiktok-hashtag-analysis download` command with the following options will scrape trending videos containing the hashtag `#london`:
`tiktok-hashtag-analysis download -t london -v`
- The `-t` flag allows a space-separated list of hashtags to be specified as a command line argument
- The `-v` flag specifies that videos, not posts, will be downloaded
@@ -88,27 +91,13 @@ Note that video downloading is a time and data rate consuming task, as a result
## Analyzing results
### Top n hashtag occurrences
The script `hashtag_frequencies.py` analyzes the frequencies of top occurring hashtags in a given set of posts.
```
$ python3 hashtag_frequencies.py --help
usage: hashtag_frequencies.py [-h] [-p] [-d] hashtag n
positional arguments:
hashtag The hashtag of scraped posts to analyze
n The number of top n occurrences
optional arguments:
-h, --help show this help message and exit
-p, --plot Plot the occurrences
-d, --print List top n hashtags
```
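The `tiktok_hashtag_analysis frequencies` command analyzes the frequencies of the top occurring hashtags in a given set of posts. The hashtag to analyze is passed with `-ht`/`--hashtag` and the number of top occurrences with `-n`/`--number`.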
Assume we want to analyze the 20 most frequently occurring hashtags in the downloaded posts of the `#london` hashtag.
- The results can be plotted and saved as a PNG file by executing the following command:
`python3 hashtag_frequencies.py london 20 -p`
`tiktok_hashtag_analysis frequencies -ht london -n 20 -plt`
which will produce a figure similar to that shown below:
<p align="center">
@@ -119,7 +108,7 @@ Assume we want to analyze the 20 most frequently occurring hashtags in the downl
- The results can be displayed in tabular form by executing the following command:
`python3 hashtag_frequencies.py london 20 -d`
`tiktok_hashtag_analysis frequencies -ht london -n 20 -d`
which will produce a terminal output similar to the following:
```

19
scripts/release.sh Normal file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Tag and push a release for the version currently declared by the package.
#
# Steps: read __version__ from tiktok_hashtag_analysis.version, ask for
# confirmation, commit any pending changes, push, then create and push a
# git tag named "v<version>".
set -e

TAG=$(python -c 'from tiktok_hashtag_analysis.version import __version__; print("v" + __version__)')

# Only an explicit yes proceeds; an empty reply cancels, so the prompt
# advertises "N" as the default. -r keeps backslashes in the reply literal.
read -r -p "Creating new release for $TAG. Do you want to continue? [y/N] " prompt

case "$prompt" in
    y|Y|yes|Yes)
        git add -A
        # `git commit` exits non-zero when there is nothing to commit; that
        # must not abort the release under `set -e`.
        git commit -m "Bump version to $TAG for release" || true
        git push
        echo "Creating new git tag $TAG"
        git tag "$TAG" -m "$TAG"
        git push --tags
        ;;
    *)
        echo "Cancelled"
        exit 1
        ;;
esac

31
setup.py Normal file
View File

@@ -0,0 +1,31 @@
"""Packaging configuration for the ``tiktok-hashtag-analysis`` distribution."""
import setuptools

# NOTE(review): scripts/release.sh reads the version from
# `tiktok_hashtag_analysis.version`, while this file imports it from the
# package root -- confirm both import paths resolve to the same value.
from tiktok_hashtag_analysis import __version__

# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as file:
    long_description = file.read()

setuptools.setup(
    name="tiktok-hashtag-analysis",
    version=__version__,
    author="Bellingcat",
    author_email="tech@bellingcat.com",
    packages=["tiktok_hashtag_analysis"],
    # The package loads `logging.config` from its own directory, so the file
    # has to be shipped inside built distributions, not just the .py modules.
    package_data={"tiktok_hashtag_analysis": ["logging.config"]},
    description="Analyze hashtags within posts scraped from TikTok",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/bellingcat/tiktok-hashtag-analysis",
    license="MIT License",
    install_requires=["seaborn", "matplotlib"],
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Information Technology",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3",
    ],
    entry_points={
        # Installs the `tiktok_hashtag_analysis` executable.
        "console_scripts": [
            "tiktok_hashtag_analysis=tiktok_hashtag_analysis.__main__:main",
        ]
    },
)

View File

View File

@@ -0,0 +1,76 @@
import logging, argparse
from .file_methods import log_writer
from .run_downloader import * # Import everything from run_downloader.py
from .hashtag_frequencies import * # Import everything from hashtag_frequencies.py
logger = logging.getLogger()
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser shared by the `download` and `frequencies` commands.

    The parser takes one positional `command` plus a flat set of options;
    each option's help text names the module it applies to.

    Returns:
        The configured `argparse.ArgumentParser`.
    """
    # One (flags, keyword-arguments) entry per argument, in help-output order.
    argument_table = (
        (
            ("command",),
            {"help": "command to initialize", "choices": ["download", "frequencies"]},
        ),
        (
            ("-t",),
            {
                "type": str,
                "nargs": "*",
                "help": "List of hashtags to scrape (module: run_downloader)",
            },
        ),
        (
            ("-f",),
            {
                "type": str,
                "help": "File name containing list of hashtags to scrape (module: run_downloader)",
            },
        ),
        (
            ("-p",),
            {
                "action": "store_true",
                "help": "Download post data (module: run_downloader)",
            },
        ),
        (
            ("-v",),
            {
                "action": "store_true",
                "help": "Download video files (module: run_downloader)",
            },
        ),
        (
            ("-ht", "--hashtag"),
            {
                "type": str,
                "help": "The hashtag of scraped posts to analyze (module: hashtag_frequencies)",
            },
        ),
        (
            ("-n", "--number"),
            {
                "type": int,
                "help": "The number of top n occurrences (module: hashtag_frequencies)",
            },
        ),
        (
            ("-plt", "--plot"),
            {
                "action": "store_true",
                "help": "Plot the occurrences (module: hashtag_frequencies)",
            },
        ),
        (
            ("-d", "--print"),
            {
                "action": "store_true",
                "help": "List top n hashtags (module: hashtag_frequencies)",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        description="Analyze hashtags within posts scraped from TikTok."
    )
    for flags, options in argument_table:
        cli.add_argument(*flags, **options)
    return cli
def _run_download(parser, args) -> None:
    """Scrape posts and/or videos for the hashtags given via `-t` or `-f`.

    Args:
        parser: The parser that produced `args`; used to report usage errors.
        args: Parsed command-line namespace.

    Raises:
        ValueError: If the resolved hashtag list is empty.
    """
    if not (args.t or args.f):
        parser.error(
            "No hashtags were given, please use either the `-t` flag or the `-f` flag to specify one or more hashtags."
        )
    if not (args.p or args.v):
        parser.error(
            "No argument given, please specify either the `-p` flag to download post data or the `-v` flag to download video files, or both."
        )

    # `-t` takes precedence over `-f` when both are supplied.
    hashtags = args.t if args.t else get_hashtag_list(args.f)
    logger.info(f"Hashtags to scrape: {hashtags}")
    if not hashtags:
        raise ValueError(
            "No hashtags were specified: please use either the `-t` flag to specify a sspace-separated list of one or more hashtags as a command-line argument, or use the `-f` flag to specify a text file of newline-separated hashtags."
        )

    download_data_type = {"posts": args.p, "videos": args.v}
    scraped_summary_list = get_data(hashtags, download_data_type)
    if scraped_summary_list:
        log_writer(scraped_summary_list)


def _run_frequencies(parser, args) -> None:
    """Plot or print the top-n hashtag occurrences for one scraped hashtag.

    Args:
        parser: The parser that produced `args`; used to report usage errors.
        args: Parsed command-line namespace.

    Raises:
        ValueError: If `--number` is smaller than one.
        FileNotFoundError: If no post data file exists for `--hashtag`.
    """
    # Imported here so this function does not depend on `os` leaking in
    # through the star imports at the top of the module.
    import os

    # Both options default to None; report that as a usage error instead of
    # failing later with a TypeError.
    if args.hashtag is None or args.number is None:
        parser.error(
            "The `frequencies` command requires both the `-ht/--hashtag` and the `-n/--number` options."
        )

    img_folder = IMAGES
    # NOTE(review): presumably ensures the image directory exists -- confirm in file_methods.
    check_file(img_folder, "dir")

    # The option is declared as `-n/--number`, so argparse stores it as `args.number`.
    if args.number < 1:
        raise ValueError(
            f"Specified argument `n` (the number of hashtags to analyze) must be greater than zero, not: {args.number}."
        )

    input_file = os.path.join(
        FILES["data"], args.hashtag, FILES["posts"], FILES["data_file"]
    )
    if not check_existence(input_file, "file"):
        raise FileNotFoundError(
            f"File ({input_file}) for specified argument `hashtag` ({args.hashtag}) does not exist."
        )

    occs = get_occurrences(input_file, args.number)
    # Printing is the default output; `--print` is accepted but only `--plot` changes the behaviour.
    if args.plot:
        plot(occs, img_folder)
    else:
        print_occurrences(occs)


def main(argv=None):
    """Command-line entry point: parse arguments and run the selected command.

    Args:
        argv: Optional list of argument strings. When None (the default, and
            what the console script uses), arguments are read from `sys.argv`.
    """
    parser = create_parser()
    args = parser.parse_args(argv)

    if args.command == "download":
        _run_download(parser, args)
    elif args.command == "frequencies":
        _run_frequencies(parser, args)


if __name__ == "__main__":
    main()

View File

@@ -4,7 +4,7 @@
from typing import NamedTuple, List, Tuple, Set, Optional, Dict, Any
import logging
import file_methods
from . import file_methods
logger = logging.getLogger()

View File

@@ -4,13 +4,14 @@
import os
import json
import subprocess
from os import path
from datetime import datetime
import shutil
from typing import Tuple, List, Optional, Dict, Any
import logging, logging.config
logging.config.fileConfig("../logging.config")
logging.config.fileConfig(path.join(path.dirname(path.abspath(__file__)), 'logging.config'))
logger = logging.getLogger("Logger")

View File

@@ -5,43 +5,19 @@
- Specifying the "-d" flag prints the hashtag frequencies on the shell
- Specifying the "-p" flag plots the hashtag frequencies and saves as a png file
"""
import os
import json
import argparse
from datetime import datetime
import warnings
from typing import List, Tuple, Dict, Any
import logging
from typing import List, Tuple, Dict, Any
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from file_methods import check_file, check_existence
from global_data import IMAGES, FILES
from tiktok_hashtag_analysis.file_methods import check_file, check_existence
from tiktok_hashtag_analysis.global_data import IMAGES, FILES
warnings.filterwarnings("ignore", message="Glyph (.*) missing from current font")
sns.set_theme(style="darkgrid")
logger = logging.getLogger()
def create_parser() -> argparse.ArgumentParser:
"""Create the parser and the arguments for the user input."""
parser = argparse.ArgumentParser()
parser.add_argument(
"hashtag",
type=str,
help="The hashtag of scraped posts to analyze",
)
parser.add_argument("n", type=int, help="The number of top n occurrences")
parser.add_argument(
"-p", "--plot", help="Plot the occurrences", action="store_true"
)
parser.add_argument(
"-d", "--print", help="List top n hashtags", action="store_true"
)
return parser
def get_hashtags(obj: Dict) -> List[Tuple[str, int]]:
@@ -123,30 +99,3 @@ def print_occurrences(occs):
print("{:<8} {:<30} {:<15} {:.4f}".format(row_number, key, value, ratio))
row_number += 1
print(f"Total posts: {total_posts}")
if __name__ == "__main__":
img_folder = IMAGES
check_file(img_folder, "dir")
parser = create_parser()
args = parser.parse_args()
if args.n < 1:
raise ValueError(
f"Specified argument `n` (the number of hashtags to analyze) must be greater than zero, not: {args.n}."
)
input_file = data_file = os.path.join(
FILES["data"], args.hashtag, FILES["posts"], FILES["data_file"]
)
if not check_existence(input_file, "file"):
raise FileNotFoundError(
f"File ({input_file}) for specified argument `hashtag` ({args.hashtag}) does not exist."
)
base = os.path.splitext(input_file)[0]
path = f"./{base}_sorted_hashtags.csv"
occs = get_occurrences(input_file, args.n)
if args.plot:
plot(occs, img_folder)
else:
print_occurrences(occs)

View File

@@ -9,32 +9,11 @@
import os
import time
import argparse
import logging
from typing import List, Tuple, Dict, Any, Optional
from tempfile import TemporaryDirectory
import global_data
import file_methods
import data_methods
logger = logging.getLogger()
def create_parser() -> argparse.ArgumentParser:
"""Create the parser and the arguments for the user input."""
parser = argparse.ArgumentParser(
description="Download the tiktoks for the requested hashtags"
)
parser.add_argument("-t", type=str, nargs="*", help="List of hashtags to scrape")
parser.add_argument(
"-f", type=str, help="File name containing list of hashtags to scrape"
)
parser.add_argument("-p", action="store_true", help="Download post data")
parser.add_argument("-v", action="store_true", help="Download video files")
return parser
import tiktok_hashtag_analysis.global_data
import tiktok_hashtag_analysis.file_methods as file_methods
import tiktok_hashtag_analysis.data_methods
def get_hashtag_list(file_name: str) -> List[str]:
@@ -74,7 +53,7 @@ def get_posts(settings: dict, tag: str) -> Optional[Tuple[str, int]]:
1. Calls `file_methods.download_posts` to scrape the post data for a given hashtag
2. Calls `data_methods.extract_posts` to determine which if any posts
haven't previouly been downloaded.
haven't previously been downloaded.
3. Calls `data_methods.update_posts` to update the ID list with the IDs of
newly downloaded posts.
"""
@@ -119,7 +98,7 @@ def get_videos(settings: dict, tag: str) -> Optional[Tuple[str, int]]:
def get_data(
hashtags: list, download_data_type: Dict[str, bool]
hashtags: list, download_data_type: Dict[str, bool]
) -> List[Tuple[str, Tuple[str, int]]]:
"""Check command-line arguments and scrape posts/videos for specified hashtags."""
counter = 0
@@ -169,36 +148,3 @@ def get_data(
time.sleep(settings["sleep"])
return scraped_summary_list
if __name__ == "__main__":
parser = create_parser()
args = parser.parse_args()
if not (args.t or args.f):
parser.error(
"No hashtags were given, please use either the `-t` flag or the `-f` flag to specify one or more hashtags."
)
if not (args.p or args.v):
parser.error(
"No argument given, please specify either the `-p` flag to download post data or the `-v` flag to download video files, or both."
)
if args.t:
hashtags = args.t
elif args.f:
file_name = args.f
hashtags = get_hashtag_list(file_name)
logger.info(f"Hashtags to scrape: {hashtags}")
if not hashtags:
raise ValueError(
"No hashtags were specified: please use either the `-t` flag to specify a sspace-separated list of one or more hashtags as a command-line argument, or use the `-f` flag to specify a text file of newline-separated hashtags."
)
download_data_type = {"posts": args.p, "videos": args.v}
scraped_summary_list = get_data(hashtags, download_data_type)
if scraped_summary_list:
file_methods.log_writer(scraped_summary_list)

View File