diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml index 5ce8e63..83d16e0 100644 --- a/.github/workflows/python-publish.yaml +++ b/.github/workflows/python-publish.yaml @@ -33,15 +33,12 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine pipenv + python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine python -m pip install -e . --upgrade - python -m pipenv install --dev --python 3.10 - env: - PIPENV_DEFAULT_PYTHON_VERSION: "3.10" - name: Build wheels run: | - python -m pipenv run python setup.py sdist bdist_wheel + python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 525e540..d5095d6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ data/ build/ *.egg-info/ +dist/ # Miscellaneous files **/.DS_Store diff --git a/README.md b/README.md index 2c51e2e..5f891ac 100644 --- a/README.md +++ b/README.md @@ -132,3 +132,18 @@ Assume we want to analyze the 20 most frequently co-occurring hashtags in the do ``` The `Frequency` column shows the ratio of the occurrence to the total number of downloaded posts. + +### Contributing +To run the built-in tests in the `tests/` directory, first install the test dependency packages: + +``` +pip install .[test] +``` + +and then run the tests using the following command: + +``` +pytest +``` + +This repo uses [black](https://github.com/psf/black) to format source code; please run the `black` command before submitting a PR.
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e4144ef --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +seaborn==0.12.2 +matplotlib==3.7.2 +yt-dlp==2023.7.6 +TikTokApi==6.1.1 +requests==2.31.0 \ No newline at end of file diff --git a/setup.py b/setup.py index f5d5377..5760f41 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,42 @@ from setuptools import setup -from tiktok_hashtag_analysis import __version__ + + +def read_requirements(filename: str): + with open(filename) as requirements_file: + import re + + def fix_url_dependencies(req: str) -> str: + """Pip and setuptools disagree about how URL dependencies should be handled.""" + m = re.match( + r"^(git\+)?(https|ssh)://(git@)?github\.com/([\w-]+)/(?P<name>[\w-]+)\.git", + req, + ) + if m is None: + return req + else: + return f"{m.group('name')} @ {req}" + + requirements = [] + for line in requirements_file: + line = line.strip() + if line.startswith("#") or len(line) <= 0: + continue + requirements.append(fix_url_dependencies(line)) + return requirements + with open("README.md", "r", encoding="utf-8") as file: long_description = file.read() +# version.py defines the VERSION and VERSION_SHORT variables. +# We use exec here so we don't import tiktok_hashtag_analysis whilst setting up.
+VERSION = {} # type: ignore +with open("tiktok_hashtag_analysis/version.py", "r") as version_file: + exec(version_file.read(), VERSION) + setup( name="tiktok-hashtag-analysis", - version=__version__, + version=VERSION["VERSION"], author="Bellingcat", author_email="tech@bellingcat.com", packages=["tiktok_hashtag_analysis"], @@ -15,7 +45,9 @@ setup( long_description_content_type="text/markdown", url="https://github.com/bellingcat/tiktok-hashtag-analysis", license="MIT License", - install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt-dlp"], + # install_requires=read_requirements("requirements.txt"), + # extras_require={"dev": read_requirements("dev-requirements.txt")}, + install_requires=["seaborn", "matplotlib", "TikTokApi", "requests", "yt_dlp"], extras_require={"test": ["pytest", "pytest-cov", "pytest-html", "pytest-metadata"]}, classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tiktok_hashtag_analysis/__init__.py b/tiktok_hashtag_analysis/__init__.py index 7a97c27..eea2898 100644 --- a/tiktok_hashtag_analysis/__init__.py +++ b/tiktok_hashtag_analysis/__init__.py @@ -1,3 +1 @@ -__version__ = "2.0.0" - from .base import TikTokDownloader diff --git a/tiktok_hashtag_analysis/auth.py b/tiktok_hashtag_analysis/auth.py index 25c2222..545e2ce 100644 --- a/tiktok_hashtag_analysis/auth.py +++ b/tiktok_hashtag_analysis/auth.py @@ -15,6 +15,7 @@ class Authorization: self.config_file = Path.home() / ".tiktok" self.section = "TikTok" + self.ms_token = None def get_token(self) -> str: """Load the "msToken" cookie taken from TikTok, which the scraper requires.""" diff --git a/tiktok_hashtag_analysis/base.py b/tiktok_hashtag_analysis/base.py index c6aed7e..d7a9e9e 100644 --- a/tiktok_hashtag_analysis/base.py +++ b/tiktok_hashtag_analysis/base.py @@ -192,7 +192,10 @@ class TikTokDownloader: # Download video files for all video posts if len(urls_to_download) > 0: logging.info(f"Downloading media for hashtag {hashtag}") - ydl_opts = 
{"outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"), "ignore_errors": True} + ydl_opts = { + "outtmpl": os.path.join(video_dir, "%(id)s.%(ext)s"), + "ignore_errors": True, + } with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download(urls_to_download) diff --git a/tiktok_hashtag_analysis/version.py b/tiktok_hashtag_analysis/version.py new file mode 100644 index 0000000..aba80f2 --- /dev/null +++ b/tiktok_hashtag_analysis/version.py @@ -0,0 +1,11 @@ +_MAJOR = "2" +_MINOR = "0" +# On main and in a nightly release the patch should be one ahead of the last +# released build. +_PATCH = "0" +# This is mainly for nightly builds which have the suffix ".dev$DATE". See +# https://semver.org/#is-v123-a-semantic-version for the semantics. +_SUFFIX = "" + +VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR) +VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)