Compare commits

...

15 Commits

Author SHA1 Message Date
msramalho
8e5fba712c archival notification 2025-05-05 11:45:33 +01:00
msramalho
f522f891cb Bump version to v0.3.34 for release 2025-05-05 11:26:10 +01:00
msramalho
743ca9c165 adapting CD 2025-05-05 11:25:58 +01:00
msramalho
2130a33829 Bump version to v0.3.33 for release 2025-05-05 11:20:03 +01:00
msramalho
3d5b6de557 release requires build 2025-05-05 11:19:57 +01:00
msramalho
b9a6b2b747 Bump version to v0.3.32 for release 2025-05-05 11:17:54 +01:00
msramalho
d948044ae9 removes need for checks for release, as yt will always fail 2025-05-05 11:17:39 +01:00
msramalho
49c254a413 version bump 2025-05-05 11:02:55 +01:00
msramalho
d840b280d7 removes unnecessary dependency and does minor cleanup 2025-05-05 11:01:50 +01:00
Miguel Sozinho Ramalho
e6c98c73ea Merge pull request #65 from bellingcat/migrate-gh-artifacts-to-v4
migrate gh artifact actions to v4
2025-01-09 15:16:05 +00:00
Miguel Sozinho Ramalho
e6fdd54518 cache v2 is also being deprecated
https://github.com/actions/cache/discussions/1510
2025-01-09 15:05:01 +00:00
Miguel Sozinho Ramalho
f61204c4b1 Update main.yml
from migration guide no breaking changes apply here.
2025-01-09 14:57:37 +00:00
msramalho
ea834c37e2 improved documentation of vk-api dependency 2024-07-16 16:40:04 +01:00
msramalho
3e22709430 Bump version to v0.3.30 for release 2024-07-16 16:18:32 +01:00
msramalho
9c7eadc716 attempts to circumvent pypi not allowing repo reference 2024-07-16 16:18:23 +01:00
8 changed files with 1511 additions and 1581 deletions

View File

@@ -31,7 +31,7 @@ runs:
# Get the exact Python version to use in the cache key. # Get the exact Python version to use in the cache key.
echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV
- uses: actions/cache@v2 - uses: actions/cache@v4
id: virtualenv-cache id: virtualenv-cache
with: with:
path: .venv path: .venv

View File

@@ -79,10 +79,11 @@ jobs:
run: | run: |
. .venv/bin/activate . .venv/bin/activate
${{ matrix.task.run }} ${{ matrix.task.run }}
continue-on-error: ${{ matrix.task.name != 'Build' }}
- name: Upload package distribution files - name: Upload package distribution files
if: matrix.task.name == 'Build' if: matrix.task.name == 'Build'
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: package name: package
path: dist path: dist
@@ -117,7 +118,7 @@ jobs:
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
- name: Download package distribution files - name: Download package distribution files
uses: actions/download-artifact@v3 uses: actions/download-artifact@v4
with: with:
name: package name: package
path: dist path: dist

24
Pipfile
View File

@@ -5,6 +5,15 @@ name = "pypi"
[packages] [packages]
yt-dlp = ">=2023.2.17" yt-dlp = ">=2023.2.17"
certifi = ">=2023.7.22"
charset-normalizer = ">=3.0.1"
idna = ">=3.4"
mutagen = ">=1.46.0"
pycryptodomex = ">=3.17"
requests = ">=2.28.2"
urllib3 = ">=1.26.14"
websockets = ">=10.4"
vk-api = {ref = "b99dac0ec2f832a6c4b20bde49869e7229ce4742", git = "git+https://github.com/python273/vk_api.git"}
flake8 = "*" flake8 = "*"
mypy = ">=0.961" mypy = ">=0.961"
black = ">=22.3.0" black = ">=22.3.0"
@@ -19,19 +28,8 @@ myst-parser = "<0.19.0,>=0.15.2"
sphinx-copybutton = ">=0.5.0" sphinx-copybutton = ">=0.5.0"
sphinx-autobuild = ">=2021.3.14" sphinx-autobuild = ">=2021.3.14"
sphinx-autodoc-typehints = "*" sphinx-autodoc-typehints = "*"
packaging = "*"
python-dotenv = ">=0.21.1" python-dotenv = ">=0.21.1"
brotli = ">=1.0.9"
certifi = ">=2023.7.22"
charset-normalizer = ">=3.0.1"
idna = ">=3.4"
mutagen = ">=1.46.0"
pycryptodomex = ">=3.17"
requests = ">=2.28.2"
urllib3 = ">=1.26.14"
websockets = ">=10.4"
# vk-api = {ref = "77b5a0d51a6bbf54d59554332f28a488615fbd6c", git = "git+https://github.com/python273/vk_api.git"}
# vk-api = "*"
vk-api = {ref = "b99dac0ec2f832a6c4b20bde49869e7229ce4742", git = "git+https://github.com/python273/vk_api.git"}
[dev-packages] [dev-packages]
sphinx-copybutton = "==0.5.0" sphinx-copybutton = "==0.5.0"
@@ -51,7 +49,7 @@ sphinx-autodoc-typehints = "*"
python-dotenv = "*" python-dotenv = "*"
[requires] [requires]
python_version = "3.9" python_version = "3.11"
[pipenv] [pipenv]
allow_prereleases = true allow_prereleases = true

2914
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
# vk-url-scraper # vk-url-scraper
Python library to scrape data, and especially media links like videos and photos, from vk.com URLs. Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
> This repo has been archived because it relies on a fixed git commit of the vk_api library, which means we can no longer publish it to pypi, see [issue](https://github.com/bellingcat/vk-url-scraper/issues/66). You can still install the latest release. This archived state may change if a solution is found to publish the library to pypi again.
[![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper) [![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper)
[![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/) [![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/)
@@ -12,6 +13,12 @@ You can use it via the [command line](#command-line-usage) or as a [python libra
## Installation ## Installation
You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`. You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.
Currently you need to manually uninstall and re-install one dependency (as it is installed from github and not pypi):
```bash
pip uninstall vk-api
pip install git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
```
To use the library you will need a valid username/password combination for vk.com. To use the library you will need a valid username/password combination for vk.com.
## Command line usage ## Command line usage
@@ -84,8 +91,9 @@ see [docs] for all available functions.
## Development ## Development
(more info in [CONTRIBUTING.md](CONTRIBUTING.md)). (more info in [CONTRIBUTING.md](CONTRIBUTING.md)).
1. setup dev environment with `pip install -r dev-requirements.txt` or `pipenv install -r dev-requirements.txt` 1. setup dev environment with `pipenv install --dev`
1. setup environment with `pip install -r requirements.txt` or `pipenv install -r requirements.txt` 1. setup environment with `pipenv install -r requirements.txt`
1. Activate the environment with `pipenv shell` (or prepend `pipenv run` to all commands)
2. To run all checks use `make run-checks` (fixes style) or individually 2. To run all checks use `make run-checks` (fixes style) or individually
1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint 1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint
2. To do type checking: `mypy .` 2. To do type checking: `mypy .`
@@ -98,6 +106,7 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_
## Releasing new version ## Releasing new version
1. edit [version.py](vk_url_scraper/version.py) with proper versioning 1. edit [version.py](vk_url_scraper/version.py) with proper versioning
2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv 2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv
1. if the hardcoded version of [vk_api](https://github.com/python273/vk_api) is still being used, then you must comment out or remove that line from the generated requirements file and instruct users to manually install that version from source, as pypi does not allow dependencies that reference a repo/commit. Additionally, add the latest released version to the requirements file, currently `vk-api==11.9.9`.
3. run `./scripts/release.sh` to create a tag and push, alternatively 3. run `./scripts/release.sh` to create a tag and push, alternatively
1. `git tag vx.y.z` to tag version 1. `git tag vx.y.z` to tag version
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/) 2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)

View File

@@ -1,103 +1,87 @@
aiohttp==3.9.1
aiosignal==1.3.1
alabaster==0.7.16 alabaster==0.7.16
anyio==4.4.0 anyio==4.9.0
async-timeout==4.0.3 babel==2.17.0
attrs==23.2.0
Babel==2.15.0
backports.tarfile==1.2.0 backports.tarfile==1.2.0
beautifulsoup4==4.13.0b2 beautifulsoup4==4.13.4
black==24.4.2 black==25.1.0
bleach==6.0.0 certifi==2025.4.26
Brotli==1.1.0 cffi==1.17.1
certifi==2024.7.4 charset-normalizer==3.4.2
cffi==1.17.0rc1 click==8.1.8
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6 colorama==0.4.6
commonmark==0.9.1 coverage==7.8.0
coverage==7.6.0 cryptography==44.0.3
cryptography==42.0.8
docutils==0.18.1 docutils==0.18.1
exceptiongroup==1.2.2 flake8==7.2.0
flake8==7.1.0
frozenlist==1.4.1
furo==2023.3.27 furo==2023.3.27
h11==0.14.0 h11==0.16.0
idna==3.7 id==1.5.0
idna==3.10
imagesize==1.4.1 imagesize==1.4.1
importlib_metadata==8.0.0 importlib_metadata==8.7.0
iniconfig==2.0.0 iniconfig==2.1.0
isort==6.0.0b2 isort==6.0.1
jaraco.classes==3.4.0 jaraco.classes==3.4.0
jaraco.context==5.3.0 jaraco.context==6.0.1
jaraco.functools==4.0.1 jaraco.functools==4.1.0
jeepney==0.8.0 jeepney==0.9.0
Jinja2==3.1.4 Jinja2==3.1.6
keyring==25.2.1 keyring==25.6.0
livereload==2.6.3 livereload==2.7.1
markdown-it-py==2.2.0 markdown-it-py==2.2.0
MarkupSafe==2.1.5 MarkupSafe==3.0.2
mccabe==0.7.0 mccabe==0.7.0
mdit-py-plugins==0.3.5 mdit-py-plugins==0.3.5
mdurl==0.1.2 mdurl==0.1.2
more-itertools==10.3.0 more-itertools==10.7.0
multidict==6.0.4
mutagen==1.47.0 mutagen==1.47.0
mypy==1.10.1 mypy==1.15.0
mypy-extensions==1.0.0 mypy_extensions==1.1.0
myst-parser==0.18.1 myst-parser==0.18.1
nh3==0.2.18 nh3==0.2.21
packaging==24.1 packaging==25.0
pathspec==0.12.1 pathspec==0.12.1
pkginfo==1.10.0 pkginfo==1.10.0
platformdirs==4.2.2 platformdirs==4.3.7
pluggy==1.5.0 pluggy==1.5.0
py==1.11.0 pycodestyle==2.13.0
pycodestyle==2.12.0
pycparser==2.22 pycparser==2.22
pycryptodomex==3.20.0 pycryptodomex==3.22.0
pyflakes==3.2.0 pyflakes==3.3.2
Pygments==2.18.0 Pygments==2.19.1
pyparsing==3.0.9 pytest==8.3.5
pytest==8.2.2 pytest-cov==6.1.1
pytest-cov==5.0.0
pytest-sphinx==0.6.3 pytest-sphinx==0.6.3
python-dotenv==1.0.1 python-dotenv==1.1.0
pytz==2022.1 PyYAML==6.0.2
PyYAML==6.0.2rc1
readme_renderer==43.0 readme_renderer==43.0
requests==2.32.3 requests==2.32.3
requests-toolbelt==1.0.0 requests-toolbelt==1.0.0
rfc3986==2.0.0 rfc3986==2.0.0
rich==13.7.1 rich==14.0.0
SecretStorage==3.3.3 SecretStorage==3.3.3
six==1.16.0
sniffio==1.3.1 sniffio==1.3.1
snowballstemmer==2.2.0 snowballstemmer==2.2.0
soupsieve==2.5 soupsieve==2.7
Sphinx==5.0.2 Sphinx==5.0.2
sphinx-autobuild==2024.4.16 sphinx-autobuild==2024.10.3
sphinx-autodoc-typehints==1.19.1 sphinx-autodoc-typehints==1.19.1
sphinx-basic-ng==1.0.0b2 sphinx-basic-ng==1.0.0b2
sphinx-copybutton==0.5.2 sphinx-copybutton==0.5.2
sphinxcontrib-applehelp==1.0.8 sphinxcontrib-applehelp==2.0.0
sphinxcontrib-devhelp==1.0.6 sphinxcontrib-devhelp==2.0.0
sphinxcontrib-htmlhelp==2.0.5 sphinxcontrib-htmlhelp==2.1.0
sphinxcontrib-jsmath==1.0.1 sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.7 sphinxcontrib-qthelp==2.0.0
sphinxcontrib-serializinghtml==1.1.10 sphinxcontrib-serializinghtml==2.0.0
starlette==0.37.2 starlette==0.46.2
tomli==2.0.1 tornado==6.5b1
tornado==6.4 twine==6.1.0
twine==5.1.1 typing_extensions==4.13.2
typing_extensions==4.12.2 urllib3==2.4.0
urllib3==2.2.2 uvicorn==0.34.2
uvicorn==0.30.1 vk_api @ git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
vk-api @ git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742 watchfiles==1.0.5
watchfiles==0.22.0 websockets==15.0.1
webencodings==0.5.1 yt-dlp==2025.5.3.232917.dev0
websockets==12.0 zipp==3.21.0
yarl==1.9.4
yt-dlp==2024.7.15.232803.dev0
zipp==3.19.2

View File

@@ -81,7 +81,7 @@ def test_scrape_wall_url_with_photos():
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея." == "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
) )
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24)) assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
assert len(res[0]["payload"]) == 18 assert len(res[0]["payload"]) == 19
assert len(res[0]["attachments"].keys()) == 1 assert len(res[0]["attachments"].keys()) == 1
assert list(res[0]["attachments"].keys()) == ["photo"] assert list(res[0]["attachments"].keys()) == ["photo"]
assert len(res[0]["attachments"]["photo"]) == 9 assert len(res[0]["attachments"]["photo"]) == 9

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "3" _MINOR = "3"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "29" _PATCH = "34"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""