mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-11 21:08:37 +03:00
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e5fba712c | ||
|
|
f522f891cb | ||
|
|
743ca9c165 | ||
|
|
2130a33829 | ||
|
|
3d5b6de557 | ||
|
|
b9a6b2b747 | ||
|
|
d948044ae9 | ||
|
|
49c254a413 | ||
|
|
d840b280d7 | ||
|
|
e6c98c73ea | ||
|
|
e6fdd54518 | ||
|
|
f61204c4b1 | ||
|
|
ea834c37e2 | ||
|
|
3e22709430 | ||
|
|
9c7eadc716 |
2
.github/actions/setup-venv/action.yml
vendored
2
.github/actions/setup-venv/action.yml
vendored
@@ -31,7 +31,7 @@ runs:
|
|||||||
# Get the exact Python version to use in the cache key.
|
# Get the exact Python version to use in the cache key.
|
||||||
echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV
|
echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV
|
||||||
|
|
||||||
- uses: actions/cache@v2
|
- uses: actions/cache@v4
|
||||||
id: virtualenv-cache
|
id: virtualenv-cache
|
||||||
with:
|
with:
|
||||||
path: .venv
|
path: .venv
|
||||||
|
|||||||
5
.github/workflows/main.yml
vendored
5
.github/workflows/main.yml
vendored
@@ -79,10 +79,11 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
. .venv/bin/activate
|
. .venv/bin/activate
|
||||||
${{ matrix.task.run }}
|
${{ matrix.task.run }}
|
||||||
|
continue-on-error: ${{ matrix.task.name != 'Build' }}
|
||||||
|
|
||||||
- name: Upload package distribution files
|
- name: Upload package distribution files
|
||||||
if: matrix.task.name == 'Build'
|
if: matrix.task.name == 'Build'
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: package
|
name: package
|
||||||
path: dist
|
path: dist
|
||||||
@@ -117,7 +118,7 @@ jobs:
|
|||||||
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Download package distribution files
|
- name: Download package distribution files
|
||||||
uses: actions/download-artifact@v3
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: package
|
name: package
|
||||||
path: dist
|
path: dist
|
||||||
|
|||||||
24
Pipfile
24
Pipfile
@@ -5,6 +5,15 @@ name = "pypi"
|
|||||||
|
|
||||||
[packages]
|
[packages]
|
||||||
yt-dlp = ">=2023.2.17"
|
yt-dlp = ">=2023.2.17"
|
||||||
|
certifi = ">=2023.7.22"
|
||||||
|
charset-normalizer = ">=3.0.1"
|
||||||
|
idna = ">=3.4"
|
||||||
|
mutagen = ">=1.46.0"
|
||||||
|
pycryptodomex = ">=3.17"
|
||||||
|
requests = ">=2.28.2"
|
||||||
|
urllib3 = ">=1.26.14"
|
||||||
|
websockets = ">=10.4"
|
||||||
|
vk-api = {ref = "b99dac0ec2f832a6c4b20bde49869e7229ce4742", git = "git+https://github.com/python273/vk_api.git"}
|
||||||
flake8 = "*"
|
flake8 = "*"
|
||||||
mypy = ">=0.961"
|
mypy = ">=0.961"
|
||||||
black = ">=22.3.0"
|
black = ">=22.3.0"
|
||||||
@@ -19,19 +28,8 @@ myst-parser = "<0.19.0,>=0.15.2"
|
|||||||
sphinx-copybutton = ">=0.5.0"
|
sphinx-copybutton = ">=0.5.0"
|
||||||
sphinx-autobuild = ">=2021.3.14"
|
sphinx-autobuild = ">=2021.3.14"
|
||||||
sphinx-autodoc-typehints = "*"
|
sphinx-autodoc-typehints = "*"
|
||||||
|
packaging = "*"
|
||||||
python-dotenv = ">=0.21.1"
|
python-dotenv = ">=0.21.1"
|
||||||
brotli = ">=1.0.9"
|
|
||||||
certifi = ">=2023.7.22"
|
|
||||||
charset-normalizer = ">=3.0.1"
|
|
||||||
idna = ">=3.4"
|
|
||||||
mutagen = ">=1.46.0"
|
|
||||||
pycryptodomex = ">=3.17"
|
|
||||||
requests = ">=2.28.2"
|
|
||||||
urllib3 = ">=1.26.14"
|
|
||||||
websockets = ">=10.4"
|
|
||||||
# vk-api = {ref = "77b5a0d51a6bbf54d59554332f28a488615fbd6c", git = "git+https://github.com/python273/vk_api.git"}
|
|
||||||
# vk-api = "*"
|
|
||||||
vk-api = {ref = "b99dac0ec2f832a6c4b20bde49869e7229ce4742", git = "git+https://github.com/python273/vk_api.git"}
|
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
sphinx-copybutton = "==0.5.0"
|
sphinx-copybutton = "==0.5.0"
|
||||||
@@ -51,7 +49,7 @@ sphinx-autodoc-typehints = "*"
|
|||||||
python-dotenv = "*"
|
python-dotenv = "*"
|
||||||
|
|
||||||
[requires]
|
[requires]
|
||||||
python_version = "3.9"
|
python_version = "3.11"
|
||||||
|
|
||||||
[pipenv]
|
[pipenv]
|
||||||
allow_prereleases = true
|
allow_prereleases = true
|
||||||
|
|||||||
2914
Pipfile.lock
generated
2914
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
13
README.md
13
README.md
@@ -1,6 +1,7 @@
|
|||||||
# vk-url-scraper
|
# vk-url-scraper
|
||||||
Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
|
Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
|
||||||
|
|
||||||
|
> This repo has been archived because it relies on a fixed git commit of the vk_api library which we can no longer publish to pypi, see [issue](https://github.com/bellingcat/vk-url-scraper/issues/66). You can still install the latest release. This archived state may change if a solution is found to publish the library to pypi again.
|
||||||
|
|
||||||
[](https://badge.fury.io/py/vk-url-scraper)
|
[](https://badge.fury.io/py/vk-url-scraper)
|
||||||
[](https://pypi.python.org/pypi/vk-url-scraper/)
|
[](https://pypi.python.org/pypi/vk-url-scraper/)
|
||||||
@@ -12,6 +13,12 @@ You can use it via the [command line](#command-line-usage) or as a [python libra
|
|||||||
## Installation
|
## Installation
|
||||||
You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.
|
You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.
|
||||||
|
|
||||||
|
Currently you need to manually uninstall and re-install one dependency (as it is installed from github and not pypi):
|
||||||
|
```bash
|
||||||
|
pip uninstall vk-api
|
||||||
|
pip install git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
|
||||||
|
```
|
||||||
|
|
||||||
To use the library you will need a valid username/password combination for vk.com.
|
To use the library you will need a valid username/password combination for vk.com.
|
||||||
|
|
||||||
## Command line usage
|
## Command line usage
|
||||||
@@ -84,8 +91,9 @@ see [docs] for all available functions.
|
|||||||
## Development
|
## Development
|
||||||
(more info in [CONTRIBUTING.md](CONTRIBUTING.md)).
|
(more info in [CONTRIBUTING.md](CONTRIBUTING.md)).
|
||||||
|
|
||||||
1. setup dev environment with `pip install -r dev-requirements.txt` or `pipenv install -r dev-requirements.txt`
|
1. setup dev environment with `pipenv install --dev`
|
||||||
1. setup environment with `pip install -r requirements.txt` or `pipenv install -r requirements.txt`
|
1. setup environment with `pipenv install -r requirements.txt`
|
||||||
|
1. Activate the environment with `pipenv shell` (or prepend `pipenv run` to all commands)
|
||||||
2. To run all checks to `make run-checks` (fixes style) or individually
|
2. To run all checks to `make run-checks` (fixes style) or individually
|
||||||
1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint
|
1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint
|
||||||
2. To do type checking: `mypy .`
|
2. To do type checking: `mypy .`
|
||||||
@@ -98,6 +106,7 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_
|
|||||||
## Releasing new version
|
## Releasing new version
|
||||||
1. edit [version.py](vk_url_scraper/version.py) with proper versioning
|
1. edit [version.py](vk_url_scraper/version.py) with proper versioning
|
||||||
2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv
|
2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv
|
||||||
|
1. if the hardcoded version of [vk_api](https://github.com/python273/vk_api) is still being used, then you must comment/remove that line from the generated requirements file and instruct users to manually install the version from the source as pypi does not allow repo/commit tags. Additionally, add the latest released version, currently `vk-api==11.9.9`.
|
||||||
3. run `./scripts/release.sh` to create a tag and push, alternatively
|
3. run `./scripts/release.sh` to create a tag and push, alternatively
|
||||||
1. `git tag vx.y.z` to tag version
|
1. `git tag vx.y.z` to tag version
|
||||||
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
|
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
|
||||||
|
|||||||
130
requirements.txt
130
requirements.txt
@@ -1,103 +1,87 @@
|
|||||||
aiohttp==3.9.1
|
|
||||||
aiosignal==1.3.1
|
|
||||||
alabaster==0.7.16
|
alabaster==0.7.16
|
||||||
anyio==4.4.0
|
anyio==4.9.0
|
||||||
async-timeout==4.0.3
|
babel==2.17.0
|
||||||
attrs==23.2.0
|
|
||||||
Babel==2.15.0
|
|
||||||
backports.tarfile==1.2.0
|
backports.tarfile==1.2.0
|
||||||
beautifulsoup4==4.13.0b2
|
beautifulsoup4==4.13.4
|
||||||
black==24.4.2
|
black==25.1.0
|
||||||
bleach==6.0.0
|
certifi==2025.4.26
|
||||||
Brotli==1.1.0
|
cffi==1.17.1
|
||||||
certifi==2024.7.4
|
charset-normalizer==3.4.2
|
||||||
cffi==1.17.0rc1
|
click==8.1.8
|
||||||
charset-normalizer==3.3.2
|
|
||||||
click==8.1.7
|
|
||||||
colorama==0.4.6
|
colorama==0.4.6
|
||||||
commonmark==0.9.1
|
coverage==7.8.0
|
||||||
coverage==7.6.0
|
cryptography==44.0.3
|
||||||
cryptography==42.0.8
|
|
||||||
docutils==0.18.1
|
docutils==0.18.1
|
||||||
exceptiongroup==1.2.2
|
flake8==7.2.0
|
||||||
flake8==7.1.0
|
|
||||||
frozenlist==1.4.1
|
|
||||||
furo==2023.3.27
|
furo==2023.3.27
|
||||||
h11==0.14.0
|
h11==0.16.0
|
||||||
idna==3.7
|
id==1.5.0
|
||||||
|
idna==3.10
|
||||||
imagesize==1.4.1
|
imagesize==1.4.1
|
||||||
importlib_metadata==8.0.0
|
importlib_metadata==8.7.0
|
||||||
iniconfig==2.0.0
|
iniconfig==2.1.0
|
||||||
isort==6.0.0b2
|
isort==6.0.1
|
||||||
jaraco.classes==3.4.0
|
jaraco.classes==3.4.0
|
||||||
jaraco.context==5.3.0
|
jaraco.context==6.0.1
|
||||||
jaraco.functools==4.0.1
|
jaraco.functools==4.1.0
|
||||||
jeepney==0.8.0
|
jeepney==0.9.0
|
||||||
Jinja2==3.1.4
|
Jinja2==3.1.6
|
||||||
keyring==25.2.1
|
keyring==25.6.0
|
||||||
livereload==2.6.3
|
livereload==2.7.1
|
||||||
markdown-it-py==2.2.0
|
markdown-it-py==2.2.0
|
||||||
MarkupSafe==2.1.5
|
MarkupSafe==3.0.2
|
||||||
mccabe==0.7.0
|
mccabe==0.7.0
|
||||||
mdit-py-plugins==0.3.5
|
mdit-py-plugins==0.3.5
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
more-itertools==10.3.0
|
more-itertools==10.7.0
|
||||||
multidict==6.0.4
|
|
||||||
mutagen==1.47.0
|
mutagen==1.47.0
|
||||||
mypy==1.10.1
|
mypy==1.15.0
|
||||||
mypy-extensions==1.0.0
|
mypy_extensions==1.1.0
|
||||||
myst-parser==0.18.1
|
myst-parser==0.18.1
|
||||||
nh3==0.2.18
|
nh3==0.2.21
|
||||||
packaging==24.1
|
packaging==25.0
|
||||||
pathspec==0.12.1
|
pathspec==0.12.1
|
||||||
pkginfo==1.10.0
|
pkginfo==1.10.0
|
||||||
platformdirs==4.2.2
|
platformdirs==4.3.7
|
||||||
pluggy==1.5.0
|
pluggy==1.5.0
|
||||||
py==1.11.0
|
pycodestyle==2.13.0
|
||||||
pycodestyle==2.12.0
|
|
||||||
pycparser==2.22
|
pycparser==2.22
|
||||||
pycryptodomex==3.20.0
|
pycryptodomex==3.22.0
|
||||||
pyflakes==3.2.0
|
pyflakes==3.3.2
|
||||||
Pygments==2.18.0
|
Pygments==2.19.1
|
||||||
pyparsing==3.0.9
|
pytest==8.3.5
|
||||||
pytest==8.2.2
|
pytest-cov==6.1.1
|
||||||
pytest-cov==5.0.0
|
|
||||||
pytest-sphinx==0.6.3
|
pytest-sphinx==0.6.3
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.1.0
|
||||||
pytz==2022.1
|
PyYAML==6.0.2
|
||||||
PyYAML==6.0.2rc1
|
|
||||||
readme_renderer==43.0
|
readme_renderer==43.0
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
requests-toolbelt==1.0.0
|
requests-toolbelt==1.0.0
|
||||||
rfc3986==2.0.0
|
rfc3986==2.0.0
|
||||||
rich==13.7.1
|
rich==14.0.0
|
||||||
SecretStorage==3.3.3
|
SecretStorage==3.3.3
|
||||||
six==1.16.0
|
|
||||||
sniffio==1.3.1
|
sniffio==1.3.1
|
||||||
snowballstemmer==2.2.0
|
snowballstemmer==2.2.0
|
||||||
soupsieve==2.5
|
soupsieve==2.7
|
||||||
Sphinx==5.0.2
|
Sphinx==5.0.2
|
||||||
sphinx-autobuild==2024.4.16
|
sphinx-autobuild==2024.10.3
|
||||||
sphinx-autodoc-typehints==1.19.1
|
sphinx-autodoc-typehints==1.19.1
|
||||||
sphinx-basic-ng==1.0.0b2
|
sphinx-basic-ng==1.0.0b2
|
||||||
sphinx-copybutton==0.5.2
|
sphinx-copybutton==0.5.2
|
||||||
sphinxcontrib-applehelp==1.0.8
|
sphinxcontrib-applehelp==2.0.0
|
||||||
sphinxcontrib-devhelp==1.0.6
|
sphinxcontrib-devhelp==2.0.0
|
||||||
sphinxcontrib-htmlhelp==2.0.5
|
sphinxcontrib-htmlhelp==2.1.0
|
||||||
sphinxcontrib-jsmath==1.0.1
|
sphinxcontrib-jsmath==1.0.1
|
||||||
sphinxcontrib-qthelp==1.0.7
|
sphinxcontrib-qthelp==2.0.0
|
||||||
sphinxcontrib-serializinghtml==1.1.10
|
sphinxcontrib-serializinghtml==2.0.0
|
||||||
starlette==0.37.2
|
starlette==0.46.2
|
||||||
tomli==2.0.1
|
tornado==6.5b1
|
||||||
tornado==6.4
|
twine==6.1.0
|
||||||
twine==5.1.1
|
typing_extensions==4.13.2
|
||||||
typing_extensions==4.12.2
|
urllib3==2.4.0
|
||||||
urllib3==2.2.2
|
uvicorn==0.34.2
|
||||||
uvicorn==0.30.1
|
vk_api @ git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
|
||||||
vk-api @ git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
|
watchfiles==1.0.5
|
||||||
watchfiles==0.22.0
|
websockets==15.0.1
|
||||||
webencodings==0.5.1
|
yt-dlp==2025.5.3.232917.dev0
|
||||||
websockets==12.0
|
zipp==3.21.0
|
||||||
yarl==1.9.4
|
|
||||||
yt-dlp==2024.7.15.232803.dev0
|
|
||||||
zipp==3.19.2
|
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ def test_scrape_wall_url_with_photos():
|
|||||||
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
|
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
|
||||||
)
|
)
|
||||||
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
|
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
|
||||||
assert len(res[0]["payload"]) == 18
|
assert len(res[0]["payload"]) == 19
|
||||||
assert len(res[0]["attachments"].keys()) == 1
|
assert len(res[0]["attachments"].keys()) == 1
|
||||||
assert list(res[0]["attachments"].keys()) == ["photo"]
|
assert list(res[0]["attachments"].keys()) == ["photo"]
|
||||||
assert len(res[0]["attachments"]["photo"]) == 9
|
assert len(res[0]["attachments"]["photo"]) == 9
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "3"
|
_MINOR = "3"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "29"
|
_PATCH = "34"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user