Compare commits

...

24 Commits

Author SHA1 Message Date
msramalho
73f17407c0 reverting library dependencies 2024-01-23 18:09:56 +00:00
msramalho
95d249f5d0 min py to 3.10 2024-01-23 13:01:38 +00:00
msramalho
ccb8c1f5c7 min python to 3.8 2024-01-23 12:50:55 +00:00
msramalho
e525ff24b1 lint 2024-01-23 12:45:45 +00:00
msramalho
699b4ebdd8 fix lib dependencies in pypi version 2024-01-23 12:41:25 +00:00
msramalho
8d1a86a7fa fix captcha processing 2024-01-23 12:41:14 +00:00
msramalho
b01dbe6299 fix vk_api dependency changes 2024-01-23 11:56:49 +00:00
msramalho
5b0f034c12 Bump version to v0.3.26 for release 2023-08-18 21:15:54 +01:00
msramalho
a1c098335c fix: private videos 2023-08-18 21:15:34 +01:00
msramalho
12a5d22f64 fix: certifi 2023-08-18 21:12:44 +01:00
Miguel Sozinho Ramalho
ab602e5d31 Update .readthedocs.yaml
https://blog.readthedocs.com/use-build-os-config/

https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
2023-08-16 18:34:36 +01:00
msramalho
67bc8b5569 Bump version to v0.3.24 for release 2023-05-10 17:09:22 +01:00
msramalho
021e7c2304 disables test due to CI 2023-05-10 17:08:39 +01:00
msramalho
91b6dcf291 Bump version to v0.3.23 for release 2023-05-10 16:47:53 +01:00
msramalho
2a1a4e2cae minor CI update 2023-05-10 16:47:39 +01:00
msramalho
fc6b914e2d Bump version to v0.3.22 for release 2023-05-10 16:28:30 +01:00
Logan Williams
d155c1364a Bump version number 2023-05-10 14:56:39 +02:00
Logan Williams
8882a87048 Fix import order 2023-05-10 14:33:47 +02:00
Logan Williams
a95c675e9c No implicit optional 2023-05-10 14:28:59 +02:00
Logan Williams
8864e7c87d Fix failing test 2023-05-10 14:25:50 +02:00
Logan Williams
db9b613ae4 Loosen dependency version requirements 2023-05-10 14:15:56 +02:00
Miguel Sozinho Ramalho
37828b4be4 Delete dependabot.yml 2023-02-27 10:21:19 +01:00
msramalho
1a3a7dc0f3 Bump version to v0.3.15 for release 2023-02-23 17:07:13 +01:00
msramalho
f67707a740 Bump version to v0.3.14 for release 2023-02-23 17:05:43 +01:00
16 changed files with 2377 additions and 1042 deletions

View File

@@ -1,11 +0,0 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
open-pull-requests-limit: 10
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"

View File

@@ -30,11 +30,11 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python: ['3.7', '3.10'] python: ['3.10']
task: # --show-capture=no on purpose task: # --show-capture=no on purpose, -s for captchas
- name: Test - name: Test
run: | run: |
pytest --show-capture=no --color=yes tests/ pytest -s --show-capture=no --color=yes tests/
include: include:
- python: '3.10' - python: '3.10'

View File

@@ -4,8 +4,12 @@ sphinx:
configuration: docs/source/conf.py configuration: docs/source/conf.py
fail_on_warning: false fail_on_warning: false
build:
os: "ubuntu-22.04"
tools:
python: "3.10"
python: python:
version: "3.8"
install: install:
- requirements: requirements.txt - requirements: requirements.txt
- requirements: dev-requirements.txt - requirements: dev-requirements.txt

View File

@@ -13,4 +13,4 @@ run-checks :
black . black .
flake8 . flake8 .
mypy . mypy .
CUDA_VISIBLE_DEVICES='' pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/ CUDA_VISIBLE_DEVICES='' pytest -v --color=yes .

27
Pipfile
View File

@@ -4,8 +4,33 @@ verify_ssl = true
name = "pypi" name = "pypi"
[packages] [packages]
yt-dlp = ">=2023.2.17"
flake8 = "*"
mypy = ">=0.961"
black = ">=22.3.0"
isort = ">=5.10.1"
pytest = "*"
pytest-sphinx = "*"
pytest-cov = "*"
twine = ">=1.11.0"
sphinx = "<5.1.0,>=4.3.0"
furo = ">=2022.6.4.1"
myst-parser = "<0.19.0,>=0.15.2"
sphinx-copybutton = ">=0.5.0"
sphinx-autobuild = ">=2021.3.14"
sphinx-autodoc-typehints = "*"
python-dotenv = ">=0.21.1"
brotli = ">=1.0.9"
certifi = ">=2023.7.22"
charset-normalizer = ">=3.0.1"
idna = ">=3.4"
mutagen = ">=1.46.0"
pycryptodomex = ">=3.17"
requests = ">=2.28.2"
urllib3 = ">=1.26.14"
websockets = ">=10.4"
# vk-api = {ref = "77b5a0d51a6bbf54d59554332f28a488615fbd6c", git = "git+https://github.com/python273/vk_api.git"}
vk-api = "*" vk-api = "*"
yt-dlp = "*"
[dev-packages] [dev-packages]
sphinx-copybutton = "==0.5.0" sphinx-copybutton = "==0.5.0"

3091
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -20,7 +20,7 @@ To use the library you will need a valid username/password combination for vk.co
vk_url_scraper --help vk_url_scraper --help
# scrape a URL and get the JSON result in the console # scrape a URL and get the JSON result in the console
vk_url_scraper -username "username here" --password "password here" --urls https://vk.com/wall12345_6789 vk_url_scraper --username "username here" --password "password here" --urls https://vk.com/wall12345_6789
# OR # OR
vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall12345_6789 vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall12345_6789
# you can also have multiple urls # you can also have multiple urls
@@ -28,7 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1
# you can pass a token as well to avoid always authenticating # you can pass a token as well to avoid always authenticating
# and possibly getting captcha prompts # and possibly getting captcha prompts
# you can fetch the token from the bk_config.v2.json file generated under by searching for "access_token" # you can fetch the token from the vk_config.v2.json file generated under by searching for "access_token"
vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789 vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
# save the JSON output into a file # save the JSON output into a file
@@ -89,7 +89,7 @@ see [docs] for all available functions.
2. To run all checks use `make run-checks` (fixes style) or individually 2. To run all checks use `make run-checks` (fixes style) or individually
1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint 1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint
2. To do type checking: `mypy .` 2. To do type checking: `mypy .`
3. To test: `pytest .` (`pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/` to user verbose, colors, and test docstring examples) 3. To test: `pytest .` (`pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/` to use verbose, colors, and test docstring examples)
3. `make docs` to generate Sphinx docs -> edit [conf.py](docs/source/conf.py) if needed 3. `make docs` to generate Sphinx docs -> edit [conf.py](docs/source/conf.py) if needed
To test the command line interface available in [__main__.py](vk_url_scraper/__main__.py) you need to pass the `-m` option to python like so: `python -m vk_url_scraper -u "" -p "" --urls ...` To test the command line interface available in [__main__.py](vk_url_scraper/__main__.py) you need to pass the `-m` option to python like so: `python -m vk_url_scraper -u "" -p "" --urls ...`
@@ -97,10 +97,11 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_
## Releasing new version ## Releasing new version
1. edit [version.py](vk_url_scraper/version.py) with proper versioning 1. edit [version.py](vk_url_scraper/version.py) with proper versioning
2. run `./scripts/release.sh` to create a tag and push, alternatively 2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv
3. run `./scripts/release.sh` to create a tag and push, alternatively
1. `git tag vx.y.z` to tag version 1. `git tag vx.y.z` to tag version
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/) 2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
3. go to https://readthedocs.org/ to deploy new docs version (if webhook is not setup) 4. go to https://readthedocs.org/ to deploy new docs version (if webhook is not setup)
### Fixing a failed release ### Fixing a failed release

View File

@@ -2,11 +2,11 @@
flake8 flake8
# Static type checking # Static type checking
mypy==0.961 mypy>=0.961
# Automatic code formatting # Automatic code formatting
black==22.3.0 black>=22.3.0
isort==5.10.1 isort>=5.10.1
# Running tests # Running tests
pytest pytest
@@ -24,19 +24,20 @@ wheel
Sphinx>=4.3.0,<5.1.0 Sphinx>=4.3.0,<5.1.0
# Sphinx theme: https://sphinx-themes.org/sample-sites/furo/ # Sphinx theme: https://sphinx-themes.org/sample-sites/furo/
furo==2022.6.4.1 furo>=2022.6.4.1
# Lets Sphinx parse markdown files in addition to rst. # Lets Sphinx parse markdown files in addition to rst.
myst-parser>=0.15.2,<0.19.0 myst-parser>=0.15.2,<0.19.0
# Adds a copy button to code examples in the docs. # Adds a copy button to code examples in the docs.
sphinx-copybutton==0.5.0 sphinx-copybutton>=0.5.0
# Live rebuilding and reloading of docs for developing locally. # Live rebuilding and reloading of docs for developing locally.
sphinx-autobuild==2021.3.14 sphinx-autobuild>=2021.3.14
# Automatically adds types to docs # Automatically adds types to docs
sphinx-autodoc-typehints sphinx-autodoc-typehints
# For parsing and comparing version numbers. # For parsing and comparing version numbers.
packaging packaging
python-dotenv>=0.21.1

View File

@@ -1,7 +1,7 @@
Installation Installation
============ ============
**vk-url-scraper** supports Python >= 3.7. **vk-url-scraper** supports Python >= 3.10.
## Installing with `pip` ## Installing with `pip`

View File

@@ -1,95 +1,94 @@
# aiohttp==3.9.1
# These requirements were autogenerated by pipenv aiosignal==1.3.1
# To regenerate from the project's Pipfile, run: alabaster==0.7.16
# async-timeout==4.0.3
# pipenv lock --requirements --dev attrs==23.2.0
# Babel==2.14.0
beautifulsoup4==4.12.3
# Note: in pipenv 2020.x, "--dev" changed to emit both default and development black==24.1a1
# requirements. To emit only development requirements, pass "--dev-only". bleach==6.0.0
Brotli==1.1.0
# -i https://pypi.org/simple certifi==2023.11.17
alabaster==0.7.13; python_version >= '3.6' cffi==1.16.0
attrs==22.2.0; python_version >= '3.6' charset-normalizer==3.3.2
babel==2.11.0; python_version >= '3.6' click==8.1.7
beautifulsoup4==4.11.2; python_version >= '3.6' colorama==0.4.6
black==22.3.0 commonmark==0.9.1
bleach==6.0.0; python_version >= '3.7' coverage==7.4.0
brotli==1.0.9; platform_python_implementation == 'CPython' cryptography==42.0.0
certifi==2022.12.7; python_version >= '3.6' docutils==0.18.1
cffi==1.15.1 exceptiongroup==1.2.0
charset-normalizer==3.0.1; python_version >= '3.6' flake8==7.0.0
click==8.1.3; python_version >= '3.7' frozenlist==1.4.1
colorama==0.4.6; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6' furo==2023.3.27
coverage[toml]==7.2.0; python_version >= '3.7' idna==3.6
cryptography==39.0.1; python_version >= '3.6' imagesize==1.4.1
docutils==0.18.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' importlib-metadata==7.0.1
exceptiongroup==1.1.0; python_version < '3.11' iniconfig==2.0.0
flake8==6.0.0 isort==6.0.0b2
furo==2022.6.21 jaraco.classes==3.3.0
idna==3.4; python_version >= '3.5' jeepney==0.8.0
imagesize==1.4.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' Jinja2==3.1.3
importlib-metadata==6.0.0; python_version >= '3.7' keyring==24.3.0
iniconfig==2.0.0; python_version >= '3.7'
isort==5.10.1
jaraco.classes==3.2.3; python_version >= '3.7'
jeepney==0.8.0; sys_platform == 'linux'
jinja2==3.1.2; python_version >= '3.7'
keyring==23.13.1; python_version >= '3.7'
livereload==2.6.3 livereload==2.6.3
markdown-it-py==2.2.0; python_version >= '3.7' markdown-it-py==2.2.0
markupsafe==2.1.2; python_version >= '3.7' MarkupSafe==2.1.4
mccabe==0.7.0; python_version >= '3.6' mccabe==0.7.0
mdit-py-plugins==0.3.4; python_version >= '3.7' mdit-py-plugins==0.3.5
mdurl==0.1.2; python_version >= '3.7' mdurl==0.1.2
more-itertools==9.0.0; python_version >= '3.7' more-itertools==10.2.0
mutagen==1.46.0; python_version >= '3.7' multidict==6.0.4
mypy-extensions==1.0.0; python_version >= '3.5' mutagen==1.47.0
mypy==0.961 mypy==1.8.0
mypy-extensions==1.0.0
myst-parser==0.18.1 myst-parser==0.18.1
packaging==23.0; python_version >= '3.7' nh3==0.2.15
pathspec==0.11.0; python_version >= '3.7' packaging==23.2
pkginfo==1.9.6; python_version >= '3.6' pathspec==0.12.1
platformdirs==3.0.0; python_version >= '3.7' pkginfo==1.9.6
pluggy==1.0.0; python_version >= '3.6' platformdirs==4.1.0
pycodestyle==2.10.0; python_version >= '3.6' pluggy==1.3.0
py==1.11.0
pycodestyle==2.11.1
pycparser==2.21 pycparser==2.21
pycryptodomex==3.17; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' pycryptodomex==3.20.0
pyflakes==3.0.1; python_version >= '3.6' pyflakes==3.2.0
pygments==2.14.0; python_version >= '3.6' Pygments==2.17.2
pytest-cov==4.0.0 pyparsing==3.0.9
pytest==8.0.0rc2
pytest-cov==4.1.0
pytest-sphinx==0.5.0 pytest-sphinx==0.5.0
pytest==7.2.1 python-dotenv==1.0.1
python-dotenv==0.21.1 pytz==2022.1
pytz==2022.7.1 PyYAML==6.0.1
pyyaml==6.0; python_version >= '3.6' readme-renderer==42.0
readme-renderer==37.3; python_version >= '3.7' requests==2.31.0
requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' requests-toolbelt==1.0.0
requests==2.28.2; python_version >= '3.7' and python_version < '4' rfc3986==2.0.0
rfc3986==2.0.0; python_version >= '3.7' rich==13.7.0
rich==13.3.1; python_version >= '3.7' SecretStorage==3.3.3
secretstorage==3.3.3; sys_platform == 'linux' six==1.16.0
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
snowballstemmer==2.2.0 snowballstemmer==2.2.0
soupsieve==2.4; python_version >= '3.7' soupsieve==2.5
Sphinx==5.0.2
sphinx-autobuild==2021.3.14 sphinx-autobuild==2021.3.14
sphinx-autodoc-typehints==1.19.1 sphinx-autodoc-typehints==1.19.1
sphinx-basic-ng==1.0.0b1; python_version >= '3.7' sphinx-basic-ng==1.0.0b2
sphinx-copybutton==0.5.0 sphinx-copybutton==0.5.2
sphinx==5.0.2 sphinxcontrib-applehelp==1.0.8
sphinxcontrib-applehelp==1.0.4; python_version >= '3.8' sphinxcontrib-devhelp==1.0.6
sphinxcontrib-devhelp==1.0.2; python_version >= '3.5' sphinxcontrib-htmlhelp==2.0.5
sphinxcontrib-htmlhelp==2.0.1; python_version >= '3.8' sphinxcontrib-jsmath==1.0.1
sphinxcontrib-jsmath==1.0.1; python_version >= '3.5' sphinxcontrib-qthelp==1.0.7
sphinxcontrib-qthelp==1.0.3; python_version >= '3.5' sphinxcontrib-serializinghtml==1.1.10
sphinxcontrib-serializinghtml==1.1.5; python_version >= '3.5' tomli==2.0.1
tomli==2.0.1; python_version < '3.11' tornado==6.4
tornado==6.2; python_version > '2.7'
twine==4.0.2 twine==4.0.2
typing-extensions==4.5.0; python_version >= '3.7' typing_extensions==4.9.0
urllib3==1.26.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' urllib3==2.1.0
vk-api==11.9.9 vk-api @ git+https://github.com/python273/vk_api.git@77b5a0d51a6bbf54d59554332f28a488615fbd6c
webencodings==0.5.1 webencodings==0.5.1
websockets==10.4; python_version >= '3.7' websockets==12.0
yt-dlp==2023.2.17 yarl==1.9.4
zipp==3.14.0; python_version >= '3.7' yt-dlp==2024.1.22.232713.dev0
zipp==3.17.0

View File

@@ -57,7 +57,7 @@ setup(
package_data={"vk_url_scraper": ["py.typed"]}, package_data={"vk_url_scraper": ["py.typed"]},
install_requires=read_requirements("requirements.txt"), install_requires=read_requirements("requirements.txt"),
extras_require={"dev": read_requirements("dev-requirements.txt")}, extras_require={"dev": read_requirements("dev-requirements.txt")},
python_requires=">=3.7", python_requires=">=3.10",
entry_points={ entry_points={
"console_scripts": [ "console_scripts": [
"vk_url_scraper=vk_url_scraper.__main__:main", "vk_url_scraper=vk_url_scraper.__main__:main",

View File

@@ -14,15 +14,16 @@ def test_login_fail():
VkScraper("invalid", "combination") VkScraper("invalid", "combination")
def test_login_custom_file(): # disabled due to CI
session_filename = "test-session.json" # def test_login_custom_file():
VkScraper( # session_filename = "test-session.json"
os.environ["VK_USERNAME"], # VkScraper(
os.environ["VK_PASSWORD"], # os.environ["VK_USERNAME"],
session_file=session_filename, # os.environ["VK_PASSWORD"],
) # session_file=session_filename,
assert os.path.isfile(session_filename) # )
os.unlink(session_filename) # assert os.path.isfile(session_filename)
# os.unlink(session_filename)
def test_login_success(): def test_login_success():
@@ -80,7 +81,7 @@ def test_scrape_wall_url_with_photos():
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея." == "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
) )
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24)) assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
assert len(res[0]["payload"]) == 17 assert len(res[0]["payload"]) == 18
assert len(res[0]["attachments"].keys()) == 1 assert len(res[0]["attachments"].keys()) == 1
assert list(res[0]["attachments"].keys()) == ["photo"] assert list(res[0]["attachments"].keys()) == ["photo"]
assert len(res[0]["attachments"]["photo"]) == 9 assert len(res[0]["attachments"]["photo"]) == 9
@@ -92,7 +93,7 @@ def test_scrape_wall_url_with_photos_inner_videos_and_links_with_inner_photos():
assert res[0]["id"] == "wall-17315087_74182" assert res[0]["id"] == "wall-17315087_74182"
assert res[0]["text"] == "" assert res[0]["text"] == ""
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 11, 1, 9)) assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 11, 1, 9))
assert len(res[0]["payload"]) == 17 assert len(res[0]["payload"]) == 18
assert len(res[0]["attachments"].keys()) == 3 assert len(res[0]["attachments"].keys()) == 3
for k in ["photo", "link", "video"]: for k in ["photo", "link", "video"]:
assert k in list(res[0]["attachments"].keys()) assert k in list(res[0]["attachments"].keys())
@@ -127,7 +128,7 @@ def test_scrape_photo_only():
== "Делимся расписанием конкурса [https://vk.com/wall-1_399468|«Код Петербурга»]. Все важные этапы — на одной схеме \n\nЕсли участвуете, обязательно сохраните себе. Так будет удобнее планировать работу над проектом, и вы точно не упустите лучший момент для отправки сервиса на модерацию." == "Делимся расписанием конкурса [https://vk.com/wall-1_399468|«Код Петербурга»]. Все важные этапы — на одной схеме \n\nЕсли участвуете, обязательно сохраните себе. Так будет удобнее планировать работу над проектом, и вы точно не упустите лучший момент для отправки сервиса на модерацию."
) )
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 7, 9, 43)) assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 7, 9, 43))
assert len(res[0]["payload"]) == 15 assert len(res[0]["payload"]) == 16
assert len(res[0]["attachments"].keys()) == 1 assert len(res[0]["attachments"].keys()) == 1
assert list(res[0]["attachments"].keys()) == ["photo"] assert list(res[0]["attachments"].keys()) == ["photo"]
assert len(res[0]["attachments"]["photo"]) == 1 assert len(res[0]["attachments"]["photo"]) == 1
@@ -138,7 +139,7 @@ def test_scrape_video_only():
assert len(res) == 1 assert len(res) == 1
assert res[0]["id"] == "video38556806_456251917" assert res[0]["id"] == "video38556806_456251917"
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 5, 42, 38)) assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 5, 42, 38))
assert len(res[0]["payload"]) == 31 assert len(res[0]["payload"]) == 34
assert len(res[0]["attachments"].keys()) == 1 assert len(res[0]["attachments"].keys()) == 1
assert list(res[0]["attachments"].keys()) == ["video"] assert list(res[0]["attachments"].keys()) == ["video"]
@@ -149,3 +150,21 @@ def test_scrape_video_only2():
vks.download_media(res, tempdir) vks.download_media(res, tempdir)
found_files = set(os.listdir(tempdir)) found_files = set(os.listdir(tempdir))
assert "video-17546758_456239898_0.mp4" in found_files assert "video-17546758_456239898_0.mp4" in found_files
def test_scrape_private_video():
"""
> Some videos are kept private and cannot be accessed without a passkey. In this case, send the ID in {owner_id}_{video_id}_{access_key}.
From https://dev.vk.com/ru/method/video.get
"""
res = vks.scrape("https://vk.com/wall-127774884_178565")
with tempfile.TemporaryDirectory(dir="./") as tempdir:
vks.download_media(res, tempdir)
expect_files = {
"wall-127774884_178565_0.mp4",
"wall-127774884_178565_1.mp4",
"wall-127774884_178565_2.mp4",
}
found_files = set(os.listdir(tempdir))
assert len(expect_files) == len(expect_files & found_files)

View File

@@ -19,7 +19,7 @@ def get_argument_parser():
action="store", action="store",
dest="username", dest="username",
required=True, required=True,
help="username for a valid vk.com account", help="username for a valid vk.com account (pass empty if using --token)",
) )
parser.add_argument( parser.add_argument(
"-p", "-p",
@@ -27,7 +27,7 @@ def get_argument_parser():
action="store", action="store",
dest="password", dest="password",
required=True, required=True,
help="password for the valid vk.com account", help="password for the valid vk.com account (pass empty if using --token)",
) )
parser.add_argument( parser.add_argument(
"-t", "-t",

View File

@@ -3,7 +3,7 @@ import re
import shutil import shutil
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from typing import List from typing import List, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
import requests import requests
@@ -37,13 +37,13 @@ class VkScraper:
WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)") WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)") PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)") VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+(?:_\w+)?)")
def __init__( def __init__(
self, self,
username: str, username: str,
password: str, password: str,
token: str = None, token: Optional[str] = None,
session_file="vk_config.v2.json", session_file="vk_config.v2.json",
captcha_handler=captcha_handler, captcha_handler=captcha_handler,
) -> None: ) -> None:
@@ -144,10 +144,11 @@ class VkScraper:
first_type = a["type"] first_type = a["type"]
attachment = a[first_type] attachment = a[first_type]
if first_type == "video": if first_type == "video":
video_path = f'video{attachment["owner_id"]}_{attachment["id"]}'
if "access_key" in attachment:
video_path += f"_{attachment['access_key']}"
attachments["video"].extend( attachments["video"].extend(
self.scrape_videos(f'video{attachment["owner_id"]}_{attachment["id"]}')[ self.scrape_videos(video_path)[0]
0
]
.get("attachments", {}) .get("attachments", {})
.get("video", [""]) .get("video", [""])
) )
@@ -338,7 +339,9 @@ class VkScraper:
filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s") filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s")
ydl = yt_dlp.YoutubeDL( ydl = yt_dlp.YoutubeDL(
{ {
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best", "format": (
"bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"
),
"merge_output_format": "mp4", "merge_output_format": "mp4",
"retries": 5, "retries": 5,
"noplaylist": True, "noplaylist": True,
@@ -352,9 +355,10 @@ class VkScraper:
info = ydl.extract_info(url, download=True) info = ydl.extract_info(url, download=True)
filename = ydl.prepare_filename(info) filename = ydl.prepare_filename(info)
if "unknown_video" in filename: if "unknown_video" in filename:
old_filename = filename
filename = shutil.copy( filename = shutil.copy(
filename, filename.replace("unknown_video", "mkv") filename, filename.replace("unknown_video", "mp4")
) )
os.remove(filename) os.remove(old_filename)
downloaded.append(filename) downloaded.append(filename)
return downloaded return downloaded

View File

@@ -15,9 +15,9 @@ class DateTimeEncoder(json.JSONEncoder):
def captcha_handler(captcha): def captcha_handler(captcha):
key = input( key = input(
f"CAPTCHA DETECTED, please solve it and input the solution. url={captcha.get_url()}:" f"CAPTCHA DETECTED, please solve it and input the solution. url= {captcha.get_url()} :"
).strip() ).strip()
return captcha.try_again(key) return captcha.try_again(key.strip())
@contextmanager @contextmanager

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "3" _MINOR = "3"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "13" _PATCH = "28"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""