Compare commits

..

12 Commits

Author SHA1 Message Date
msramalho
5b0f034c12 Bump version to v0.3.26 for release 2023-08-18 21:15:54 +01:00
msramalho
a1c098335c fix: private videos 2023-08-18 21:15:34 +01:00
msramalho
12a5d22f64 fix: certifi 2023-08-18 21:12:44 +01:00
Miguel Sozinho Ramalho
ab602e5d31 Update .readthedocs.yaml
https://blog.readthedocs.com/use-build-os-config/

https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
2023-08-16 18:34:36 +01:00
msramalho
67bc8b5569 Bump version to v0.3.24 for release 2023-05-10 17:09:22 +01:00
msramalho
021e7c2304 disables test due to CI 2023-05-10 17:08:39 +01:00
msramalho
91b6dcf291 Bump version to v0.3.23 for release 2023-05-10 16:47:53 +01:00
msramalho
2a1a4e2cae minor CI update 2023-05-10 16:47:39 +01:00
msramalho
fc6b914e2d Bump version to v0.3.22 for release 2023-05-10 16:28:30 +01:00
Logan Williams
d155c1364a Bump version number 2023-05-10 14:56:39 +02:00
Logan Williams
8882a87048 Fix import order 2023-05-10 14:33:47 +02:00
Logan Williams
a95c675e9c No implicit optional 2023-05-10 14:28:59 +02:00
10 changed files with 1098 additions and 834 deletions

View File

@@ -31,10 +31,10 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
python: ['3.7', '3.10'] python: ['3.7', '3.10']
task: # --show-capture=no on purpose task: # --show-capture=no on purpose, -s for captchas
- name: Test - name: Test
run: | run: |
pytest --show-capture=no --color=yes tests/ pytest -s --show-capture=no --color=yes tests/
include: include:
- python: '3.10' - python: '3.10'

View File

@@ -4,8 +4,12 @@ sphinx:
configuration: docs/source/conf.py configuration: docs/source/conf.py
fail_on_warning: false fail_on_warning: false
build:
os: "ubuntu-22.04"
tools:
python: "3.8"
python: python:
version: "3.8"
install: install:
- requirements: requirements.txt - requirements: requirements.txt
- requirements: dev-requirements.txt - requirements: dev-requirements.txt

View File

@@ -22,7 +22,7 @@ sphinx-autobuild = ">=2021.3.14"
sphinx-autodoc-typehints = "*" sphinx-autodoc-typehints = "*"
python-dotenv = ">=0.21.1" python-dotenv = ">=0.21.1"
brotli = ">=1.0.9" brotli = ">=1.0.9"
certifi = ">=2022.12.7" certifi = ">=2023.7.22"
charset-normalizer = ">=3.0.1" charset-normalizer = ">=3.0.1"
idna = ">=3.4" idna = ">=3.4"
mutagen = ">=1.46.0" mutagen = ">=1.46.0"

1855
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -28,7 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1
# you can pass a token as well to avoid always authenticating # you can pass a token as well to avoid always authenticating
# and possibly getting captcha prompts # and possibly getting captcha prompts
# you can fetch the token from the bk_config.v2.json file generated under by searching for "access_token" # you can fetch the token from the vk_config.v2.json file generated under by searching for "access_token"
vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789 vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
# save the JSON output into a file # save the JSON output into a file

View File

@@ -14,15 +14,16 @@ def test_login_fail():
VkScraper("invalid", "combination") VkScraper("invalid", "combination")
def test_login_custom_file(): # disabled due to CI
session_filename = "test-session.json" # def test_login_custom_file():
VkScraper( # session_filename = "test-session.json"
os.environ["VK_USERNAME"], # VkScraper(
os.environ["VK_PASSWORD"], # os.environ["VK_USERNAME"],
session_file=session_filename, # os.environ["VK_PASSWORD"],
) # session_file=session_filename,
assert os.path.isfile(session_filename) # )
os.unlink(session_filename) # assert os.path.isfile(session_filename)
# os.unlink(session_filename)
def test_login_success(): def test_login_success():
@@ -149,3 +150,21 @@ def test_scrape_video_only2():
vks.download_media(res, tempdir) vks.download_media(res, tempdir)
found_files = set(os.listdir(tempdir)) found_files = set(os.listdir(tempdir))
assert "video-17546758_456239898_0.mp4" in found_files assert "video-17546758_456239898_0.mp4" in found_files
def test_scrape_private_video():
"""
> Some videos are kept private and cannot be accessed without a passkey. In this case, send the ID in {owner_id}_{video_id}_{access_key}.
From https://dev.vk.com/ru/method/video.get
"""
res = vks.scrape("https://vk.com/wall-127774884_178565")
with tempfile.TemporaryDirectory(dir="./") as tempdir:
vks.download_media(res, tempdir)
expect_files = {
"wall-127774884_178565_0.mp4",
"wall-127774884_178565_1.mp4",
"wall-127774884_178565_2.mp4",
}
found_files = set(os.listdir(tempdir))
assert len(expect_files) == len(expect_files & found_files)

View File

@@ -19,7 +19,7 @@ def get_argument_parser():
action="store", action="store",
dest="username", dest="username",
required=True, required=True,
help="username for a valid vk.com account", help="username for a valid vk.com account (pass empty if using --token)",
) )
parser.add_argument( parser.add_argument(
"-p", "-p",
@@ -27,7 +27,7 @@ def get_argument_parser():
action="store", action="store",
dest="password", dest="password",
required=True, required=True,
help="password for the valid vk.com account", help="password for the valid vk.com account (pass empty if using --token)",
) )
parser.add_argument( parser.add_argument(
"-t", "-t",

View File

@@ -3,7 +3,7 @@ import re
import shutil import shutil
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from typing import List from typing import List, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
import requests import requests
@@ -37,13 +37,13 @@ class VkScraper:
WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)") WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)") PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)") VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+(?:_\w+)?)")
def __init__( def __init__(
self, self,
username: str, username: str,
password: str, password: str,
token: str = None, token: Optional[str] = None,
session_file="vk_config.v2.json", session_file="vk_config.v2.json",
captcha_handler=captcha_handler, captcha_handler=captcha_handler,
) -> None: ) -> None:
@@ -144,10 +144,11 @@ class VkScraper:
first_type = a["type"] first_type = a["type"]
attachment = a[first_type] attachment = a[first_type]
if first_type == "video": if first_type == "video":
video_path = f'video{attachment["owner_id"]}_{attachment["id"]}'
if "access_key" in attachment:
video_path += f"_{attachment['access_key']}"
attachments["video"].extend( attachments["video"].extend(
self.scrape_videos(f'video{attachment["owner_id"]}_{attachment["id"]}')[ self.scrape_videos(video_path)[0]
0
]
.get("attachments", {}) .get("attachments", {})
.get("video", [""]) .get("video", [""])
) )
@@ -352,9 +353,10 @@ class VkScraper:
info = ydl.extract_info(url, download=True) info = ydl.extract_info(url, download=True)
filename = ydl.prepare_filename(info) filename = ydl.prepare_filename(info)
if "unknown_video" in filename: if "unknown_video" in filename:
old_filename = filename
filename = shutil.copy( filename = shutil.copy(
filename, filename.replace("unknown_video", "mkv") filename, filename.replace("unknown_video", "mp4")
) )
os.remove(filename) os.remove(old_filename)
downloaded.append(filename) downloaded.append(filename)
return downloaded return downloaded

View File

@@ -15,7 +15,7 @@ class DateTimeEncoder(json.JSONEncoder):
def captcha_handler(captcha): def captcha_handler(captcha):
key = input( key = input(
f"CAPTCHA DETECTED, please solve it and input the solution. url={captcha.get_url()}:" f"CAPTCHA DETECTED, please solve it and input the solution. url= {captcha.get_url()} :"
).strip() ).strip()
return captcha.try_again(key) return captcha.try_again(key)

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "3" _MINOR = "3"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "15" _PATCH = "26"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""