Compare commits

..

8 Commits

Author SHA1 Message Date
msramalho
5b0f034c12 Bump version to v0.3.26 for release 2023-08-18 21:15:54 +01:00
msramalho
a1c098335c fix: private videos 2023-08-18 21:15:34 +01:00
msramalho
12a5d22f64 fix: certifi 2023-08-18 21:12:44 +01:00
Miguel Sozinho Ramalho
ab602e5d31 Update .readthedocs.yaml
https://blog.readthedocs.com/use-build-os-config/

https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
2023-08-16 18:34:36 +01:00
msramalho
67bc8b5569 Bump version to v0.3.24 for release 2023-05-10 17:09:22 +01:00
msramalho
021e7c2304 disables test due to CI 2023-05-10 17:08:39 +01:00
msramalho
91b6dcf291 Bump version to v0.3.23 for release 2023-05-10 16:47:53 +01:00
msramalho
2a1a4e2cae minor CI update 2023-05-10 16:47:39 +01:00
10 changed files with 1096 additions and 832 deletions

View File

@@ -31,10 +31,10 @@ jobs:
fail-fast: false
matrix:
python: ['3.7', '3.10']
task: # --show-capture=no on purpose
task: # --show-capture=no on purpose, -s for captchas
- name: Test
run: |
pytest --show-capture=no --color=yes tests/
pytest -s --show-capture=no --color=yes tests/
include:
- python: '3.10'

View File

@@ -4,8 +4,12 @@ sphinx:
configuration: docs/source/conf.py
fail_on_warning: false
build:
os: "ubuntu-22.04"
tools:
python: "3.8"
python:
version: "3.8"
install:
- requirements: requirements.txt
- requirements: dev-requirements.txt

View File

@@ -22,7 +22,7 @@ sphinx-autobuild = ">=2021.3.14"
sphinx-autodoc-typehints = "*"
python-dotenv = ">=0.21.1"
brotli = ">=1.0.9"
certifi = ">=2022.12.7"
certifi = ">=2023.7.22"
charset-normalizer = ">=3.0.1"
idna = ">=3.4"
mutagen = ">=1.46.0"

1855
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -28,7 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1
# you can pass a token as well to avoid always authenticating
# and possibly getting captcha prompts
# you can fetch the token from the bk_config.v2.json file generated under by searching for "access_token"
# you can fetch the token from the generated vk_config.v2.json file by searching for "access_token"
vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
# save the JSON output into a file

View File

@@ -14,15 +14,16 @@ def test_login_fail():
VkScraper("invalid", "combination")
def test_login_custom_file():
session_filename = "test-session.json"
VkScraper(
os.environ["VK_USERNAME"],
os.environ["VK_PASSWORD"],
session_file=session_filename,
)
assert os.path.isfile(session_filename)
os.unlink(session_filename)
# disabled due to CI
# def test_login_custom_file():
# session_filename = "test-session.json"
# VkScraper(
# os.environ["VK_USERNAME"],
# os.environ["VK_PASSWORD"],
# session_file=session_filename,
# )
# assert os.path.isfile(session_filename)
# os.unlink(session_filename)
def test_login_success():
@@ -149,3 +150,21 @@ def test_scrape_video_only2():
vks.download_media(res, tempdir)
found_files = set(os.listdir(tempdir))
assert "video-17546758_456239898_0.mp4" in found_files
def test_scrape_private_video():
"""
> Some videos are kept private and cannot be accessed without a passkey. In this case, send the ID in {owner_id}_{video_id}_{access_key}.
From https://dev.vk.com/ru/method/video.get
"""
res = vks.scrape("https://vk.com/wall-127774884_178565")
with tempfile.TemporaryDirectory(dir="./") as tempdir:
vks.download_media(res, tempdir)
expect_files = {
"wall-127774884_178565_0.mp4",
"wall-127774884_178565_1.mp4",
"wall-127774884_178565_2.mp4",
}
found_files = set(os.listdir(tempdir))
assert len(expect_files) == len(expect_files & found_files)

View File

@@ -19,7 +19,7 @@ def get_argument_parser():
action="store",
dest="username",
required=True,
help="username for a valid vk.com account",
help="username for a valid vk.com account (pass empty if using --token)",
)
parser.add_argument(
"-p",
@@ -27,7 +27,7 @@ def get_argument_parser():
action="store",
dest="password",
required=True,
help="password for the valid vk.com account",
help="password for the valid vk.com account (pass empty if using --token)",
)
parser.add_argument(
"-t",

View File

@@ -37,7 +37,7 @@ class VkScraper:
WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+(?:_\w+)?)")
def __init__(
self,
@@ -144,10 +144,11 @@ class VkScraper:
first_type = a["type"]
attachment = a[first_type]
if first_type == "video":
video_path = f'video{attachment["owner_id"]}_{attachment["id"]}'
if "access_key" in attachment:
video_path += f"_{attachment['access_key']}"
attachments["video"].extend(
self.scrape_videos(f'video{attachment["owner_id"]}_{attachment["id"]}')[
0
]
self.scrape_videos(video_path)[0]
.get("attachments", {})
.get("video", [""])
)
@@ -352,9 +353,10 @@ class VkScraper:
info = ydl.extract_info(url, download=True)
filename = ydl.prepare_filename(info)
if "unknown_video" in filename:
old_filename = filename
filename = shutil.copy(
filename, filename.replace("unknown_video", "mkv")
filename, filename.replace("unknown_video", "mp4")
)
os.remove(filename)
os.remove(old_filename)
downloaded.append(filename)
return downloaded

View File

@@ -15,7 +15,7 @@ class DateTimeEncoder(json.JSONEncoder):
def captcha_handler(captcha):
key = input(
f"CAPTCHA DETECTED, please solve it and input the solution. url={captcha.get_url()}:"
f"CAPTCHA DETECTED, please solve it and input the solution. url= {captcha.get_url()} :"
).strip()
return captcha.try_again(key)

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "3"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "22"
_PATCH = "26"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""