mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-13 05:48:37 +03:00
fix: private videos
This commit is contained in:
@@ -150,3 +150,21 @@ def test_scrape_video_only2():
|
|||||||
vks.download_media(res, tempdir)
|
vks.download_media(res, tempdir)
|
||||||
found_files = set(os.listdir(tempdir))
|
found_files = set(os.listdir(tempdir))
|
||||||
assert "video-17546758_456239898_0.mp4" in found_files
|
assert "video-17546758_456239898_0.mp4" in found_files
|
||||||
|
|
||||||
|
|
||||||
|
def test_scrape_private_video():
|
||||||
|
"""
|
||||||
|
> Some videos are kept private and cannot be accessed without a passkey. In this case, send the ID in {owner_id}_{video_id}_{access_key}.
|
||||||
|
From https://dev.vk.com/ru/method/video.get
|
||||||
|
"""
|
||||||
|
res = vks.scrape("https://vk.com/wall-127774884_178565")
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory(dir="./") as tempdir:
|
||||||
|
vks.download_media(res, tempdir)
|
||||||
|
expect_files = {
|
||||||
|
"wall-127774884_178565_0.mp4",
|
||||||
|
"wall-127774884_178565_1.mp4",
|
||||||
|
"wall-127774884_178565_2.mp4",
|
||||||
|
}
|
||||||
|
found_files = set(os.listdir(tempdir))
|
||||||
|
assert len(expect_files) == len(expect_files & found_files)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ def get_argument_parser():
|
|||||||
action="store",
|
action="store",
|
||||||
dest="username",
|
dest="username",
|
||||||
required=True,
|
required=True,
|
||||||
help="username for a valid vk.com account",
|
help="username for a valid vk.com account (pass empty if using --token)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-p",
|
"-p",
|
||||||
@@ -27,7 +27,7 @@ def get_argument_parser():
|
|||||||
action="store",
|
action="store",
|
||||||
dest="password",
|
dest="password",
|
||||||
required=True,
|
required=True,
|
||||||
help="password for the valid vk.com account",
|
help="password for the valid vk.com account (pass empty if using --token)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-t",
|
"-t",
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class VkScraper:
|
|||||||
|
|
||||||
WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
|
WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
|
||||||
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
|
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
|
||||||
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
|
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+(?:_\w+)?)")
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -144,10 +144,11 @@ class VkScraper:
|
|||||||
first_type = a["type"]
|
first_type = a["type"]
|
||||||
attachment = a[first_type]
|
attachment = a[first_type]
|
||||||
if first_type == "video":
|
if first_type == "video":
|
||||||
|
video_path = f'video{attachment["owner_id"]}_{attachment["id"]}'
|
||||||
|
if "access_key" in attachment:
|
||||||
|
video_path += f"_{attachment['access_key']}"
|
||||||
attachments["video"].extend(
|
attachments["video"].extend(
|
||||||
self.scrape_videos(f'video{attachment["owner_id"]}_{attachment["id"]}')[
|
self.scrape_videos(video_path)[0]
|
||||||
0
|
|
||||||
]
|
|
||||||
.get("attachments", {})
|
.get("attachments", {})
|
||||||
.get("video", [""])
|
.get("video", [""])
|
||||||
)
|
)
|
||||||
@@ -352,9 +353,10 @@ class VkScraper:
|
|||||||
info = ydl.extract_info(url, download=True)
|
info = ydl.extract_info(url, download=True)
|
||||||
filename = ydl.prepare_filename(info)
|
filename = ydl.prepare_filename(info)
|
||||||
if "unknown_video" in filename:
|
if "unknown_video" in filename:
|
||||||
|
old_filename = filename
|
||||||
filename = shutil.copy(
|
filename = shutil.copy(
|
||||||
filename, filename.replace("unknown_video", "mkv")
|
filename, filename.replace("unknown_video", "mp4")
|
||||||
)
|
)
|
||||||
os.remove(filename)
|
os.remove(old_filename)
|
||||||
downloaded.append(filename)
|
downloaded.append(filename)
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|||||||
Reference in New Issue
Block a user