mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-12 05:18:35 +03:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24a1313a65 | ||
|
|
64df4eec28 | ||
|
|
42bdc1441c | ||
|
|
c25880ee6d | ||
|
|
e1e3648852 | ||
|
|
c74dc280d8 | ||
|
|
ab15b35008 | ||
|
|
62c4536d0b |
3
.github/workflows/main.yml
vendored
3
.github/workflows/main.yml
vendored
@@ -29,7 +29,8 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python: ['3.7', '3.10']
|
# python: ['3.7', '3.10']
|
||||||
|
python: ['3.10']
|
||||||
task: # --show-capture=no on purpose
|
task: # --show-capture=no on purpose
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -131,5 +131,12 @@ def test_scrape_video_only():
|
|||||||
|
|
||||||
|
|
||||||
def test_scrape_video_only2():
|
def test_scrape_video_only2():
|
||||||
res = vks.scrape("https://vk.com/video-1_456239018")
|
res = vks.scrape("https://vk.com/video-17546758_456239898")
|
||||||
print(res[0]["attachments"]["video"][0])
|
with tempfile.TemporaryDirectory(dir="./") as tempdir:
|
||||||
|
vks.download_media(res, tempdir)
|
||||||
|
found_files = set(os.listdir(tempdir))
|
||||||
|
# different systems might attribute different extension
|
||||||
|
assert (
|
||||||
|
"video-17546758_456239898_0.webm" in found_files
|
||||||
|
or "video-17546758_456239898_0.mp4" in found_files
|
||||||
|
)
|
||||||
|
|||||||
@@ -312,8 +312,22 @@ class VkScraper:
|
|||||||
downloaded.append(filename)
|
downloaded.append(filename)
|
||||||
elif k == "video":
|
elif k == "video":
|
||||||
for i, url in enumerate(attachments):
|
for i, url in enumerate(attachments):
|
||||||
filename = os.path.join(destination, f"{r['id']}_{i}.mkv")
|
filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s")
|
||||||
ydl = yt_dlp.YoutubeDL({"outtmpl": filename, "quiet": True})
|
ydl = yt_dlp.YoutubeDL(
|
||||||
ydl.extract_info(url, download=True)
|
{
|
||||||
|
"outtmpl": filename,
|
||||||
|
"quiet": True,
|
||||||
|
"restrictfilenames": True,
|
||||||
|
"forcefilename": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
info = ydl.extract_info(url, download=True)
|
||||||
|
filename = ydl.prepare_filename(info)
|
||||||
|
if "unknown_video" in filename:
|
||||||
|
new_filename = filename.replace("unknown_video", "mkv")
|
||||||
|
with open(filename, "rb") as vin, open(new_filename, "wb") as vout:
|
||||||
|
vout.write(vin.read())
|
||||||
|
os.remove(filename)
|
||||||
|
filename = new_filename
|
||||||
downloaded.append(filename)
|
downloaded.append(filename)
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "2"
|
_MINOR = "2"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "1"
|
_PATCH = "4"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user