Bump version to v0.3.3 for release

applying feedback
Bump version to v0.3.2 for release
2026-06-12 13:28:37 +03:00 · 2022-06-21 18:24:24 +02:00 · 2022-06-21 18:24:04 +02:00 · 2022-06-21 14:58:27 +02:00
8 changed files with 59 additions and 48 deletions
--- a/docs/source/_static/favicon.ico
+++ b/docs/source/_static/favicon.ico
--- a/setup.py
+++ b/setup.py
@@ -56,8 +56,8 @@ setup(
    extras_require={"dev": read_requirements("dev-requirements.txt")},
    python_requires=">=3.7",
    entry_points={
-        'console_scripts': [
+        "console_scripts": [
-            'vk_url_scraper=vk_url_scraper.__main__:main',
+            "vk_url_scraper=vk_url_scraper.__main__:main",
        ],
    },
 )
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -2,17 +2,16 @@ import datetime
 import os
 import tempfile
 import pytest
 from vk_url_scraper import VkScraper
 # import pytest
 vks = None
-# def test_login_fail():
+def test_login_fail():
-#     with pytest.raises(Exception):
+    with pytest.raises(Exception):
-#         VkScraper("invalid", "combination")
+        VkScraper("invalid", "combination")
 def test_login_success():
@@ -102,7 +101,7 @@ def test_scrape_download_multiple_media():
            "wall-17315087_74182_2.jpg",
            "wall-17315087_74182_3.jpg",
            "wall-17315087_74182_4.jpg",
-            "wall-17315087_74182_0.mkv",
+            "wall-17315087_74182_0.mp4",
        }
        found_files = set(os.listdir(tempdir))
        assert len(expect_files) == len(expect_files & found_files)
@@ -138,8 +137,4 @@ def test_scrape_video_only2():
    with tempfile.TemporaryDirectory(dir="./") as tempdir:
        vks.download_media(res, tempdir)
        found_files = set(os.listdir(tempdir))
-        # different systems might attribute different extension
+        assert "video-17546758_456239898_0.mp4" in found_files
        assert (
            "video-17546758_456239898_0.webm" in found_files
            or "video-17546758_456239898_0.mp4" in found_files
        )
--- a/vk_url_scraper/init.py
+++ b/vk_url_scraper/init.py
@@ -1,2 +1,2 @@
 from .scraper import VkScraper
-from .utils import DateTimeEncoder, mkdir_if_not_exists
+from .utils import DateTimeEncoder, suppress_stdout
--- a/vk_url_scraper/main.py
+++ b/vk_url_scraper/main.py
@@ -35,7 +35,7 @@ def get_argument_parser():
        action="store",
        dest="token",
        required=False,
-        help="optional token, when passed authentication will not be performed - good to avoid captcha issues",
+        help="optional token, when passed username/password authentication will not be done - good to avoid captcha issues",
    )
    parser.add_argument(
        "-d",
--- a/vk_url_scraper/scraper.py
+++ b/vk_url_scraper/scraper.py
@@ -1,5 +1,6 @@
 import os
 import re
 import shutil
 from collections import defaultdict
 from datetime import datetime
 from typing import List
@@ -9,7 +10,7 @@ import requests
 import vk_api  # used to get api_token after authentication
 import yt_dlp  # to download videos from url
-from .utils import captcha_handler, mkdir_if_not_exists
+from .utils import captcha_handler, suppress_stdout
 class VkScraper:
@@ -306,7 +307,7 @@ class VkScraper:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
        }
-        mkdir_if_not_exists(destination)
+        os.makedirs(destination, exist_ok=True)
        downloaded = []
        for r in results:
            for k, attachments in r["attachments"].items():
@@ -319,23 +320,30 @@ class VkScraper:
                            f.write(d.content)
                            downloaded.append(filename)
                elif k == "video":
-                    for i, url in enumerate(attachments):
+                    with suppress_stdout():  # ytdlp is not 100% quiet
-                        filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s")
+                        for i, url in enumerate(attachments):
-                        ydl = yt_dlp.YoutubeDL(
+                            filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s")
-                            {
+                            ydl = yt_dlp.YoutubeDL(
-                                "outtmpl": filename,
+                                {
-                                "quiet": True,
+                                    "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
-                                "restrictfilenames": True,
+                                    "merge_output_format": "mp4",
-                                "forcefilename": True,
+                                    "retries": 5,
-                            }
+                                    "noplaylist": True,
-                        )
+                                    "outtmpl": filename,
-                        info = ydl.extract_info(url, download=True)
+                                    "quiet": True,
-                        filename = ydl.prepare_filename(info)
+                                    "restrictfilenames": True,
-                        if "unknown_video" in filename:
+                                    "forcefilename": True,
-                            new_filename = filename.replace("unknown_video", "mkv")
+                                    "simulate": False,
-                            with open(filename, "rb") as vin, open(new_filename, "wb") as vout:
+                                }
-                                vout.write(vin.read())
+                            )
-                            os.remove(filename)
+                            info = ydl.extract_info(url, download=True)
-                            filename = new_filename
+                            filename = ydl.prepare_filename(info)
-                        downloaded.append(filename)
+                            if "unknown_video" in filename:
                                print(f"before {filename=}")
                                filename = shutil.copy(
                                    filename, filename.replace("unknown_video", "mkv")
                                )
                                print(f"after {filename=}")
                                os.remove(filename)
                            downloaded.append(filename)
        return downloaded
--- a/vk_url_scraper/utils.py
+++ b/vk_url_scraper/utils.py
@@ -1,5 +1,7 @@
 import json
 import os
 import sys
 from contextlib import contextmanager
 from datetime import datetime
@@ -11,15 +13,21 @@ class DateTimeEncoder(json.JSONEncoder):
        return json.JSONEncoder.default(self, o)
 def mkdir_if_not_exists(folder):
    """Create ``folder`` (and any missing parents) unless the path already exists.

    Nothing happens when ``folder`` already exists, whatever kind of
    filesystem entry it is; otherwise ``os.makedirs`` creates it.
    """
    if os.path.exists(folder):
        # Path is already present: leave it untouched.
        return
    os.makedirs(folder)
 def captcha_handler(captcha):
-    print(
+    key = input(
-        f"CAPTCHA DETECTED, please solve it and input the solution. {captcha.sid=} {captcha.get_url()=}",
+        f"CAPTCHA DETECTED, please solve it and input the solution. {captcha.sid=} {captcha.get_url()=}:"
-        flush=True,
+    ).strip()
    )
    key = input(f"Enter captcha code for {captcha.get_url()}:").strip()
    return captcha.try_again(key)
@contextmanager
 def suppress_stdout():
    # https://thesmithfam.org/blog/2012/10/25/temporarily-suppress-console-output-in-python/
    # this is used to silence ytdlp which does not fully respects quite=True and outputs filenames to the console
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout
--- a/vk_url_scraper/version.py
+++ b/vk_url_scraper/version.py
@@ -2,7 +2,7 @@ _MAJOR = "0"
 _MINOR = "3"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "1"
+_PATCH = "3"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
Author	SHA1	Message	Date
Miguel Ramalho	5c965102a4	Bump version to v0.3.3 for release	2022-06-21 18:24:24 +02:00
msramalho	df10e6f55f	applying feedback	2022-06-21 18:24:04 +02:00
Miguel Ramalho	863dd44463	Bump version to v0.3.2 for release	2022-06-21 14:58:27 +02:00
`@@ -1,2 +1,2 @@`
	`from .scraper import VkScraper`	`from .scraper import VkScraper`
	`from .utils import DateTimeEncoder, mkdir_if_not_exists`	`from .utils import DateTimeEncoder, suppress_stdout`