diff --git a/.gitignore b/.gitignore index f61cfd6..ffbf928 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .env vk_config.v2.json output/ +tmp*/ # build artifacts .eggs/ diff --git a/Pipfile b/Pipfile index 216d572..e3e4d19 100644 --- a/Pipfile +++ b/Pipfile @@ -26,3 +26,6 @@ python-dotenv = "*" [requires] python_version = "3.9" + +[pipenv] +allow_prereleases = true diff --git a/README.md b/README.md index c195cca..9289f63 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ # vk-url-scraper Python library to scrape data, and especially media links like videos and photos, from vk.com URLs. + [![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper) [![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/) +[![Documentation Status](https://readthedocs.org/projects/vk-url-scraper/badge/?version=latest)](https://vk-url-scraper.readthedocs.io/en/latest/?badge=latest) -You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage). + +You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage), check the **[documentation](https://vk-url-scraper.readthedocs.io/en/latest/)**. ## Installation You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`. @@ -25,6 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1 # you can pass a token as well to avoid always authenticating # and possibly getting captch prompts +# you can fetch the token from the vk_config.v2.json file generated after authenticating, by searching for "access_token" vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789 # save the JSON output into a file @@ -96,6 +100,7 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_ 2. 
run `./scripts/release.sh` to create a tag and push, alternatively 1. `git tag vx.y.z` to tag version 2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/) +3. go to https://readthedocs.org/ to deploy new docs version (if webhook is not setup) ### Fixing a failed release diff --git a/tests/scraper_test.py b/tests/scraper_test.py index 2f31b7b..0d53799 100644 --- a/tests/scraper_test.py +++ b/tests/scraper_test.py @@ -14,6 +14,18 @@ def test_login_fail(): VkScraper("invalid", "combination") +def test_login_custom_file(): + session_filename = "test-session.json" + VkScraper( + os.environ["VK_USERNAME"], + os.environ["VK_PASSWORD"], + os.environ.get("VK_TOKEN"), + session_file=session_filename, + ) + assert os.path.isfile(session_filename) + os.unlink(session_filename) + + def test_login_success(): global vks vks = VkScraper( diff --git a/vk_url_scraper/scraper.py b/vk_url_scraper/scraper.py index ebda11f..836a3ae 100644 --- a/vk_url_scraper/scraper.py +++ b/vk_url_scraper/scraper.py @@ -40,7 +40,12 @@ class VkScraper: VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)") def __init__( - self, username: str, password: str, token: str = None, captcha_handler=captcha_handler + self, + username: str, + password: str, + token: str = None, + session_file="vk_config.v2.json", + captcha_handler=captcha_handler, ) -> None: """Initializes the scraper. 
@@ -55,9 +60,17 @@ class VkScraper: Matching password on vk.com token : str Access token received after authenticating, can be found in the vl_config.v2.json file + session_file : str + File name where the VK session is saved so future logins are easier + captcha_handler : func + Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler """ self.session = vk_api.VkApi( - username, password, token=token, captcha_handler=captcha_handler + username, + password, + token=token, + config_filename=session_file, + captcha_handler=captcha_handler, ) if token is None or len(token) == 0: self.session.auth(token_only=True)