adds session_file name customization

This commit is contained in:
msramalho
2022-11-03 16:00:58 +00:00
parent 3a8a3f54c0
commit d1b27bef1d
5 changed files with 37 additions and 3 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
.env .env
vk_config.v2.json vk_config.v2.json
output/ output/
tmp*/
# build artifacts # build artifacts
.eggs/ .eggs/

View File

@@ -26,3 +26,6 @@ python-dotenv = "*"
[requires] [requires]
python_version = "3.9" python_version = "3.9"
[pipenv]
allow_prereleases = true

View File

@@ -1,10 +1,13 @@
# vk-url-scraper # vk-url-scraper
Python library to scrape data, and especially media links like videos and photos, from vk.com URLs. Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
[![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper) [![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper)
[![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/) [![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/)
[![Documentation Status](https://readthedocs.org/projects/vk-url-scraper/badge/?version=latest)](https://vk-url-scraper.readthedocs.io/en/latest/?badge=latest)
You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage).
You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage); see the **[documentation](https://vk-url-scraper.readthedocs.io/en/latest/)** for details.
## Installation ## Installation
You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`. You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.
@@ -25,6 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1
# you can pass a token as well to avoid always authenticating # you can pass a token as well to avoid always authenticating
# and possibly getting captcha prompts # and possibly getting captcha prompts
# you can fetch the token from the generated vk_config.v2.json file by searching for "access_token"
vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789 vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
# save the JSON output into a file # save the JSON output into a file
@@ -96,6 +100,7 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_
2. run `./scripts/release.sh` to create a tag and push, alternatively 2. run `./scripts/release.sh` to create a tag and push, alternatively
1. `git tag vx.y.z` to tag version 1. `git tag vx.y.z` to tag version
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/) 2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
3. go to https://readthedocs.org/ to deploy the new docs version (if the webhook is not set up)
### Fixing a failed release ### Fixing a failed release

View File

@@ -14,6 +14,18 @@ def test_login_fail():
VkScraper("invalid", "combination") VkScraper("invalid", "combination")
def test_login_custom_file():
    """Check that VkScraper writes its session to a custom ``session_file``.

    Logs in with the credentials from the ``VK_USERNAME`` / ``VK_PASSWORD``
    environment variables (and ``VK_TOKEN`` when set), then asserts that the
    requested session file exists on disk.
    """
    session_filename = "test-session.json"
    try:
        VkScraper(
            os.environ["VK_USERNAME"],
            os.environ["VK_PASSWORD"],
            os.environ.get("VK_TOKEN"),
            session_file=session_filename,
        )
        assert os.path.isfile(session_filename)
    finally:
        # Clean up even when login or the assertion fails, so a leftover
        # session file (which presumably holds credentials/token data)
        # does not linger or let a later run pass against a stale file.
        # The existence guard avoids masking the original error with a
        # FileNotFoundError when the file was never created.
        if os.path.isfile(session_filename):
            os.unlink(session_filename)
def test_login_success(): def test_login_success():
global vks global vks
vks = VkScraper( vks = VkScraper(

View File

@@ -40,7 +40,12 @@ class VkScraper:
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)") VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
def __init__( def __init__(
self, username: str, password: str, token: str = None, captcha_handler=captcha_handler self,
username: str,
password: str,
token: str = None,
session_file="vk_config.v2.json",
captcha_handler=captcha_handler,
) -> None: ) -> None:
"""Initializes the scraper. """Initializes the scraper.
@@ -55,9 +60,17 @@ class VkScraper:
Matching password on vk.com Matching password on vk.com
token : str token : str
Access token received after authenticating, can be found in the vk_config.v2.json file Access token received after authenticating, can be found in the vk_config.v2.json file
session_file : str
File name where the VK session is saved so future logins are easier
captcha_handler : func
Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler
""" """
self.session = vk_api.VkApi( self.session = vk_api.VkApi(
username, password, token=token, captcha_handler=captcha_handler username,
password,
token=token,
config_filename=session_file,
captcha_handler=captcha_handler,
) )
if token is None or len(token) == 0: if token is None or len(token) == 0:
self.session.auth(token_only=True) self.session.auth(token_only=True)