mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-07 19:08:38 +03:00
adds session_file name customization
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
.env
|
||||
vk_config.v2.json
|
||||
output/
|
||||
tmp*/
|
||||
# build artifacts
|
||||
|
||||
.eggs/
|
||||
|
||||
3
Pipfile
3
Pipfile
@@ -26,3 +26,6 @@ python-dotenv = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
||||
[pipenv]
|
||||
allow_prereleases = true
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
# vk-url-scraper
|
||||
Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
|
||||
|
||||
|
||||
[](https://badge.fury.io/py/vk-url-scraper)
|
||||
[](https://pypi.python.org/pypi/vk-url-scraper/)
|
||||
[](https://vk-url-scraper.readthedocs.io/en/latest/?badge=latest)
|
||||
|
||||
You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage).
|
||||
|
||||
You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage); see the **[documentation](https://vk-url-scraper.readthedocs.io/en/latest/)** for details.
|
||||
|
||||
## Installation
|
||||
You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.
|
||||
@@ -25,6 +28,7 @@ vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall1
|
||||
|
||||
# you can pass a token as well to avoid always authenticating
|
||||
# and possibly getting captcha prompts
|
||||
# you can fetch the token from the vk_config.v2.json file generated after authenticating, by searching for "access_token"
|
||||
vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
|
||||
|
||||
# save the JSON output into a file
|
||||
@@ -96,6 +100,7 @@ To test the command line interface available in [__main__.py](__vk_url_scraper/_
|
||||
2. run `./scripts/release.sh` to create a tag and push, alternatively
|
||||
1. `git tag vx.y.z` to tag version
|
||||
2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
|
||||
3. go to https://readthedocs.org/ to deploy new docs version (if the webhook is not set up)
|
||||
|
||||
### Fixing a failed release
|
||||
|
||||
|
||||
@@ -14,6 +14,18 @@ def test_login_fail():
|
||||
VkScraper("invalid", "combination")
|
||||
|
||||
|
||||
def test_login_custom_file():
    """Log in with a custom ``session_file`` and check that the file is created.

    Reads ``VK_USERNAME`` and ``VK_PASSWORD`` from the environment (both
    required) and ``VK_TOKEN`` (optional, ``None`` when unset).
    """
    session_filename = "test-session.json"
    try:
        VkScraper(
            os.environ["VK_USERNAME"],
            os.environ["VK_PASSWORD"],
            os.environ.get("VK_TOKEN"),
            session_file=session_filename,
        )
        assert os.path.isfile(session_filename)
    finally:
        # Clean up even when the login raises or the assertion fails, so a
        # failed run does not leave the session file behind.
        if os.path.exists(session_filename):
            os.unlink(session_filename)
|
||||
|
||||
|
||||
def test_login_success():
|
||||
global vks
|
||||
vks = VkScraper(
|
||||
|
||||
@@ -40,7 +40,12 @@ class VkScraper:
|
||||
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
|
||||
|
||||
def __init__(
|
||||
self, username: str, password: str, token: str = None, captcha_handler=captcha_handler
|
||||
self,
|
||||
username: str,
|
||||
password: str,
|
||||
token: str = None,
|
||||
session_file="vk_config.v2.json",
|
||||
captcha_handler=captcha_handler,
|
||||
) -> None:
|
||||
"""Initializes the scraper.
|
||||
|
||||
@@ -55,9 +60,17 @@ class VkScraper:
|
||||
Matching password on vk.com
|
||||
token : str
|
||||
Access token received after authenticating, can be found in the session file (vk_config.v2.json by default)
|
||||
session_file : str
|
||||
File name where the VK session is saved so future logins are easier
|
||||
captcha_handler : func
|
||||
Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler
|
||||
"""
|
||||
self.session = vk_api.VkApi(
|
||||
username, password, token=token, captcha_handler=captcha_handler
|
||||
username,
|
||||
password,
|
||||
token=token,
|
||||
config_filename=session_file,
|
||||
captcha_handler=captcha_handler,
|
||||
)
|
||||
if token is None or len(token) == 0:
|
||||
self.session.auth(token_only=True)
|
||||
|
||||
Reference in New Issue
Block a user