token functionality

This commit is contained in:
msramalho
2022-06-21 14:23:54 +02:00
parent 90b72b6d22
commit 80b43f7c95
6 changed files with 24 additions and 35 deletions

View File

@@ -1,3 +1,3 @@
VK_USERNAME="your username" VK_USERNAME="your username"
VK_PASSWORD="your password" VK_PASSWORD="your password"
CAPTCHA_HANDLE_URL="url to a place you control and can updated with the captcha value during development for where to get the captcha" VK_TOKEN="optional token"

View File

@@ -21,7 +21,6 @@ env:
VK_USERNAME: ${{ secrets.VK_USERNAME }} VK_USERNAME: ${{ secrets.VK_USERNAME }}
VK_PASSWORD: ${{ secrets.VK_PASSWORD }} VK_PASSWORD: ${{ secrets.VK_PASSWORD }}
VK_TOKEN: ${{ secrets.VK_TOKEN }} VK_TOKEN: ${{ secrets.VK_TOKEN }}
CAPTCHA_HANDLE_URL: ${{ secrets.CAPTCHA_HANDLE_URL }}
jobs: jobs:
checks: checks:
@@ -33,10 +32,10 @@ jobs:
matrix: matrix:
# python: ['3.7', '3.10'] # python: ['3.7', '3.10']
python: ['3.10'] python: ['3.10']
task: # --show-capture=no on purpose task: # --show-capture=no on purpose
- name: Test - name: Test
run: | run: |
pytest -s --color=yes tests/ pytest --show-capture=no --color=yes tests/
include: include:
- python: '3.10' - python: '3.10'

View File

@@ -17,7 +17,9 @@ vks = None
def test_login_success(): def test_login_success():
global vks global vks
vks = VkScraper(os.environ["VK_USERNAME"], os.environ["VK_PASSWORD"], os.environ.get("VK_TOKEN")) vks = VkScraper(
os.environ["VK_USERNAME"], os.environ["VK_PASSWORD"], os.environ.get("VK_TOKEN")
)
def test_scrape_empty_urll(): def test_scrape_empty_urll():

View File

@@ -29,6 +29,14 @@ def get_argument_parser():
required=True, required=True,
help="password for the valid vk.com account", help="password for the valid vk.com account",
) )
parser.add_argument(
"-t",
"--token",
action="store",
dest="token",
required=False,
help="optional token, when passed authentication will not be performed - good to avoid captcha issues",
)
parser.add_argument( parser.add_argument(
"-d", "-d",
"--download", "--download",
@@ -50,7 +58,7 @@ def get_argument_parser():
def main(): def main():
parser = get_argument_parser() parser = get_argument_parser()
args = parser.parse_args() args = parser.parse_args()
vks = VkScraper(args.username, args.password) vks = VkScraper(args.username, args.password, args.token)
text = " ".join(args.urls) text = " ".join(args.urls)
res = vks.scrape(text) res = vks.scrape(text)
res_json = json.dumps(res, ensure_ascii=False, indent=4, cls=DateTimeEncoder) res_json = json.dumps(res, ensure_ascii=False, indent=4, cls=DateTimeEncoder)

View File

@@ -38,7 +38,9 @@ class VkScraper:
PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)") PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)") VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
def __init__(self, username: str, password: str, token: str = None, captcha_handler=captcha_handler) -> None: def __init__(
self, username: str, password: str, token: str = None, captcha_handler=captcha_handler
) -> None:
"""Initializes the scraper. """Initializes the scraper.
This function receives a username and password (or access token) and performs This function receives a username and password (or access token) and performs
@@ -53,7 +55,9 @@ class VkScraper:
token : str token : str
Access token received after authenticating, can be found in the vk_config.v2.json file Access token received after authenticating, can be found in the vk_config.v2.json file
""" """
self.session = vk_api.VkApi(username, password, token=token, captcha_handler=captcha_handler) self.session = vk_api.VkApi(
username, password, token=token, captcha_handler=captcha_handler
)
if token is None or len(token) == 0: if token is None or len(token) == 0:
self.session.auth(token_only=True) self.session.auth(token_only=True)

View File

@@ -1,11 +1,7 @@
import json import json
import os import os
import re
import time
from datetime import datetime from datetime import datetime
import requests
class DateTimeEncoder(json.JSONEncoder): class DateTimeEncoder(json.JSONEncoder):
# to allow json.dump with datetimes do json.dumps(obj, cls=DateTimeEncoder) # to allow json.dump with datetimes do json.dumps(obj, cls=DateTimeEncoder)
@@ -22,28 +18,8 @@ def mkdir_if_not_exists(folder):
def captcha_handler(captcha): def captcha_handler(captcha):
print( print(
f"""CAPTCHA DETECTED, please solve it and put the solution into the webpage specified in the 'CAPTCHA_HANDLE_URL' env variable in the next 10min. Put the answer in the format "{captcha.sid}=SOLUTION". f"CAPTCHA DETECTED, please solve it and input the solution. {captcha.sid=} {captcha.get_url()=}",
{captcha.sid=}
{captcha.get_url()=}
""",
flush=True, flush=True,
) )
if "CAPTCHA_HANDLE_URL" in os.environ: key = input(f"Enter captcha code for {captcha.get_url()}:").strip()
url = os.environ["CAPTCHA_HANDLE_URL"] return captcha.try_again(key)
regex_string = re.compile(f"{captcha.sid}=(.*)")
for wait in (10 * 6) * [10]: # tries every 10s for 10min
print(f"sending request to {url=}", flush=True)
r = requests.get(url)
if r.status_code == 200:
print(f"got response {r.text=}", flush=True)
if key := regex_string.search(r.text):
print(f"got captcha result {key=} {key[1]=}", flush=True)
return captcha.try_again(key[1])
print(f"sleeping {wait} seconds", flush=True)
time.sleep(wait)
else:
key = input(f"Enter captcha code for {captcha.get_url()}:").strip()
return captcha.try_again(key)
return False