mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-13 05:48:37 +03:00
captch fix 2
This commit is contained in:
@@ -2,16 +2,17 @@ import datetime
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from vk_url_scraper import VkScraper
|
from vk_url_scraper import VkScraper
|
||||||
|
|
||||||
|
# import pytest
|
||||||
|
|
||||||
|
|
||||||
vks = None
|
vks = None
|
||||||
|
|
||||||
|
|
||||||
def test_login_fail():
|
# def test_login_fail():
|
||||||
with pytest.raises(Exception):
|
# with pytest.raises(Exception):
|
||||||
VkScraper("invalid", "combination")
|
# VkScraper("invalid", "combination")
|
||||||
|
|
||||||
|
|
||||||
def test_login_success():
|
def test_login_success():
|
||||||
|
|||||||
@@ -22,27 +22,27 @@ def mkdir_if_not_exists(folder):
|
|||||||
|
|
||||||
def captcha_handler(captcha):
|
def captcha_handler(captcha):
|
||||||
print(
|
print(
|
||||||
f"""CAPTCHA DETECTED, please solve it and put the solution into the webpage specified in the 'CAPTCHA_HANDLE_URL' env variable in the next 60s. Put the answer in the format "{captcha.sid}=SOLUTION".
|
f"""CAPTCHA DETECTED, please solve it and put the solution into the webpage specified in the 'CAPTCHA_HANDLE_URL' env variable in the next 2min. Put the answer in the format "{captcha.sid}=SOLUTION".
|
||||||
|
|
||||||
{captcha.sid=}
|
{captcha.sid=}
|
||||||
{captcha.get_url()=}
|
{captcha.get_url()=}
|
||||||
{captcha.get_image()=}
|
""",
|
||||||
"""
|
flush=True,
|
||||||
)
|
)
|
||||||
if "CAPTCHA_HANDLE_URL" in os.environ:
|
if "CAPTCHA_HANDLE_URL" in os.environ:
|
||||||
url = os.environ["CAPTCHA_HANDLE_URL"]
|
url = os.environ["CAPTCHA_HANDLE_URL"]
|
||||||
regex_string = re.compile(f"{captcha.sid}=(.*)")
|
regex_string = re.compile(f"{captcha.sid}=(.*)")
|
||||||
for wait in 24 * [5]: # tries every 5s for 2min
|
for wait in 24 * [5]: # tries every 5s for 2min
|
||||||
print(f"sending request to {url=}")
|
print(f"sending request to {url=}", flush=True)
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
print(f"got response {r.text=}")
|
print(f"got response {r.text=}", flush=True)
|
||||||
if key := regex_string.search(r.text):
|
if key := regex_string.search(r.text):
|
||||||
print(f"got captcha result {key=}")
|
print(f"got captcha result {key=}", flush=True)
|
||||||
return captcha.try_again(key[0])
|
return captcha.try_again(key[0])
|
||||||
print(f"sleeping {wait} seconds")
|
print(f"sleeping {wait} seconds", flush=True)
|
||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
else:
|
else:
|
||||||
key = input("Enter captcha code {0}: ".format(captcha.get_url())).strip()
|
key = input(f"Enter captcha code for {captcha.get_url()}:").strip()
|
||||||
return captcha.try_again(key[0])
|
return captcha.try_again(key[0])
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|||||||
Reference in New Issue
Block a user