From d96e0c0a3a4341af38ee23f858e701a2444064b0 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 21 Jun 2022 14:05:33 +0200 Subject: [PATCH] captcha fix --- vk_url_scraper/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vk_url_scraper/utils.py b/vk_url_scraper/utils.py index 889bcad..36400a2 100644 --- a/vk_url_scraper/utils.py +++ b/vk_url_scraper/utils.py @@ -22,7 +22,7 @@ def mkdir_if_not_exists(folder): def captcha_handler(captcha): print( - f"""CAPTCHA DETECTED, please solve it and put the solution into the webpage specified in the 'CAPTCHA_HANDLE_URL' env variable in the next 2min. Put the answer in the format "{captcha.sid}=SOLUTION". + f"""CAPTCHA DETECTED, please solve it and put the solution into the webpage specified in the 'CAPTCHA_HANDLE_URL' env variable in the next 10min. Put the answer in the format "{captcha.sid}=SOLUTION". {captcha.sid=} {captcha.get_url()=} @@ -32,18 +32,18 @@ def captcha_handler(captcha): if "CAPTCHA_HANDLE_URL" in os.environ: url = os.environ["CAPTCHA_HANDLE_URL"] regex_string = re.compile(f"{captcha.sid}=(.*)") - for wait in 24 * [5]: # tries every 5s for 2min + for wait in (10 * 6) * [10]: # tries every 10s for 10min print(f"sending request to {url=}", flush=True) r = requests.get(url) if r.status_code == 200: print(f"got response {r.text=}", flush=True) if key := regex_string.search(r.text): - print(f"got captcha result {key=}", flush=True) + print(f"got captcha result {key=} {key[1]=}", flush=True) return captcha.try_again(key[1]) print(f"sleeping {wait} seconds", flush=True) time.sleep(wait) else: key = input(f"Enter captcha code for {captcha.get_url()}:").strip() - return captcha.try_again(key[0]) + return captcha.try_again(key) return False