mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-07 19:08:30 +03:00
improves antibot flow and makes auth_wall detection optional
This commit is contained in:
@@ -129,15 +129,33 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
|
||||
),
|
||||
(
|
||||
"https://seleniumbase.io/apps/turnstile",
|
||||
'id="captcha-success"',
|
||||
'<img id="captcha-success" src="https://seleniumbase.io/cdn/img/green_check.png" style="" width="180">',
|
||||
),
|
||||
(
|
||||
"https://seleniumbase.io/apps/form_turnstile",
|
||||
'<img id="captcha-success" src="https://seleniumbase.io/cdn/img/green_check.png" width="120" style="">',
|
||||
),
|
||||
(
|
||||
"https://gitlab.com/users/sign_in",
|
||||
"Password",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_download_with_cloudflare_turnstile(self, setup_module, make_item, url, in_html):
|
||||
def test_overcome_cloudflare_turnstile(self, setup_module, make_item, url, in_html):
|
||||
"""
|
||||
Test downloading a page with Cloudflare Turnstile captcha.
|
||||
"""
|
||||
|
||||
self.extractor = setup_module(
|
||||
self.extractor_module,
|
||||
{
|
||||
"save_to_pdf": True,
|
||||
"detect_auth_wall": False,
|
||||
"max_download_images": 5,
|
||||
"max_download_videos": "inf",
|
||||
},
|
||||
)
|
||||
|
||||
item = make_item(url)
|
||||
self.extractor.enrich(item)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user