diff --git a/snscrape/base.py b/snscrape/base.py
index 9be17d8..80b0045 100644
--- a/snscrape/base.py
+++ b/snscrape/base.py
@@ -6,6 +6,8 @@ import functools
 import json
 import logging
 import requests
+import requests.adapters
+import urllib3.connection
 import time
 import warnings
 
@@ -130,6 +132,33 @@ class URLItem(Item):
 		return self._url
 
 
+class _HTTPSAdapter(requests.adapters.HTTPAdapter):
+	def init_poolmanager(self, *args, **kwargs):
+		super().init_poolmanager(*args, **kwargs)
+		#FIXME: Uses private urllib3.PoolManager attribute pool_classes_by_scheme. NOTE(review): ConnectionCls is assigned on the object stored under 'https', not on this adapter; presumably that pool class is shared by other pool managers in the process, making this a global change -- confirm against urllib3.
+		try:
+			self.poolmanager.pool_classes_by_scheme['https'].ConnectionCls = _HTTPSConnection
+		except (AttributeError, KeyError) as e:
+			logger.debug(f'Could not install TLS cipher logger: {type(e).__module__}.{type(e).__name__} {e!s}')
+
+
+class _HTTPSConnection(urllib3.connection.HTTPSConnection):
+	def connect(self, *args, **kwargs):
+		conn = super().connect(*args, **kwargs)
+		#FIXME: Uses undocumented attribute self.sock and its getpeername()/cipher() methods. Both lookups are best-effort and only feed debug logging; the result of super().connect() is returned unchanged.
+		try:
+			logger.debug(f'Connected to: {self.sock.getpeername()}')
+		except AttributeError:
+			# self.sock might be a urllib3.util.ssltransport.SSLTransport, which lacks getpeername.
+			pass
+		try:
+			logger.debug(f'Connection cipher: {self.sock.cipher()}')
+		except AttributeError:
+			# Shouldn't be possible, but better safe than sorry: a missing cipher() only costs this debug line, not the connection.
+			pass
+		return conn
+
+
 class ScraperException(Exception):
 	pass
 
@@ -143,6 +172,7 @@ class Scraper:
 		self._retries = retries
 		self._proxies = proxies
 		self._session = requests.Session()
+		self._session.mount('https://', _HTTPSAdapter())
 
 	@abc.abstractmethod
 	def get_items(self):
diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 3d4df08..1f57887 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -635,11 +635,11 @@ class _CLIGuestTokenManager(GuestTokenManager):
 		pass
 
 
-class _TwitterTLSAdapter(requests.adapters.HTTPAdapter):
+class _TwitterTLSAdapter(snscrape.base._HTTPSAdapter):
 	def init_poolmanager(self, *args, **kwargs):
 		#FIXME: When urllib3 2.0.0 is out and can be required, this should use urllib3.util.create_urllib3_context instead of the private, undocumented ssl_ module.
 		kwargs['ssl_context'] = urllib3.util.ssl_.create_urllib3_context(ciphers = _CIPHERS_CHROME)
-		return super().init_poolmanager(*args, **kwargs)
+		super().init_poolmanager(*args, **kwargs)
 
 
 class _TwitterAPIType(enum.Enum):