added Gab scraper

This commit is contained in:
Tristan Lee
2022-02-28 12:11:21 -06:00
parent 7a257ea9f5
commit bc840e631d
6 changed files with 126 additions and 5 deletions

View File

@@ -19,6 +19,9 @@ class Scraper:
'DO_SPACES_KEY'),
aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
self.headers = {
'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'}
pass
def __str__(self):
@@ -32,12 +35,13 @@ class Scraper:
def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
n_retries = 0
-r = requests.get(url)
+r = requests.get(url, headers = self.headers)
while r.status_code != 200 and n_retries < 5:
logger.warning(f"{n_retries}/5: Request for {url} failed")
n_retries += 1
-r = requests.get(url)
+r = requests.get(url, headers = self.headers)
if r.status_code != 200:
logger.error(f"Could not fetch URL {url}")