Changed URL parsing in url_to_key to use urllib.parse.urlparse

This commit is contained in:
Tristan Lee
2022-03-01 14:13:04 -06:00
parent ee4d64750b
commit f3d9dc91c6
4 changed files with 10 additions and 8 deletions

View File

@@ -4,6 +4,7 @@ import requests
import os
import boto3
from io import BytesIO
from urllib.parse import urlparse
from loguru import logger
class Scraper:
@@ -28,8 +29,7 @@ class Scraper:
return self.__version__
def url_to_key(self, url: str, content_type: str) -> str:
    """Derive a storage key from the last path segment of *url*.

    The URL is parsed with ``urllib.parse.urlparse`` so that the query
    string, fragment and path parameters never end up in the key, even
    when they themselves contain ``/`` characters.

    Args:
        url: The URL to derive the key from.
        content_type: Currently unused by this implementation; kept so the
            signature stays the same for callers.

    Returns:
        The final ``/``-separated segment of the URL path. This is an
        empty string when the path is empty or ends with ``/``.
    """
    # Only the parsed path is consulted; manual splitting on '/' and '?'
    # would be redundant and mishandles slashes inside the query string.
    return urlparse(url).path.split('/')[-1]
def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]: