diff --git a/cisticola/__init__.py b/cisticola/__init__.py index 1960467..2b873cc 100644 --- a/cisticola/__init__.py +++ b/cisticola/__init__.py @@ -4,8 +4,6 @@ import cisticola.scraper.base from sqlalchemy.orm import sessionmaker from loguru import logger -MAX_POSTS = 10 - class ScraperController: """Registers scrapers, uses them to generate ScraperResults. Synchronizes @@ -49,8 +47,6 @@ class ScraperController: for post in posts: session.add(post) added += 1 - if added >= MAX_POSTS: - break session.commit() logger.info( diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 4d31370..a41a0b2 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -38,6 +38,8 @@ class Scraper: return url blob = r.content + + content_type = r.headers.get('Content-Type') if key is None: key = url.split('/')[-1] @@ -46,7 +48,7 @@ class Scraper: filename = self.__version__.replace(' ', '_') + '/' + key self.s3_client.upload_fileobj(BytesIO(blob), Bucket=os.getenv( - 'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': 'image/jpeg'}) + 'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': content_type}) archived_url = os.getenv('DO_URL') + '/' + filename