Remove MAX_POSTS, auto-detect MIME type

Co-authored-by: Tristan Lee <tristan@bellingcat.com>
This commit is contained in:
Logan Williams
2022-02-25 08:52:42 +01:00
committed by GitHub
parent e6085689b5
commit 8ab56ff5ba
2 changed files with 3 additions and 5 deletions

View File

@@ -4,8 +4,6 @@ import cisticola.scraper.base
from sqlalchemy.orm import sessionmaker
from loguru import logger
-MAX_POSTS = 10
class ScraperController:
"""Registers scrapers, uses them to generate ScraperResults. Synchronizes
@@ -49,8 +47,6 @@ class ScraperController:
for post in posts:
session.add(post)
added += 1
-if added >= MAX_POSTS:
-break
session.commit()
logger.info(

View File

@@ -38,6 +38,8 @@ class Scraper:
return url
blob = r.content
+content_type = r.headers.get('Content-Type')
if key is None:
key = url.split('/')[-1]
@@ -46,7 +48,7 @@ class Scraper:
filename = self.__version__.replace(' ', '_') + '/' + key
self.s3_client.upload_fileobj(BytesIO(blob), Bucket=os.getenv(
-'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': 'image/jpeg'})
+'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': content_type})
archived_url = os.getenv('DO_URL') + '/' + filename