mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Remove MAX_POSTS, auto-detect MIME type
Co-authored-by: Tristan Lee <tristan@bellingcat.com>
This commit is contained in:
@@ -4,8 +4,6 @@ import cisticola.scraper.base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from loguru import logger
|
||||
|
||||
- MAX_POSTS = 10
|
||||
|
||||
|
||||
class ScraperController:
|
||||
"""Registers scrapers, uses them to generate ScraperResults. Synchronizes
|
||||
@@ -49,8 +47,6 @@ class ScraperController:
|
||||
for post in posts:
|
||||
session.add(post)
|
||||
added += 1
|
||||
- if added >= MAX_POSTS:
|
||||
- break
|
||||
|
||||
session.commit()
|
||||
logger.info(
|
||||
|
||||
@@ -38,6 +38,8 @@ class Scraper:
|
||||
return url
|
||||
|
||||
blob = r.content
|
||||
|
||||
+ content_type = r.headers.get('Content-Type')
|
||||
|
||||
if key is None:
|
||||
key = url.split('/')[-1]
|
||||
@@ -46,7 +48,7 @@ class Scraper:
|
||||
filename = self.__version__.replace(' ', '_') + '/' + key
|
||||
|
||||
self.s3_client.upload_fileobj(BytesIO(blob), Bucket=os.getenv(
|
||||
- 'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': 'image/jpeg'})
|
||||
+ 'DO_BUCKET'), Key=filename, ExtraArgs={'ACL': 'public-read', 'ContentType': content_type})
|
||||
|
||||
archived_url = os.getenv('DO_URL') + '/' + filename
|
||||
|
||||
|
||||
Reference in New Issue
Block a user