mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-08 03:28:35 +03:00
improves example files
This commit is contained in:
35
.env.example
Normal file
35
.env.example
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# main settings
|
||||||
|
USER_GROUPS_FILENAME=app/user-groups.yaml
|
||||||
|
# database
|
||||||
|
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
||||||
|
DATABASE_QUERY_LIMIT=100
|
||||||
|
|
||||||
|
# security settings
|
||||||
|
API_BEARER_TOKEN=TODO-MODIFY-THIS-API-TOKEN
|
||||||
|
ALLOWED_ORIGINS='["http://localhost:8000","http://localhost:8004","http://localhost:8081","https://auto-archiver.bellingcat.com"]'
|
||||||
|
CHROME_APP_IDS='["PROJECT_ID.apps.googleusercontent.com"]'
|
||||||
|
BLOCKED_EMAILS='[]'
|
||||||
|
# redis configuration
|
||||||
|
REDIS_PASSWORD=TODO-MODIFY-THIS-REDIS-PASSWORD
|
||||||
|
REDIS_HOSTNAME="localhost"
|
||||||
|
|
||||||
|
# cronjobs management, enable as needed
|
||||||
|
CRON_ARCHIVE_SHEETS=true
|
||||||
|
CRON_DELETE_STALE_SHEETS=true
|
||||||
|
DELETE_STALE_SHEETS_DAYS=7
|
||||||
|
CRON_DELETE_SCHEDULED_ARCHIVES=false
|
||||||
|
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS=14
|
||||||
|
|
||||||
|
# observability for prometheus
|
||||||
|
REPEAT_COUNT_METRICS_SECONDS=30
|
||||||
|
|
||||||
|
# mail service settings, if you want to email users
|
||||||
|
MAIL_FROM="noreply@auto-archiver.com"
|
||||||
|
MAIL_FROM_NAME="My Auto Archiver deployment"
|
||||||
|
MAIL_USERNAME="USERNAME"
|
||||||
|
MAIL_PASSWORD="PASSWORD"
|
||||||
|
MAIL_SERVER="mail.server.com"
|
||||||
|
MAIL_PORT=587
|
||||||
|
MAIL_STARTTLS=False
|
||||||
|
MAIL_SSL_TLS=True
|
||||||
|
|
||||||
@@ -5,5 +5,4 @@ BLOCKED_EMAILS='["blocked@example.com"]'
|
|||||||
|
|
||||||
DATABASE_PATH="sqlite:///auto-archiver.test.db"
|
DATABASE_PATH="sqlite:///auto-archiver.test.db"
|
||||||
API_BEARER_TOKEN=this_is_the_test_api_token
|
API_BEARER_TOKEN=this_is_the_test_api_token
|
||||||
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
|
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
|
||||||
SHEET_ORCHESTRATION_YAML=app/tests/orchestration.test.yaml
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
REDIS_PASSWORD=TODO
|
|
||||||
|
|
||||||
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
|
||||||
USER_GROUPS_FILENAME=app/user-groups.yaml
|
|
||||||
CHROME_APP_IDS=000000000000000000000000000000000000000000000.apps.googleusercontent.com,000000000000000000000000000000000000000000001.apps.googleusercontent.com
|
|
||||||
#ALLOWED_ORIGINS="http://localhost:8004" # dev only
|
|
||||||
|
|
||||||
|
|
||||||
API_BEARER_TOKEN=TODO
|
|
||||||
@@ -13,16 +13,7 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# general
|
# general
|
||||||
SERVE_LOCAL_ARCHIVE: str = ""
|
SERVE_LOCAL_ARCHIVE: str = ""
|
||||||
USER_GROUPS_FILENAME: str = "user-groups.yaml"
|
USER_GROUPS_FILENAME: str = "app/user-groups.yaml"
|
||||||
SHEET_ORCHESTRATION_YAML : str = "secrets/orchestration-sheet.yaml"
|
|
||||||
|
|
||||||
# cronjobs
|
|
||||||
#TODO: disable by default?
|
|
||||||
CRON_ARCHIVE_SHEETS: bool = False
|
|
||||||
CRON_DELETE_STALE_SHEETS: bool = True
|
|
||||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
|
||||||
CRON_DELETE_SCHEDULED_ARCHIVES: bool = True
|
|
||||||
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
|
|
||||||
|
|
||||||
# database
|
# database
|
||||||
DATABASE_PATH: str
|
DATABASE_PATH: str
|
||||||
@@ -31,26 +22,32 @@ class Settings(BaseSettings):
|
|||||||
def ASYNC_DATABASE_PATH(self) -> str:
|
def ASYNC_DATABASE_PATH(self) -> str:
|
||||||
return self.DATABASE_PATH.replace("sqlite://", "sqlite+aiosqlite://")
|
return self.DATABASE_PATH.replace("sqlite://", "sqlite+aiosqlite://")
|
||||||
|
|
||||||
|
# security
|
||||||
|
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
|
||||||
|
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
|
||||||
|
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
|
||||||
|
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
||||||
|
|
||||||
# redis
|
# redis
|
||||||
REDIS_PASSWORD: str = ""
|
REDIS_PASSWORD: str = ""
|
||||||
REDIS_HOSTNAME: str = "localhost"
|
REDIS_HOSTNAME: str = "localhost"
|
||||||
|
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
|
||||||
@property
|
@property
|
||||||
def CELERY_BROKER_URL(self)-> str:
|
def CELERY_BROKER_URL(self)-> str:
|
||||||
if self.REDIS_PASSWORD:
|
if self.REDIS_PASSWORD:
|
||||||
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
||||||
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
||||||
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
|
|
||||||
|
# cronjobs
|
||||||
|
CRON_ARCHIVE_SHEETS: bool = False
|
||||||
|
CRON_DELETE_STALE_SHEETS: bool = False
|
||||||
|
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||||
|
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
|
||||||
|
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
|
||||||
|
|
||||||
# observability
|
# observability
|
||||||
REPEAT_COUNT_METRICS_SECONDS: int = 30
|
REPEAT_COUNT_METRICS_SECONDS: int = 30
|
||||||
|
|
||||||
# security
|
|
||||||
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
|
|
||||||
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
|
|
||||||
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
|
|
||||||
#TODO: deprecate blocklist?
|
|
||||||
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
|
||||||
|
|
||||||
# email configuration, if needed
|
# email configuration, if needed
|
||||||
MAIL_FROM: str = "noreply@bellingcat.com"
|
MAIL_FROM: str = "noreply@bellingcat.com"
|
||||||
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"
|
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"
|
||||||
|
|||||||
62
app/user-groups.example.yaml
Normal file
62
app/user-groups.example.yaml
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# NOTE: all emails should be lower-cased
|
||||||
|
users:
|
||||||
|
user01@example.com:
|
||||||
|
- group1
|
||||||
|
user02@example.com:
|
||||||
|
- group2
|
||||||
|
user03@example.com:
|
||||||
|
- group1
|
||||||
|
- group2
|
||||||
|
|
||||||
|
domains:
|
||||||
|
example.com:
|
||||||
|
- group-for-friends
|
||||||
|
gmail-example.com:
|
||||||
|
- group1
|
||||||
|
|
||||||
|
|
||||||
|
groups:
|
||||||
|
group1:
|
||||||
|
description: "Group 1 which can do everything, no limits"
|
||||||
|
orchestrator: secrets/orchestration.group1.yaml
|
||||||
|
orchestrator_sheet: secrets/orchestration.group1-sheet.yaml
|
||||||
|
permissions:
|
||||||
|
read: ["all"]
|
||||||
|
archive_url: true
|
||||||
|
archive_sheet: true
|
||||||
|
sheet_frequency: ["hourly", "daily"]
|
||||||
|
max_sheets: -1
|
||||||
|
max_archive_lifespan_months: -1
|
||||||
|
max_monthly_urls: -1
|
||||||
|
max_monthly_mbs: -1
|
||||||
|
manually_trigger_sheet: true
|
||||||
|
group2:
|
||||||
|
description: "Group that can only archive URLs, not sheets; they can search their own group and group-for-friends archives."
|
||||||
|
orchestrator: secrets/orchestration.group2.yaml
|
||||||
|
orchestrator_sheet: secrets/orchestration.group2-sheet.yaml
|
||||||
|
permissions:
|
||||||
|
read: ["group2", "group-for-friends"]
|
||||||
|
archive_url: true
|
||||||
|
max_archive_lifespan_months: 12
|
||||||
|
max_monthly_urls: 100
|
||||||
|
max_monthly_mbs: 1000
|
||||||
|
group-for-friends:
|
||||||
|
description: "Friends can have one sheet only which archives once a day"
|
||||||
|
orchestrator: secrets/orchestration.friends.yaml
|
||||||
|
orchestrator_sheet: secrets/orchestration.friends-sheet.yaml
|
||||||
|
permissions:
|
||||||
|
read: ["friends-1"]
|
||||||
|
archive_sheet: true
|
||||||
|
sheet_frequency: ["daily"]
|
||||||
|
max_sheets: 1
|
||||||
|
max_archive_lifespan_months: 12
|
||||||
|
max_monthly_urls: 1000
|
||||||
|
max_monthly_mbs: 1000
|
||||||
|
default:
|
||||||
|
description: "Public access, can only search public archives"
|
||||||
|
orchestrator: secrets/orchestration-default.yaml
|
||||||
|
orchestrator_sheet: secrets/orchestration-default.yaml
|
||||||
|
permissions:
|
||||||
|
read: ["default"]
|
||||||
|
read_public: true
|
||||||
|
|
||||||
@@ -58,7 +58,7 @@ def app_factory(settings = get_settings()):
|
|||||||
# prometheus exposed in /metrics with authentication
|
# prometheus exposed in /metrics with authentication
|
||||||
Instrumentator(should_group_status_codes=False, excluded_handlers=["/metrics", "/health", "/openapi.json", "/favicon.ico"]).instrument(app).expose(app, dependencies=[Depends(token_api_key_auth)])
|
Instrumentator(should_group_status_codes=False, excluded_handlers=["/metrics", "/health", "/openapi.json", "/favicon.ico"]).instrument(app).expose(app, dependencies=[Depends(token_api_key_auth)])
|
||||||
|
|
||||||
# TODO: recheck this for security, currently only needed for when local_storage is used
|
# TODO: recheck this for security; currently only needed when local_storage is used in development
|
||||||
local_dir = settings.SERVE_LOCAL_ARCHIVE
|
local_dir = settings.SERVE_LOCAL_ARCHIVE
|
||||||
if not os.path.isdir(local_dir) and os.path.isdir(local_dir.replace("/app", ".")):
|
if not os.path.isdir(local_dir) and os.path.isdir(local_dir.replace("/app", ".")):
|
||||||
local_dir = local_dir.replace("/app", ".")
|
local_dir = local_dir.replace("/app", ".")
|
||||||
|
|||||||
0
database/.gitkeep
Normal file
0
database/.gitkeep
Normal file
@@ -18,7 +18,7 @@ services:
|
|||||||
command: uvicorn app.web:app --factory --host 0.0.0.0
|
command: uvicorn app.web:app --factory --host 0.0.0.0
|
||||||
volumes:
|
volumes:
|
||||||
- ./logs:/aa-api/logs
|
- ./logs:/aa-api/logs
|
||||||
- ./app/database:/aa-api/database
|
- ./database:/aa-api/database
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -36,7 +36,7 @@ services:
|
|||||||
command: celery --app=app.worker.main.celery worker --loglevel=warning --logfile=/aa-api/logs/celery.log
|
command: celery --app=app.worker.main.celery worker --loglevel=warning --logfile=/aa-api/logs/celery.log
|
||||||
volumes:
|
volumes:
|
||||||
- ./logs:/aa-api/logs
|
- ./logs:/aa-api/logs
|
||||||
- ./app/database:/aa-api/database
|
- ./database:/aa-api/database
|
||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
|
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
Reference in New Issue
Block a user