mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-07 19:18:34 +03:00
improves example files
This commit is contained in:
35
.env.example
Normal file
35
.env.example
Normal file
@@ -0,0 +1,35 @@
|
||||
# main settings
|
||||
USER_GROUPS_FILENAME=app/user-groups.yaml
|
||||
# database
|
||||
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
||||
DATABASE_QUERY_LIMIT=100
|
||||
|
||||
# security settings
|
||||
API_BEARER_TOKEN=TODO-MODIFY-THIS-API-TOKEN
|
||||
ALLOWED_ORIGINS='["http://localhost:8000","http://localhost:8004","http://localhost:8081","https://auto-archiver.bellingcat.com"]'
|
||||
CHROME_APP_IDS='["PROJECT_ID.apps.googleusercontent.com"]'
|
||||
BLOCKED_EMAILS='[]'
|
||||
# redis configuration
|
||||
REDIS_PASSWORD=TODO-MODIFY-THIS-REDIS-PASSWORD
|
||||
REDIS_HOSTNAME="localhost"
|
||||
|
||||
# cronjobs management, enable as needed
|
||||
CRON_ARCHIVE_SHEETS=true
|
||||
CRON_DELETE_STALE_SHEETS=true
|
||||
DELETE_STALE_SHEETS_DAYS=7
|
||||
CRON_DELETE_SCHEDULED_ARCHIVES=false
|
||||
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS=14
|
||||
|
||||
# observability for prometheus
|
||||
REPEAT_COUNT_METRICS_SECONDS=30
|
||||
|
||||
# mail service settings, if you want to email users
|
||||
MAIL_FROM="noreply@auto-archiver.com"
|
||||
MAIL_FROM_NAME="My Auto Archiver deployment"
|
||||
MAIL_USERNAME="USERNAME"
|
||||
MAIL_PASSWORD="PASSWORD"
|
||||
MAIL_SERVER="mail.server.com"
|
||||
MAIL_PORT=587
|
||||
MAIL_STARTTLS=False
|
||||
MAIL_SSL_TLS=True
|
||||
|
||||
@@ -6,4 +6,3 @@ BLOCKED_EMAILS='["blocked@example.com"]'
|
||||
DATABASE_PATH="sqlite:///auto-archiver.test.db"
|
||||
API_BEARER_TOKEN=this_is_the_test_api_token
|
||||
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
|
||||
SHEET_ORCHESTRATION_YAML=app/tests/orchestration.test.yaml
|
||||
@@ -1,9 +0,0 @@
|
||||
REDIS_PASSWORD=TODO
|
||||
|
||||
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
||||
USER_GROUPS_FILENAME=app/user-groups.yaml
|
||||
CHROME_APP_IDS=000000000000000000000000000000000000000000000.apps.googleusercontent.com,000000000000000000000000000000000000000000001.apps.googleusercontent.com
|
||||
#ALLOWED_ORIGINS="http://localhost:8004" # dev only
|
||||
|
||||
|
||||
API_BEARER_TOKEN=TODO
|
||||
@@ -13,16 +13,7 @@ class Settings(BaseSettings):
|
||||
|
||||
# general
|
||||
SERVE_LOCAL_ARCHIVE: str = ""
|
||||
USER_GROUPS_FILENAME: str = "user-groups.yaml"
|
||||
SHEET_ORCHESTRATION_YAML : str = "secrets/orchestration-sheet.yaml"
|
||||
|
||||
# cronjobs
|
||||
#TODO: disable by default?
|
||||
CRON_ARCHIVE_SHEETS: bool = False
|
||||
CRON_DELETE_STALE_SHEETS: bool = True
|
||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||
CRON_DELETE_SCHEDULED_ARCHIVES: bool = True
|
||||
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
|
||||
USER_GROUPS_FILENAME: str = "app/user-groups.yaml"
|
||||
|
||||
# database
|
||||
DATABASE_PATH: str
|
||||
@@ -31,26 +22,32 @@ class Settings(BaseSettings):
|
||||
def ASYNC_DATABASE_PATH(self) -> str:
|
||||
return self.DATABASE_PATH.replace("sqlite://", "sqlite+aiosqlite://")
|
||||
|
||||
# security
|
||||
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
|
||||
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
|
||||
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
|
||||
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
||||
|
||||
# redis
|
||||
REDIS_PASSWORD: str = ""
|
||||
REDIS_HOSTNAME: str = "localhost"
|
||||
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
|
||||
@property
|
||||
def CELERY_BROKER_URL(self)-> str:
|
||||
if self.REDIS_PASSWORD:
|
||||
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
||||
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
||||
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
|
||||
|
||||
# cronjobs
|
||||
CRON_ARCHIVE_SHEETS: bool = False
|
||||
CRON_DELETE_STALE_SHEETS: bool = False
|
||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
|
||||
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
|
||||
|
||||
# observability
|
||||
REPEAT_COUNT_METRICS_SECONDS: int = 30
|
||||
|
||||
# security
|
||||
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
|
||||
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
|
||||
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
|
||||
#TODO: deprecate blocklist?
|
||||
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
|
||||
|
||||
# email configuration, if needed
|
||||
MAIL_FROM: str = "noreply@bellingcat.com"
|
||||
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"
|
||||
|
||||
62
app/user-groups.example.yaml
Normal file
62
app/user-groups.example.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
# NOTE: all emails should be lower-cased
|
||||
users:
|
||||
user01@example.com:
|
||||
- group1
|
||||
user02@example.com:
|
||||
- group2
|
||||
user03@example.com:
|
||||
- group1
|
||||
- group2
|
||||
|
||||
domains:
|
||||
example.com:
|
||||
- group-for-friends
|
||||
gmail-example.com:
|
||||
- group1
|
||||
|
||||
|
||||
groups:
|
||||
group1:
|
||||
description: "Group 1 which can do everything, no limits"
|
||||
orchestrator: secrets/orchestration.group1.yaml
|
||||
orchestrator_sheet: secrets/orchestration.group1-sheet.yaml
|
||||
permissions:
|
||||
read: ["all"]
|
||||
archive_url: true
|
||||
archive_sheet: true
|
||||
sheet_frequency: ["hourly", "daily"]
|
||||
max_sheets: -1
|
||||
max_archive_lifespan_months: -1
|
||||
max_monthly_urls: -1
|
||||
max_monthly_mbs: -1
|
||||
manually_trigger_sheet: true
|
||||
group2:
|
||||
description: "Group that can only archive URLs, not sheets, they can search their own group and group-for-friends archives."
|
||||
orchestrator: secrets/orchestration.group2.yaml
|
||||
orchestrator_sheet: secrets/orchestration.group2-sheet.yaml
|
||||
permissions:
|
||||
read: ["group2", "group-for-friends"]
|
||||
archive_url: true
|
||||
max_archive_lifespan_months: 12
|
||||
max_monthly_urls: 100
|
||||
max_monthly_mbs: 1000
|
||||
group-for-friends:
|
||||
description: "Friends can have one sheet only which archives once a day"
|
||||
orchestrator: secrets/orchestration.friends.yaml
|
||||
orchestrator_sheet: secrets/orchestration.friends-sheet.yaml
|
||||
permissions:
|
||||
read: ["group-for-friends"]
|
||||
archive_sheet: true
|
||||
sheet_frequency: ["daily"]
|
||||
max_sheets: 1
|
||||
max_archive_lifespan_months: 12
|
||||
max_monthly_urls: 1000
|
||||
max_monthly_mbs: 1000
|
||||
default:
|
||||
description: "Public access, can only search public archives"
|
||||
orchestrator: secrets/orchestration-default.yaml
|
||||
orchestrator_sheet: secrets/orchestration-default.yaml
|
||||
permissions:
|
||||
read: ["default"]
|
||||
read_public: true
|
||||
|
||||
@@ -58,7 +58,7 @@ def app_factory(settings = get_settings()):
|
||||
# prometheus exposed in /metrics with authentication
|
||||
Instrumentator(should_group_status_codes=False, excluded_handlers=["/metrics", "/health", "/openapi.json", "/favicon.ico"]).instrument(app).expose(app, dependencies=[Depends(token_api_key_auth)])
|
||||
|
||||
# TODO: recheck this for security, currently only needed for when local_storage is used
|
||||
# TODO: recheck this for security, currently only needed for when local_storage is used in development
|
||||
local_dir = settings.SERVE_LOCAL_ARCHIVE
|
||||
if not os.path.isdir(local_dir) and os.path.isdir(local_dir.replace("/app", ".")):
|
||||
local_dir = local_dir.replace("/app", ".")
|
||||
|
||||
0
database/.gitkeep
Normal file
0
database/.gitkeep
Normal file
@@ -18,7 +18,7 @@ services:
|
||||
command: uvicorn app.web:app --factory --host 0.0.0.0
|
||||
volumes:
|
||||
- ./logs:/aa-api/logs
|
||||
- ./app/database:/aa-api/database
|
||||
- ./database:/aa-api/database
|
||||
depends_on:
|
||||
- redis
|
||||
healthcheck:
|
||||
@@ -36,7 +36,7 @@ services:
|
||||
command: celery --app=app.worker.main.celery worker --loglevel=warning --logfile=/aa-api/logs/celery.log
|
||||
volumes:
|
||||
- ./logs:/aa-api/logs
|
||||
- ./app/database:/aa-api/database
|
||||
- ./database:/aa-api/database
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
|
||||
environment:
|
||||
|
||||
Reference in New Issue
Block a user