improves example files

This commit is contained in:
msramalho
2025-02-12 00:02:08 +00:00
parent 4f9d447ec7
commit fbfebd4671
8 changed files with 116 additions and 32 deletions

35
.env.example Normal file
View File

@@ -0,0 +1,35 @@
# main settings
USER_GROUPS_FILENAME=app/user-groups.yaml
# database
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
DATABASE_QUERY_LIMIT=100
# security settings
API_BEARER_TOKEN=TODO-MODIFY-THIS-API-TOKEN
ALLOWED_ORIGINS='["http://localhost:8000","http://localhost:8004","http://localhost:8081","https://auto-archiver.bellingcat.com"]'
CHROME_APP_IDS='["PROJECT_ID.apps.googleusercontent.com"]'
BLOCKED_EMAILS='[]'
# redis configuration
REDIS_PASSWORD=TODO-MODIFY-THIS-REDIS-PASSWORD
REDIS_HOSTNAME="localhost"
# cronjobs management, enable as needed
CRON_ARCHIVE_SHEETS=true
CRON_DELETE_STALE_SHEETS=true
DELETE_STALE_SHEETS_DAYS=7
CRON_DELETE_SCHEDULED_ARCHIVES=false
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS=14
# observability for prometheus
REPEAT_COUNT_METRICS_SECONDS=30
# mail service settings, if you want to email users
MAIL_FROM="noreply@auto-archiver.com"
MAIL_FROM_NAME="My Auto Archiver deployment"
MAIL_USERNAME="USERNAME"
MAIL_PASSWORD="PASSWORD"
MAIL_SERVER="mail.server.com"
MAIL_PORT=587
MAIL_STARTTLS=True
MAIL_SSL_TLS=False

View File

@@ -6,4 +6,3 @@ BLOCKED_EMAILS='["blocked@example.com"]'
DATABASE_PATH="sqlite:///auto-archiver.test.db"
API_BEARER_TOKEN=this_is_the_test_api_token
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
SHEET_ORCHESTRATION_YAML=app/tests/orchestration.test.yaml

View File

@@ -1,9 +0,0 @@
REDIS_PASSWORD=TODO
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
USER_GROUPS_FILENAME=app/user-groups.yaml
CHROME_APP_IDS=000000000000000000000000000000000000000000000.apps.googleusercontent.com,000000000000000000000000000000000000000000001.apps.googleusercontent.com
#ALLOWED_ORIGINS="http://localhost:8004" # dev only
API_BEARER_TOKEN=TODO

View File

@@ -13,16 +13,7 @@ class Settings(BaseSettings):
# general
SERVE_LOCAL_ARCHIVE: str = ""
USER_GROUPS_FILENAME: str = "user-groups.yaml"
SHEET_ORCHESTRATION_YAML : str = "secrets/orchestration-sheet.yaml"
# cronjobs
#TODO: disable by default?
CRON_ARCHIVE_SHEETS: bool = False
CRON_DELETE_STALE_SHEETS: bool = True
DELETE_STALE_SHEETS_DAYS: int = 14
CRON_DELETE_SCHEDULED_ARCHIVES: bool = True
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
USER_GROUPS_FILENAME: str = "app/user-groups.yaml"
# database
DATABASE_PATH: str
@@ -31,26 +22,32 @@ class Settings(BaseSettings):
def ASYNC_DATABASE_PATH(self) -> str:
return self.DATABASE_PATH.replace("sqlite://", "sqlite+aiosqlite://")
# security
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
# redis
REDIS_PASSWORD: str = ""
REDIS_HOSTNAME: str = "localhost"
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
@property
def CELERY_BROKER_URL(self)-> str:
if self.REDIS_PASSWORD:
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
return f"redis://{self.REDIS_HOSTNAME}:6379"
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
# cronjobs
CRON_ARCHIVE_SHEETS: bool = False
CRON_DELETE_STALE_SHEETS: bool = False
DELETE_STALE_SHEETS_DAYS: int = 14
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
DELETE_SCHEDULED_ARCHIVES_NOTIFY_DAYS: int = 14
# observability
REPEAT_COUNT_METRICS_SECONDS: int = 30
# security
API_BEARER_TOKEN: Annotated[str, Len(min_length=20)]
ALLOWED_ORIGINS: Annotated[Set[str], Len(min_length=1)]
CHROME_APP_IDS: Annotated[Set[Annotated[str, Len(min_length=10)]], Len(min_length=1)]
#TODO: deprecate blocklist?
BLOCKED_EMAILS: Annotated[Set[str], Len(min_length=0)] = set()
# email configuration, if needed
MAIL_FROM: str = "noreply@bellingcat.com"
MAIL_FROM_NAME: str = "Bellingcat's Auto Archiver"

View File

@@ -0,0 +1,62 @@
# NOTE: all emails should be lower-cased
users:
user01@example.com:
- group1
user02@example.com:
- group2
user03@example.com:
- group1
- group2
domains:
example.com:
- group-for-friends
gmail-example.com:
- group1
groups:
group1:
description: "Group 1 which can do everything, no limits"
orchestrator: secrets/orchestration.group1.yaml
orchestrator_sheet: secrets/orchestration.group1-sheet.yaml
permissions:
read: ["all"]
archive_url: true
archive_sheet: true
sheet_frequency: ["hourly", "daily"]
max_sheets: -1
max_archive_lifespan_months: -1
max_monthly_urls: -1
max_monthly_mbs: -1
manually_trigger_sheet: true
group2:
description: "Group that can only archive URLs, not sheets; its members can search their own group's archives and those of group-for-friends."
orchestrator: secrets/orchestration.group2.yaml
orchestrator_sheet: secrets/orchestration.group2-sheet.yaml
permissions:
read: ["group2", "group-for-friends"]
archive_url: true
max_archive_lifespan_months: 12
max_monthly_urls: 100
max_monthly_mbs: 1000
group-for-friends:
description: "Friends can have one sheet only which archives once a day"
orchestrator: secrets/orchestration.friends.yaml
orchestrator_sheet: secrets/orchestration.friends-sheet.yaml
permissions:
read: ["group-for-friends"]
archive_sheet: true
sheet_frequency: ["daily"]
max_sheets: 1
max_archive_lifespan_months: 12
max_monthly_urls: 1000
max_monthly_mbs: 1000
default:
description: "Public access, can only search public archives"
orchestrator: secrets/orchestration-default.yaml
orchestrator_sheet: secrets/orchestration-default.yaml
permissions:
read: ["default"]
read_public: true

View File

@@ -58,7 +58,7 @@ def app_factory(settings = get_settings()):
# prometheus exposed in /metrics with authentication
Instrumentator(should_group_status_codes=False, excluded_handlers=["/metrics", "/health", "/openapi.json", "/favicon.ico"]).instrument(app).expose(app, dependencies=[Depends(token_api_key_auth)])
# TODO: recheck this for security, currently only needed for when local_storage is used
# TODO: recheck this for security, currently only needed for when local_storage is used in development
local_dir = settings.SERVE_LOCAL_ARCHIVE
if not os.path.isdir(local_dir) and os.path.isdir(local_dir.replace("/app", ".")):
local_dir = local_dir.replace("/app", ".")

0
database/.gitkeep Normal file
View File

View File

@@ -18,7 +18,7 @@ services:
command: uvicorn app.web:app --factory --host 0.0.0.0
volumes:
- ./logs:/aa-api/logs
- ./app/database:/aa-api/database
- ./database:/aa-api/database
depends_on:
- redis
healthcheck:
@@ -36,7 +36,7 @@ services:
command: celery --app=app.worker.main.celery worker --loglevel=warning --logfile=/aa-api/logs/celery.log
volumes:
- ./logs:/aa-api/logs
- ./app/database:/aa-api/database
- ./database:/aa-api/database
- /var/run/docker.sock:/var/run/docker.sock
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
environment: