mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-08 03:28:35 +03:00
introduces dynamic service_account emails read from the group's orchestration files
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
CHROME_APP_IDS='["1234567890"]'
|
||||
ALLOWED_ORIGINS='["allowed"]'
|
||||
BLOCKED_EMAILS='[]'
|
||||
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
||||
DATABASE_PATH="sqlite:///./app/database/auto-archiver.db"
|
||||
API_BEARER_TOKEN=THIS_API_TOKEN_SHOULD_NEVER_BE_USED
|
||||
@@ -0,0 +1,36 @@
|
||||
"""add new service_account_email column to groups table
|
||||
|
||||
Revision ID: 63ac79df4ad0
|
||||
Revises: 02b2f6d17ed0
|
||||
Create Date: 2025-02-11 21:53:23.293274
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '63ac79df4ad0'
|
||||
down_revision = '02b2f6d17ed0'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
NEW_COL = "service_account_email"
|
||||
TABLE = "groups"
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
inspector = sa.inspect(conn)
|
||||
columns = [col['name'] for col in inspector.get_columns(TABLE)]
|
||||
|
||||
if NEW_COL not in columns:
|
||||
op.add_column(TABLE, sa.Column(NEW_COL, sa.String, nullable=True, default=None))
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
inspector = sa.inspect(conn)
|
||||
columns = [col['name'] for col in inspector.get_columns(TABLE)]
|
||||
if NEW_COL in columns:
|
||||
op.drop_column(TABLE, NEW_COL)
|
||||
@@ -87,6 +87,7 @@ class Group(Base):
|
||||
orchestrator = Column(String, default=None)
|
||||
orchestrator_sheet = Column(String, default=None)
|
||||
permissions = Column(JSON, default={})
|
||||
service_account_email = Column(String, default=None)
|
||||
domains = Column(JSON, default=[])
|
||||
|
||||
archives = relationship("Archive", back_populates="group")
|
||||
|
||||
@@ -33,11 +33,12 @@ class Settings(BaseSettings):
|
||||
|
||||
# redis
|
||||
REDIS_PASSWORD: str = ""
|
||||
REDIS_HOSTNAME: str = "localhost"
|
||||
@property
|
||||
def CELERY_BROKER_URL(self)-> str:
|
||||
if self.REDIS_PASSWORD:
|
||||
return f"redis://:{self.REDIS_PASSWORD}@localhost:6379"
|
||||
return "redis://localhost:6379"
|
||||
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
||||
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
||||
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
|
||||
|
||||
# observability
|
||||
|
||||
@@ -15,6 +15,4 @@ def get_celery(name:str="") -> Celery:
|
||||
|
||||
|
||||
def get_redis() -> redis.Redis:
|
||||
from loguru import logger
|
||||
logger.debug(get_settings().CELERY_BROKER_URL)
|
||||
return redis.Redis.from_url(get_settings().CELERY_BROKER_URL)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import json
|
||||
import os
|
||||
import yaml
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, field_validator, Field, model_validator
|
||||
from pydantic import BaseModel, computed_field, field_validator, Field, model_validator
|
||||
from typing import Dict, List, Set
|
||||
from typing_extensions import Self
|
||||
|
||||
@@ -74,11 +75,39 @@ class GroupModel(BaseModel):
|
||||
permissions: GroupPermissions
|
||||
|
||||
@field_validator('orchestrator', 'orchestrator_sheet', mode='before')
|
||||
def validate_priority(cls, v):
|
||||
def validate_orchestrator(cls, v):
|
||||
if not os.path.exists(v):
|
||||
raise ValueError(f"Orchestrator file not found with this path: {v}")
|
||||
return v
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def service_account_email(self) -> str:
|
||||
if hasattr(self, "_service_account_email"):
|
||||
return self._service_account_email
|
||||
orch = yaml.safe_load(open(self.orchestrator_sheet))
|
||||
|
||||
def find_service_account_email(d):
|
||||
for k, v in d.items():
|
||||
if k == "service_account":
|
||||
return v
|
||||
if isinstance(v, dict):
|
||||
if result := find_service_account_email(v):
|
||||
return result
|
||||
return False
|
||||
|
||||
service_account_json = find_service_account_email(orch)
|
||||
if not service_account_json:
|
||||
raise ValueError(f"service_account key not found in orchestrator sheet file: {self.orchestrator_sheet}.")
|
||||
|
||||
with open(service_account_json) as f:
|
||||
self._service_account_email = json.load(f).get("client_email")
|
||||
|
||||
if not self._service_account_email:
|
||||
raise ValueError(f"Service account email not found in {service_account_json}.")
|
||||
|
||||
return self._service_account_email
|
||||
|
||||
|
||||
class UserGroupModel(BaseModel):
|
||||
users: Dict[str, List[str]] = Field(default_factory=dict)
|
||||
@@ -137,4 +166,4 @@ class UserGroupModel(BaseModel):
|
||||
|
||||
class GroupInfo(GroupPermissions):
|
||||
description: str = ""
|
||||
service_account_emails: list[str] = []
|
||||
service_account_email: str = ""
|
||||
|
||||
3
app/tests/fake_service_account.json
Normal file
3
app/tests/fake_service_account.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
|
||||
}
|
||||
@@ -12,6 +12,8 @@ steps:
|
||||
- console_db
|
||||
|
||||
configurations:
|
||||
gsheet_feeder:
|
||||
service_account: "app/tests/fake_service_account.json"
|
||||
cli_feeder:
|
||||
urls:
|
||||
- "url1"
|
||||
|
||||
@@ -277,13 +277,14 @@ def test_upsert_group(test_data, db_session):
|
||||
|
||||
assert db_session.query(models.Group).count() == 4
|
||||
|
||||
repeatable_params = ["desc 1", "orch.yaml", "sheet.yaml", {"read": ["all"]}, ["example.com"]]
|
||||
repeatable_params = ["desc 1", "orch.yaml", "sheet.yaml", "service_account_email@example.com", {"read": ["all"]}, ["example.com"]]
|
||||
|
||||
assert (g1 := crud.upsert_group(db_session, "spaceship", *repeatable_params)) is not None
|
||||
assert g1.id == "spaceship"
|
||||
assert g1.description == "desc 1"
|
||||
assert g1.orchestrator == "orch.yaml"
|
||||
assert g1.orchestrator_sheet == "sheet.yaml"
|
||||
assert g1.service_account_email == "service_account_email@example.com"
|
||||
assert g1.permissions == {"read": ["all"]}
|
||||
assert g1.domains == ["example.com"]
|
||||
assert len(g1.users) == 2
|
||||
|
||||
@@ -129,15 +129,16 @@ def get_user_groups(email: str) -> list[str]:
|
||||
|
||||
# --------------- INIT User-Groups
|
||||
|
||||
def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, permissions: dict, domains: list) -> models.Group:
|
||||
def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, service_account_email:str, permissions: dict, domains: list) -> models.Group:
|
||||
db_group = db.query(models.Group).filter(models.Group.id == group_name).first()
|
||||
if db_group is None:
|
||||
db_group = models.Group(id=group_name, description=description, orchestrator=orchestrator, orchestrator_sheet=orchestrator_sheet, permissions=permissions, domains=domains)
|
||||
db_group = models.Group(id=group_name, description=description, orchestrator=orchestrator, orchestrator_sheet=orchestrator_sheet, service_account_email=service_account_email, permissions=permissions, domains=domains)
|
||||
db.add(db_group)
|
||||
else:
|
||||
db_group.description = description
|
||||
db_group.orchestrator = orchestrator
|
||||
db_group.orchestrator_sheet = orchestrator_sheet
|
||||
db_group.service_account_email = service_account_email
|
||||
db_group.permissions = permissions
|
||||
db_group.domains = domains
|
||||
db.commit()
|
||||
@@ -180,7 +181,8 @@ def upsert_user_groups(db: Session):
|
||||
import json
|
||||
# upsert groups and save a map of groupid -> dbobject
|
||||
for group_id, g in ug.groups.items():
|
||||
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
|
||||
logger.debug(f"GROUP {group_id} => {g.service_account_email}")
|
||||
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
|
||||
db_groups: dict[str, models.Group] = {g.id: g for g in db.query(models.Group).all()}
|
||||
|
||||
# integrity checks
|
||||
|
||||
@@ -39,7 +39,7 @@ class UserState:
|
||||
)
|
||||
for group in self.user_groups:
|
||||
if not group.permissions: continue
|
||||
self._permissions[group.id] = GroupInfo(**group.permissions, description=group.description)
|
||||
self._permissions[group.id] = GroupInfo(**group.permissions, description=group.description, service_account_email=group.service_account_email)
|
||||
return self._permissions
|
||||
|
||||
@property
|
||||
|
||||
@@ -11,8 +11,8 @@ services:
|
||||
restart: always
|
||||
env_file: .env.prod
|
||||
environment:
|
||||
CELERY_BROKER_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
|
||||
ENVIRONMENT_FILE: .env.prod
|
||||
REDIS_HOSTNAME: redis
|
||||
ports:
|
||||
- "127.0.0.1:8004:8000"
|
||||
#TODO: should prod have the --reload flag?
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
|
||||
environment:
|
||||
CELERY_BROKER_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
|
||||
REDIS_HOSTNAME: redis
|
||||
ENVIRONMENT_FILE: .env.prod
|
||||
WACZ_ENABLE_DOCKER: 1 # Enable calling docker from this container
|
||||
BROWSERTRIX_HOME_HOST: auto-archiver-api_crawls
|
||||
|
||||
Reference in New Issue
Block a user