introduces dynamic service_account emails read from the group's orchestration files

This commit is contained in:
msramalho
2025-02-11 22:50:00 +00:00
parent 0834f55520
commit 17b3705b64
12 changed files with 88 additions and 15 deletions

View File

@@ -1,5 +1,5 @@
CHROME_APP_IDS='["1234567890"]'
ALLOWED_ORIGINS='["allowed"]'
BLOCKED_EMAILS='[]'
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
DATABASE_PATH="sqlite:///./app/database/auto-archiver.db"
API_BEARER_TOKEN=THIS_API_TOKEN_SHOULD_NEVER_BE_USED

View File

@@ -0,0 +1,36 @@
"""add new service_account_email column to groups table
Revision ID: 63ac79df4ad0
Revises: 02b2f6d17ed0
Create Date: 2025-02-11 21:53:23.293274
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '63ac79df4ad0'
down_revision = '02b2f6d17ed0'
branch_labels = None
depends_on = None
NEW_COL = "service_account_email"
TABLE = "groups"
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
columns = [col['name'] for col in inspector.get_columns(TABLE)]
if NEW_COL not in columns:
op.add_column(TABLE, sa.Column(NEW_COL, sa.String, nullable=True, default=None))
def downgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
columns = [col['name'] for col in inspector.get_columns(TABLE)]
if NEW_COL in columns:
op.drop_column(TABLE, NEW_COL)

View File

@@ -87,6 +87,7 @@ class Group(Base):
orchestrator = Column(String, default=None)
orchestrator_sheet = Column(String, default=None)
permissions = Column(JSON, default={})
service_account_email = Column(String, default=None)
domains = Column(JSON, default=[])
archives = relationship("Archive", back_populates="group")

View File

@@ -33,11 +33,12 @@ class Settings(BaseSettings):
# redis
REDIS_PASSWORD: str = ""
REDIS_HOSTNAME: str = "localhost"
@property
def CELERY_BROKER_URL(self)-> str:
if self.REDIS_PASSWORD:
return f"redis://:{self.REDIS_PASSWORD}@localhost:6379"
return "redis://localhost:6379"
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
return f"redis://{self.REDIS_HOSTNAME}:6379"
REDIS_EXCEPTIONS_CHANNEL: str = "exceptions-channel"
# observability

View File

@@ -15,6 +15,4 @@ def get_celery(name:str="") -> Celery:
def get_redis() -> redis.Redis:
from loguru import logger
logger.debug(get_settings().CELERY_BROKER_URL)
return redis.Redis.from_url(get_settings().CELERY_BROKER_URL)

View File

@@ -1,7 +1,8 @@
import json
import os
import yaml
from loguru import logger
from pydantic import BaseModel, field_validator, Field, model_validator
from pydantic import BaseModel, computed_field, field_validator, Field, model_validator
from typing import Dict, List, Set
from typing_extensions import Self
@@ -74,11 +75,39 @@ class GroupModel(BaseModel):
permissions: GroupPermissions
@field_validator('orchestrator', 'orchestrator_sheet', mode='before')
def validate_priority(cls, v):
def validate_orchestrator(cls, v):
if not os.path.exists(v):
raise ValueError(f"Orchestrator file not found with this path: {v}")
return v
@computed_field
@property
def service_account_email(self) -> str:
if hasattr(self, "_service_account_email"):
return self._service_account_email
orch = yaml.safe_load(open(self.orchestrator_sheet))
def find_service_account_email(d):
for k, v in d.items():
if k == "service_account":
return v
if isinstance(v, dict):
if result := find_service_account_email(v):
return result
return False
service_account_json = find_service_account_email(orch)
if not service_account_json:
raise ValueError(f"service_account key not found in orchestrator sheet file: {self.orchestrator_sheet}.")
with open(service_account_json) as f:
self._service_account_email = json.load(f).get("client_email")
if not self._service_account_email:
raise ValueError(f"Service account email not found in {service_account_json}.")
return self._service_account_email
class UserGroupModel(BaseModel):
users: Dict[str, List[str]] = Field(default_factory=dict)
@@ -137,4 +166,4 @@ class UserGroupModel(BaseModel):
class GroupInfo(GroupPermissions):
description: str = ""
service_account_emails: list[str] = []
service_account_email: str = ""

View File

@@ -0,0 +1,3 @@
{
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
}

View File

@@ -12,6 +12,8 @@ steps:
- console_db
configurations:
gsheet_feeder:
service_account: "app/tests/fake_service_account.json"
cli_feeder:
urls:
- "url1"

View File

@@ -277,13 +277,14 @@ def test_upsert_group(test_data, db_session):
assert db_session.query(models.Group).count() == 4
repeatable_params = ["desc 1", "orch.yaml", "sheet.yaml", {"read": ["all"]}, ["example.com"]]
repeatable_params = ["desc 1", "orch.yaml", "sheet.yaml", "service_account_email@example.com", {"read": ["all"]}, ["example.com"]]
assert (g1 := crud.upsert_group(db_session, "spaceship", *repeatable_params)) is not None
assert g1.id == "spaceship"
assert g1.description == "desc 1"
assert g1.orchestrator == "orch.yaml"
assert g1.orchestrator_sheet == "sheet.yaml"
assert g1.service_account_email == "service_account_email@example.com"
assert g1.permissions == {"read": ["all"]}
assert g1.domains == ["example.com"]
assert len(g1.users) == 2

View File

@@ -129,15 +129,16 @@ def get_user_groups(email: str) -> list[str]:
# --------------- INIT User-Groups
def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, permissions: dict, domains: list) -> models.Group:
def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, service_account_email:str, permissions: dict, domains: list) -> models.Group:
db_group = db.query(models.Group).filter(models.Group.id == group_name).first()
if db_group is None:
db_group = models.Group(id=group_name, description=description, orchestrator=orchestrator, orchestrator_sheet=orchestrator_sheet, permissions=permissions, domains=domains)
db_group = models.Group(id=group_name, description=description, orchestrator=orchestrator, orchestrator_sheet=orchestrator_sheet, service_account_email=service_account_email, permissions=permissions, domains=domains)
db.add(db_group)
else:
db_group.description = description
db_group.orchestrator = orchestrator
db_group.orchestrator_sheet = orchestrator_sheet
db_group.service_account_email = service_account_email
db_group.permissions = permissions
db_group.domains = domains
db.commit()
@@ -180,7 +181,8 @@ def upsert_user_groups(db: Session):
import json
# upsert groups and save a map of groupid -> dbobject
for group_id, g in ug.groups.items():
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
logger.debug(f"GROUP {group_id} => {g.service_account_email}")
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
db_groups: dict[str, models.Group] = {g.id: g for g in db.query(models.Group).all()}
# integrity checks

View File

@@ -39,7 +39,7 @@ class UserState:
)
for group in self.user_groups:
if not group.permissions: continue
self._permissions[group.id] = GroupInfo(**group.permissions, description=group.description)
self._permissions[group.id] = GroupInfo(**group.permissions, description=group.description, service_account_email=group.service_account_email)
return self._permissions
@property

View File

@@ -11,8 +11,8 @@ services:
restart: always
env_file: .env.prod
environment:
CELERY_BROKER_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
ENVIRONMENT_FILE: .env.prod
REDIS_HOSTNAME: redis
ports:
- "127.0.0.1:8004:8000"
#TODO: should prod have the --reload flag?
@@ -42,7 +42,7 @@ services:
- /var/run/docker.sock:/var/run/docker.sock
- crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls
environment:
CELERY_BROKER_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
REDIS_HOSTNAME: redis
ENVIRONMENT_FILE: .env.prod
WACZ_ENABLE_DOCKER: 1 # Enable calling docker from this container
BROWSERTRIX_HOME_HOST: auto-archiver-api_crawls