mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-13 05:58:35 +03:00
feat: check for proper spreadsheet service_account access before archiving, plus email notification
This commit is contained in:
133
app/shared/utils/sheets.py
Normal file
133
app/shared/utils/sheets.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
"""Utilities for checking Google Sheet access permissions."""
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
import requests as http_requests
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from app.shared.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=32)
|
||||||
|
def get_service_account_json_path(orchestrator_sheet_path: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Extract the service account JSON file path from an orchestrator sheet
|
||||||
|
YAML config.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to the service account JSON file, or None if not found.
|
||||||
|
"""
|
||||||
|
if not orchestrator_sheet_path:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(orchestrator_sheet_path) as f:
|
||||||
|
orch = yaml.safe_load(f)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not read orchestrator sheet config {orchestrator_sheet_path}: {e}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not isinstance(orch, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def find_key(d: dict, key: str):
|
||||||
|
for k, v in d.items():
|
||||||
|
if k == key:
|
||||||
|
return v
|
||||||
|
if isinstance(v, dict):
|
||||||
|
if result := find_key(v, key):
|
||||||
|
return result
|
||||||
|
return None
|
||||||
|
|
||||||
|
return find_key(orch, "service_account")
|
||||||
|
|
||||||
|
|
||||||
|
def check_sheet_write_access(
|
||||||
|
service_account_json_path: str, sheet_id: str
|
||||||
|
) -> bool | None:
|
||||||
|
"""
|
||||||
|
Check if a Google service account has write (Editor) access to a Google
|
||||||
|
Sheet using the Google Drive API.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True: Service account has write access.
|
||||||
|
False: Service account does NOT have write access (or no access).
|
||||||
|
None: Could not determine (network/auth error) — caller should
|
||||||
|
proceed with archiving and let it fail naturally.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from google.auth.transport.requests import Request
|
||||||
|
from google.oauth2.service_account import Credentials
|
||||||
|
|
||||||
|
creds = Credentials.from_service_account_file(
|
||||||
|
service_account_json_path,
|
||||||
|
scopes=["https://www.googleapis.com/auth/drive.metadata.readonly"],
|
||||||
|
)
|
||||||
|
creds.refresh(Request())
|
||||||
|
|
||||||
|
resp = http_requests.get(
|
||||||
|
f"https://www.googleapis.com/drive/v3/files/{sheet_id}",
|
||||||
|
params={
|
||||||
|
"fields": "capabilities/canEdit",
|
||||||
|
"supportsAllDrives": "true",
|
||||||
|
},
|
||||||
|
headers={"Authorization": f"Bearer {creds.token}"},
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
if resp.status_code == 404:
|
||||||
|
return False
|
||||||
|
if resp.status_code == 403:
|
||||||
|
return False
|
||||||
|
if resp.status_code == 200:
|
||||||
|
return resp.json().get("capabilities", {}).get("canEdit", False)
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Unexpected Drive API response {resp.status_code} for sheet {sheet_id}: {resp.text}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.error(
|
||||||
|
f"Service account JSON not found: {service_account_json_path}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not check write access for sheet {sheet_id}: {e}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_sheet_access_error(
|
||||||
|
orchestrator_sheet_path: str | None,
|
||||||
|
service_account_email: str | None,
|
||||||
|
sheet_id: str,
|
||||||
|
) -> str | None:
|
||||||
|
"""
|
||||||
|
Check if the service account has write access to a Google Sheet.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
An error message string if the sheet is NOT accessible, or None if
|
||||||
|
access is OK (or the check is indeterminate).
|
||||||
|
"""
|
||||||
|
if not orchestrator_sheet_path:
|
||||||
|
return None
|
||||||
|
|
||||||
|
sa_json_path = get_service_account_json_path(orchestrator_sheet_path)
|
||||||
|
if not sa_json_path:
|
||||||
|
return None
|
||||||
|
|
||||||
|
has_access = check_sheet_write_access(sa_json_path, sheet_id)
|
||||||
|
if has_access is False:
|
||||||
|
sa_display = (
|
||||||
|
service_account_email or "the Auto Archiver service account"
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
f"The Google Sheet has not been shared with the Auto Archiver "
|
||||||
|
f"service account ({sa_display}). Please share the sheet with "
|
||||||
|
f"this email address and give it Editor permissions."
|
||||||
|
)
|
||||||
|
return None
|
||||||
171
app/tests/shared/utils/test_sheets.py
Normal file
171
app/tests/shared/utils/test_sheets.py
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
from unittest.mock import mock_open, patch
|
||||||
|
|
||||||
|
from app.shared.utils.sheets import (
|
||||||
|
check_sheet_write_access,
|
||||||
|
get_service_account_json_path,
|
||||||
|
get_sheet_access_error,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetServiceAccountJsonPath:
|
||||||
|
def test_returns_none_for_empty_path(self):
|
||||||
|
assert get_service_account_json_path("") is None
|
||||||
|
assert get_service_account_json_path(None) is None
|
||||||
|
|
||||||
|
def test_returns_path_from_orchestrator_yaml(self):
|
||||||
|
# The test orchestration file has a service_account key
|
||||||
|
get_service_account_json_path.cache_clear()
|
||||||
|
result = get_service_account_json_path(
|
||||||
|
"app/tests/orchestration.test.yaml"
|
||||||
|
)
|
||||||
|
assert result == "app/tests/fake_service_account.json"
|
||||||
|
|
||||||
|
def test_returns_none_for_missing_file(self):
|
||||||
|
get_service_account_json_path.cache_clear()
|
||||||
|
assert get_service_account_json_path("nonexistent/path.yaml") is None
|
||||||
|
|
||||||
|
def test_returns_none_for_invalid_yaml(self):
|
||||||
|
get_service_account_json_path.cache_clear()
|
||||||
|
with patch(
|
||||||
|
"builtins.open", mock_open(read_data="!!! invalid yaml {{{")
|
||||||
|
):
|
||||||
|
result = get_service_account_json_path("some/path.yaml")
|
||||||
|
# yaml.safe_load may return a string for some invalid inputs
|
||||||
|
# The function should not crash
|
||||||
|
assert result is None or isinstance(result, str)
|
||||||
|
|
||||||
|
def test_returns_none_when_no_service_account_key(self):
|
||||||
|
get_service_account_json_path.cache_clear()
|
||||||
|
yaml_content = "steps:\n feeders:\n - cli_feeder\n"
|
||||||
|
with patch("builtins.open", mock_open(read_data=yaml_content)):
|
||||||
|
assert get_service_account_json_path("some/path.yaml") is None
|
||||||
|
|
||||||
|
def test_finds_nested_service_account_key(self):
|
||||||
|
get_service_account_json_path.cache_clear()
|
||||||
|
yaml_content = (
|
||||||
|
"configurations:\n"
|
||||||
|
" gsheet_feeder_db:\n"
|
||||||
|
" service_account: secrets/nested_sa.json\n"
|
||||||
|
)
|
||||||
|
with patch("builtins.open", mock_open(read_data=yaml_content)):
|
||||||
|
result = get_service_account_json_path("some/path.yaml")
|
||||||
|
assert result == "secrets/nested_sa.json"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckSheetWriteAccess:
|
||||||
|
@patch("app.shared.utils.sheets.http_requests.get")
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file"
|
||||||
|
)
|
||||||
|
def test_returns_true_when_can_edit(self, m_creds, m_get):
|
||||||
|
m_creds.return_value.token = "fake-token"
|
||||||
|
m_get.return_value.status_code = 200
|
||||||
|
m_get.return_value.json.return_value = {
|
||||||
|
"capabilities": {"canEdit": True}
|
||||||
|
}
|
||||||
|
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is True
|
||||||
|
m_get.assert_called_once()
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.http_requests.get")
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file"
|
||||||
|
)
|
||||||
|
def test_returns_false_when_cannot_edit(self, m_creds, m_get):
|
||||||
|
m_creds.return_value.token = "fake-token"
|
||||||
|
m_get.return_value.status_code = 200
|
||||||
|
m_get.return_value.json.return_value = {
|
||||||
|
"capabilities": {"canEdit": False}
|
||||||
|
}
|
||||||
|
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.http_requests.get")
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file"
|
||||||
|
)
|
||||||
|
def test_returns_false_on_404(self, m_creds, m_get):
|
||||||
|
m_creds.return_value.token = "fake-token"
|
||||||
|
m_get.return_value.status_code = 404
|
||||||
|
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.http_requests.get")
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file"
|
||||||
|
)
|
||||||
|
def test_returns_false_on_403(self, m_creds, m_get):
|
||||||
|
m_creds.return_value.token = "fake-token"
|
||||||
|
m_get.return_value.status_code = 403
|
||||||
|
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.http_requests.get")
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file"
|
||||||
|
)
|
||||||
|
def test_returns_none_on_unexpected_status(self, m_creds, m_get):
|
||||||
|
m_creds.return_value.token = "fake-token"
|
||||||
|
m_get.return_value.status_code = 500
|
||||||
|
m_get.return_value.text = "Internal Server Error"
|
||||||
|
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_returns_none_when_file_not_found(self):
|
||||||
|
result = check_sheet_write_access("nonexistent/sa.json", "sheet123")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"google.oauth2.service_account.Credentials.from_service_account_file",
|
||||||
|
side_effect=Exception("auth failed"),
|
||||||
|
)
|
||||||
|
def test_returns_none_on_auth_error(self, m_creds):
|
||||||
|
result = check_sheet_write_access("sa.json", "sheet123")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetSheetAccessError:
|
||||||
|
@patch("app.shared.utils.sheets.check_sheet_write_access")
|
||||||
|
@patch("app.shared.utils.sheets.get_service_account_json_path")
|
||||||
|
def test_returns_none_when_access_ok(self, m_get_path, m_check):
|
||||||
|
m_get_path.return_value = "sa.json"
|
||||||
|
m_check.return_value = True
|
||||||
|
|
||||||
|
result = get_sheet_access_error("orch.yaml", "sa@test.com", "sheet1")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.check_sheet_write_access")
|
||||||
|
@patch("app.shared.utils.sheets.get_service_account_json_path")
|
||||||
|
def test_returns_error_when_no_access(self, m_get_path, m_check):
|
||||||
|
m_get_path.return_value = "sa.json"
|
||||||
|
m_check.return_value = False
|
||||||
|
|
||||||
|
result = get_sheet_access_error("orch.yaml", "sa@test.com", "sheet1")
|
||||||
|
assert result is not None
|
||||||
|
assert "sa@test.com" in result
|
||||||
|
assert "Editor" in result
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.check_sheet_write_access")
|
||||||
|
@patch("app.shared.utils.sheets.get_service_account_json_path")
|
||||||
|
def test_returns_none_when_indeterminate(self, m_get_path, m_check):
|
||||||
|
m_get_path.return_value = "sa.json"
|
||||||
|
m_check.return_value = None
|
||||||
|
|
||||||
|
result = get_sheet_access_error("orch.yaml", "sa@test.com", "sheet1")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_returns_none_when_no_orchestrator_path(self):
|
||||||
|
assert get_sheet_access_error(None, "sa@test.com", "sheet1") is None
|
||||||
|
assert get_sheet_access_error("", "sa@test.com", "sheet1") is None
|
||||||
|
|
||||||
|
@patch("app.shared.utils.sheets.get_service_account_json_path")
|
||||||
|
def test_returns_none_when_no_sa_json_path(self, m_get_path):
|
||||||
|
m_get_path.return_value = None
|
||||||
|
|
||||||
|
result = get_sheet_access_error("orch.yaml", "sa@test.com", "sheet1")
|
||||||
|
assert result is None
|
||||||
@@ -344,3 +344,133 @@ class TestArchiveUserSheetEndpoint:
|
|||||||
assert r.json() == {
|
assert r.json() == {
|
||||||
"detail": "User cannot manually trigger sheet archiving in this group."
|
"detail": "User cannot manually trigger sheet archiving in this group."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestSheetAccessPermissionCheck:
|
||||||
|
"""Tests for the Google Sheet write access permission check."""
|
||||||
|
|
||||||
|
ERROR_MSG = (
|
||||||
|
"The Google Sheet has not been shared with the Auto Archiver "
|
||||||
|
"service account (sa@test.iam.gserviceaccount.com). Please "
|
||||||
|
"share the sheet with this email address and give it Editor "
|
||||||
|
"permissions."
|
||||||
|
)
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"app.web.routers.sheet.get_sheet_access_error",
|
||||||
|
return_value=ERROR_MSG,
|
||||||
|
)
|
||||||
|
def test_create_sheet_no_write_access(
|
||||||
|
self, m_access, app_with_auth, db_session
|
||||||
|
):
|
||||||
|
"""Sheet creation is blocked when the SA has no write access."""
|
||||||
|
client = TestClient(app_with_auth)
|
||||||
|
data = {
|
||||||
|
"id": "no-access-sheet",
|
||||||
|
"name": "Test Sheet",
|
||||||
|
"group_id": "spaceship",
|
||||||
|
"frequency": "daily",
|
||||||
|
}
|
||||||
|
r = client.post("/sheet/create", json=data)
|
||||||
|
assert r.status_code == HTTPStatus.FORBIDDEN
|
||||||
|
assert "service account" in r.json()["detail"]
|
||||||
|
m_access.assert_called_once()
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"app.web.routers.sheet.get_sheet_access_error",
|
||||||
|
return_value=None,
|
||||||
|
)
|
||||||
|
def test_create_sheet_access_indeterminate_proceeds(
|
||||||
|
self, m_access, app_with_auth, db_session
|
||||||
|
):
|
||||||
|
"""Sheet creation proceeds when access check is indeterminate."""
|
||||||
|
client = TestClient(app_with_auth)
|
||||||
|
data = {
|
||||||
|
"id": "maybe-access-sheet",
|
||||||
|
"name": "Test Sheet",
|
||||||
|
"group_id": "spaceship",
|
||||||
|
"frequency": "daily",
|
||||||
|
}
|
||||||
|
r = client.post("/sheet/create", json=data)
|
||||||
|
assert r.status_code == HTTPStatus.CREATED
|
||||||
|
m_access.assert_called_once()
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"app.web.routers.sheet.get_sheet_access_error",
|
||||||
|
return_value=ERROR_MSG,
|
||||||
|
)
|
||||||
|
def test_archive_sheet_no_write_access(
|
||||||
|
self, m_access, app_with_auth, db_session
|
||||||
|
):
|
||||||
|
"""Manual trigger is blocked when the SA has no write access."""
|
||||||
|
db_session.add(
|
||||||
|
models.Sheet(
|
||||||
|
id="123-sheet-id",
|
||||||
|
name="Test Sheet 1",
|
||||||
|
author_id="rick@example.com",
|
||||||
|
group_id="spaceship",
|
||||||
|
frequency="hourly",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
db_session.commit()
|
||||||
|
client = TestClient(app_with_auth)
|
||||||
|
r = client.post("/sheet/123-sheet-id/archive")
|
||||||
|
assert r.status_code == HTTPStatus.FORBIDDEN
|
||||||
|
assert "service account" in r.json()["detail"]
|
||||||
|
assert "Editor" in r.json()["detail"]
|
||||||
|
m_access.assert_called_once()
|
||||||
|
|
||||||
|
@patch("app.web.routers.sheet.celery", return_value=MagicMock())
|
||||||
|
@patch(
|
||||||
|
"app.web.routers.sheet.get_sheet_access_error",
|
||||||
|
return_value=None,
|
||||||
|
)
|
||||||
|
def test_archive_sheet_access_ok_proceeds(
|
||||||
|
self, m_access, m_celery, app_with_auth, db_session
|
||||||
|
):
|
||||||
|
"""Manual trigger proceeds when access check passes."""
|
||||||
|
db_session.add(
|
||||||
|
models.Sheet(
|
||||||
|
id="123-sheet-id",
|
||||||
|
name="Test Sheet 1",
|
||||||
|
author_id="rick@example.com",
|
||||||
|
group_id="spaceship",
|
||||||
|
frequency="hourly",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
db_session.commit()
|
||||||
|
|
||||||
|
m_signature = MagicMock()
|
||||||
|
m_signature.apply_async.return_value = TaskResult(
|
||||||
|
id="task-123", status=STATUS_PENDING, result=""
|
||||||
|
)
|
||||||
|
m_celery.signature.return_value = m_signature
|
||||||
|
|
||||||
|
client = TestClient(app_with_auth)
|
||||||
|
r = client.post("/sheet/123-sheet-id/archive")
|
||||||
|
assert r.status_code == HTTPStatus.CREATED
|
||||||
|
m_access.assert_called_once()
|
||||||
|
m_celery.signature.assert_called_once()
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"app.web.routers.sheet.get_sheet_access_error",
|
||||||
|
return_value=ERROR_MSG,
|
||||||
|
)
|
||||||
|
def test_token_archive_sheet_no_write_access(
|
||||||
|
self, m_access, app_with_token, db_session
|
||||||
|
):
|
||||||
|
"""API token trigger is also blocked when SA has no write access."""
|
||||||
|
db_session.add(
|
||||||
|
models.Sheet(
|
||||||
|
id="token-sheet-id",
|
||||||
|
name="Token Sheet",
|
||||||
|
author_id="rick@example.com",
|
||||||
|
group_id="spaceship",
|
||||||
|
frequency="hourly",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
db_session.commit()
|
||||||
|
client = TestClient(app_with_token)
|
||||||
|
r = client.post("/sheet/token-sheet-id/archive")
|
||||||
|
assert r.status_code == HTTPStatus.FORBIDDEN
|
||||||
|
assert "service account" in r.json()["detail"]
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from app.shared.db.database import (
|
|||||||
from app.shared.log import logger
|
from app.shared.log import logger
|
||||||
from app.shared.settings import get_settings
|
from app.shared.settings import get_settings
|
||||||
from app.shared.task_messaging import get_celery
|
from app.shared.task_messaging import get_celery
|
||||||
|
from app.shared.utils.sheets import get_sheet_access_error
|
||||||
from app.web.db import crud
|
from app.web.db import crud
|
||||||
from app.web.middleware import increase_exceptions_counter
|
from app.web.middleware import increase_exceptions_counter
|
||||||
from app.web.utils.metrics import (
|
from app.web.utils.metrics import (
|
||||||
@@ -30,6 +31,11 @@ from app.web.utils.metrics import (
|
|||||||
|
|
||||||
celery = get_celery()
|
celery = get_celery()
|
||||||
|
|
||||||
|
# Throttle cache: track when each sheet was last notified about missing
|
||||||
|
# permissions so users are not spammed on every cron cycle.
|
||||||
|
_sheet_no_access_notified: dict[str, datetime.datetime] = {}
|
||||||
|
_NOTIFY_COOLDOWN = datetime.timedelta(hours=24)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
@@ -113,12 +119,31 @@ async def archive_sheets_cronjob(
|
|||||||
frequency: str, interval: int, current_time_unit: int
|
frequency: str, interval: int, current_time_unit: int
|
||||||
):
|
):
|
||||||
triggered_jobs = []
|
triggered_jobs = []
|
||||||
|
no_access_sheets: dict[str, list[tuple]] = defaultdict(list)
|
||||||
|
|
||||||
async with get_db_async() as db:
|
async with get_db_async() as db:
|
||||||
sheets = await crud.get_sheets_by_id_hash(
|
sheets = await crud.get_sheets_by_id_hash(
|
||||||
db, frequency, str(interval), current_time_unit
|
db, frequency, str(interval), current_time_unit
|
||||||
)
|
)
|
||||||
for s in sheets:
|
for s in sheets:
|
||||||
|
# Check if service account has write access to the sheet
|
||||||
|
group = await db.get(models.Group, s.group_id)
|
||||||
|
if group and group.orchestrator_sheet:
|
||||||
|
access_error = get_sheet_access_error(
|
||||||
|
group.orchestrator_sheet,
|
||||||
|
group.service_account_email,
|
||||||
|
s.id,
|
||||||
|
)
|
||||||
|
if access_error:
|
||||||
|
no_access_sheets[s.author_id].append(
|
||||||
|
(s, group.service_account_email or "")
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
f"[CRON] Skipping sheet {s.id}: not shared with "
|
||||||
|
f"service account {group.service_account_email}"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
group_queue = await crud.get_group_priority_async(db, s.group_id)
|
group_queue = await crud.get_group_priority_async(db, s.group_id)
|
||||||
task = celery.signature(
|
task = celery.signature(
|
||||||
"create_sheet_task",
|
"create_sheet_task",
|
||||||
@@ -132,11 +157,75 @@ async def archive_sheets_cronjob(
|
|||||||
).apply_async(**group_queue)
|
).apply_async(**group_queue)
|
||||||
|
|
||||||
triggered_jobs.append({"sheet_id": s.id, "task_id": task.id})
|
triggered_jobs.append({"sheet_id": s.id, "task_id": task.id})
|
||||||
|
|
||||||
|
if no_access_sheets:
|
||||||
|
await _notify_sheet_permission_issues(no_access_sheets)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"[CRON {frequency.upper()}:{current_time_unit}] Triggered {len(triggered_jobs)} sheet tasks: {triggered_jobs}"
|
f"[CRON {frequency.upper()}:{current_time_unit}] Triggered {len(triggered_jobs)} sheet tasks: {triggered_jobs}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _notify_sheet_permission_issues(
|
||||||
|
no_access_sheets: dict[str, list[tuple]],
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Send email notifications to users whose sheets are not shared with the
|
||||||
|
Auto Archiver service account. Throttled to at most one email per sheet
|
||||||
|
per 24 hours to avoid spamming.
|
||||||
|
"""
|
||||||
|
now = datetime.datetime.now()
|
||||||
|
fastmail = FastMail(get_settings().mail_config)
|
||||||
|
|
||||||
|
for email, sheet_infos in no_access_sheets.items():
|
||||||
|
# Filter to sheets that haven't been notified recently
|
||||||
|
sheets_to_notify = []
|
||||||
|
for s, sa_email in sheet_infos:
|
||||||
|
last = _sheet_no_access_notified.get(s.id)
|
||||||
|
if not last or (now - last) >= _NOTIFY_COOLDOWN:
|
||||||
|
sheets_to_notify.append((s, sa_email))
|
||||||
|
_sheet_no_access_notified[s.id] = now
|
||||||
|
|
||||||
|
if not sheets_to_notify:
|
||||||
|
continue
|
||||||
|
|
||||||
|
list_of_sheets = "\n".join(
|
||||||
|
[
|
||||||
|
f'<li><a href="https://docs.google.com/spreadsheets/d/{s.id}">'
|
||||||
|
f"{s.name}</a> — share with <code>{sa_email}</code></li>"
|
||||||
|
for s, sa_email in sheets_to_notify
|
||||||
|
]
|
||||||
|
)
|
||||||
|
message = MessageSchema(
|
||||||
|
subject="Auto Archiver: Sheet Permission Issue",
|
||||||
|
recipients=[email],
|
||||||
|
body=f"""
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<p>Hi {email},</p>
|
||||||
|
<p>The following sheets could not be archived because they have
|
||||||
|
not been shared with the Auto Archiver service account with
|
||||||
|
<strong>Editor</strong> permissions:</p>
|
||||||
|
<ul>
|
||||||
|
{list_of_sheets}
|
||||||
|
</ul>
|
||||||
|
<p>Please open each sheet, click <em>Share</em>, and add the
|
||||||
|
service account email shown above as an <strong>Editor</strong>.
|
||||||
|
The sheets will be archived automatically on the next scheduled
|
||||||
|
run once access is granted.</p>
|
||||||
|
<p>Best,<br>The Auto Archiver team</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
""",
|
||||||
|
subtype=MessageType.html,
|
||||||
|
)
|
||||||
|
await fastmail.send_message(message)
|
||||||
|
logger.debug(
|
||||||
|
f"[CRON] Email sent to {email} about {len(sheets_to_notify)} "
|
||||||
|
f"sheet(s) with permission issues."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# TODO: on exception should logerror but also prometheus counter
|
# TODO: on exception should logerror but also prometheus counter
|
||||||
DELETE_WINDOW = (
|
DELETE_WINDOW = (
|
||||||
get_settings().DELETE_SCHEDULED_ARCHIVES_CHECK_EVERY_N_DAYS * 24 * 60 * 60
|
get_settings().DELETE_SCHEDULED_ARCHIVES_CHECK_EVERY_N_DAYS * 24 * 60 * 60
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from fastapi.responses import JSONResponse
|
|||||||
from sqlalchemy import exc
|
from sqlalchemy import exc
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.shared.db import models
|
||||||
from app.shared.db.database import get_db_dependency
|
from app.shared.db.database import get_db_dependency
|
||||||
from app.shared.schemas import (
|
from app.shared.schemas import (
|
||||||
DeleteResponse,
|
DeleteResponse,
|
||||||
@@ -13,6 +14,7 @@ from app.shared.schemas import (
|
|||||||
SubmitSheet,
|
SubmitSheet,
|
||||||
)
|
)
|
||||||
from app.shared.task_messaging import get_celery
|
from app.shared.task_messaging import get_celery
|
||||||
|
from app.shared.utils.sheets import get_sheet_access_error
|
||||||
from app.web.config import ALLOW_ANY_EMAIL
|
from app.web.config import ALLOW_ANY_EMAIL
|
||||||
from app.web.db import crud
|
from app.web.db import crud
|
||||||
from app.web.db.user_state import UserState
|
from app.web.db.user_state import UserState
|
||||||
@@ -53,6 +55,22 @@ def create_sheet(
|
|||||||
detail="Invalid frequency selected for this group.",
|
detail="Invalid frequency selected for this group.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check if the service account has write access to the Google Sheet
|
||||||
|
group = (
|
||||||
|
db.query(models.Group).filter(models.Group.id == sheet.group_id).first()
|
||||||
|
)
|
||||||
|
if group:
|
||||||
|
access_error = get_sheet_access_error(
|
||||||
|
group.orchestrator_sheet,
|
||||||
|
group.service_account_email,
|
||||||
|
sheet.id,
|
||||||
|
)
|
||||||
|
if access_error:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=HTTPStatus.FORBIDDEN,
|
||||||
|
detail=access_error,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return crud.create_sheet(
|
return crud.create_sheet(
|
||||||
db,
|
db,
|
||||||
@@ -138,6 +156,22 @@ def archive_user_sheet(
|
|||||||
group_queue = user.priority_group(sheet.group_id)
|
group_queue = user.priority_group(sheet.group_id)
|
||||||
author_id = user.email
|
author_id = user.email
|
||||||
|
|
||||||
|
# Check if the service account has write access to the Google Sheet
|
||||||
|
group = (
|
||||||
|
db.query(models.Group).filter(models.Group.id == sheet.group_id).first()
|
||||||
|
)
|
||||||
|
if group:
|
||||||
|
access_error = get_sheet_access_error(
|
||||||
|
group.orchestrator_sheet,
|
||||||
|
group.service_account_email,
|
||||||
|
sheet_id,
|
||||||
|
)
|
||||||
|
if access_error:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=HTTPStatus.FORBIDDEN,
|
||||||
|
detail=access_error,
|
||||||
|
)
|
||||||
|
|
||||||
task = celery.signature(
|
task = celery.signature(
|
||||||
"create_sheet_task",
|
"create_sheet_task",
|
||||||
args=[
|
args=[
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from app.shared.log import log_error
|
|||||||
from app.shared.settings import get_settings
|
from app.shared.settings import get_settings
|
||||||
from app.shared.task_messaging import get_celery, get_redis
|
from app.shared.task_messaging import get_celery, get_redis
|
||||||
from app.shared.utils.misc import get_all_urls
|
from app.shared.utils.misc import get_all_urls
|
||||||
|
from app.shared.utils.sheets import get_sheet_access_error
|
||||||
from app.worker.worker_log import logger, setup_celery_logger
|
from app.worker.worker_log import logger, setup_celery_logger
|
||||||
|
|
||||||
|
|
||||||
@@ -98,6 +99,30 @@ def create_sheet_task(self, sheet_json: str):
|
|||||||
)
|
)
|
||||||
logger.info(f"[queue={queue_name}] SHEET START {sheet=}")
|
logger.info(f"[queue={queue_name}] SHEET START {sheet=}")
|
||||||
|
|
||||||
|
# Early check: does the service account have write access to the sheet?
|
||||||
|
with get_db() as session:
|
||||||
|
group = worker_crud.get_group(session, sheet.group_id)
|
||||||
|
if group and group.orchestrator_sheet:
|
||||||
|
access_error = get_sheet_access_error(
|
||||||
|
group.orchestrator_sheet,
|
||||||
|
group.service_account_email,
|
||||||
|
sheet.sheet_id,
|
||||||
|
)
|
||||||
|
if access_error:
|
||||||
|
logger.warning(
|
||||||
|
f"SHEET SKIPPED {sheet.sheet_id}: {access_error}"
|
||||||
|
)
|
||||||
|
return schemas.CelerySheetTask(
|
||||||
|
success=False,
|
||||||
|
sheet_id=sheet.sheet_id,
|
||||||
|
time=datetime.datetime.now().isoformat(),
|
||||||
|
stats={
|
||||||
|
"archived": 0,
|
||||||
|
"failed": 0,
|
||||||
|
"errors": [access_error],
|
||||||
|
},
|
||||||
|
).model_dump()
|
||||||
|
|
||||||
args = get_orchestrator_args(
|
args = get_orchestrator_args(
|
||||||
sheet.group_id, True, [constants.SHEET_ID, sheet.sheet_id]
|
sheet.group_id, True, [constants.SHEET_ID, sheet.sheet_id]
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user