mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-11 04:58:33 +03:00
introduces user.manually_trigger_sheet and implements quotas for sheets
This commit is contained in:
@@ -84,7 +84,7 @@ class TaskDelete(Task):
|
||||
deleted: bool
|
||||
|
||||
|
||||
class ActiveUser(BaseModel):
|
||||
class ActiveUser(BaseModel):
|
||||
active: bool
|
||||
|
||||
|
||||
@@ -94,13 +94,6 @@ class SheetAdd(BaseModel):
|
||||
group_id: str
|
||||
frequency: str
|
||||
|
||||
@field_validator('frequency')
|
||||
def validate_frequency(cls, v):
|
||||
valid_frequencies = {"hourly", "daily"}
|
||||
if v not in {"hourly", "daily"}:
|
||||
raise ValueError(f"Invalid frequency: {v}. Must be one of {valid_frequencies}.")
|
||||
return v
|
||||
|
||||
|
||||
class SheetResponse(SheetAdd):
|
||||
author_id: str
|
||||
|
||||
@@ -117,24 +117,6 @@ class UserState:
|
||||
self._sheet_frequency.update(group.permissions.get("sheet_frequency", None))
|
||||
return self._sheet_frequency
|
||||
|
||||
@property
|
||||
def max_sheets(self):
|
||||
"""
|
||||
infer the user's sheet quota from the groups
|
||||
-1 means unlimited
|
||||
"""
|
||||
if not hasattr(self, '_max_sheets'):
|
||||
self._max_sheets = 0
|
||||
for group in self.user_groups:
|
||||
if not group.permissions: continue
|
||||
max_sheets = group.permissions.get("max_sheets", 0)
|
||||
if max_sheets == -1:
|
||||
self._max_sheets = -1
|
||||
return self._max_sheets
|
||||
self._max_sheets = max(self._max_sheets, max_sheets)
|
||||
|
||||
return self._max_sheets
|
||||
|
||||
@property
|
||||
def active(self) -> bool:
|
||||
"""
|
||||
@@ -147,15 +129,19 @@ class UserState:
|
||||
def in_group(self, group_id: str) -> bool:
|
||||
return group_id in self.user_groups_names
|
||||
|
||||
def has_quota_monthly_sheets(self) -> bool:
|
||||
def has_quota_monthly_sheets(self, group_id: str) -> bool:
|
||||
"""
|
||||
checks if a user has reached their sheet quota
|
||||
checks if a user has reached their sheet quota for a given group
|
||||
"""
|
||||
if self.max_sheets == -1: return True
|
||||
if group_id not in self.permissions:
|
||||
return False
|
||||
|
||||
user_sheets = self.db.query(models.Sheet).filter(models.Sheet.author_id == self.email).count()
|
||||
|
||||
return user_sheets < self.max_sheets
|
||||
user_sheets = self.db.query(models.Sheet).filter(models.Sheet.author_id == self.email, models.Sheet.group_id == group_id).count()
|
||||
|
||||
sheet_quota = self.permissions[group_id].max_sheets
|
||||
if sheet_quota == -1:
|
||||
return True
|
||||
return user_sheets < sheet_quota
|
||||
|
||||
def has_quota_max_monthly_urls(self) -> bool:
|
||||
"""
|
||||
@@ -210,18 +196,20 @@ class UserState:
|
||||
user_mbs = int(user_bytes / 1024 / 1024)
|
||||
return user_mbs < quota
|
||||
|
||||
# def can_manually_trigger(self) -> bool:
|
||||
# """
|
||||
# checks if a user is allowed to manually trigger a sheet
|
||||
# """
|
||||
# for group in self.user_groups:
|
||||
# if not group.permissions: continue
|
||||
# if group.permissions.get("manual_trigger", False):
|
||||
# return True
|
||||
# return False
|
||||
def can_manually_trigger(self, group_id:str) -> bool:
|
||||
"""
|
||||
checks if a user is allowed to manually trigger a sheet
|
||||
"""
|
||||
if group_id not in self.permissions:
|
||||
return False
|
||||
|
||||
return self.permissions[group_id].manually_trigger_sheet
|
||||
|
||||
def is_sheet_frequency_allowed(self, frequency: str) -> bool:
|
||||
def is_sheet_frequency_allowed(self, group_id:str, frequency: str) -> bool:
|
||||
"""
|
||||
checks if a user is allowed to create a sheet with this frequency
|
||||
checks if a user is allowed to create a sheet with this frequency for this group
|
||||
"""
|
||||
return frequency in self.sheet_frequency
|
||||
if group_id not in self.permissions:
|
||||
return False
|
||||
|
||||
return frequency in self.permissions[group_id].sheet_frequency
|
||||
|
||||
@@ -6,7 +6,7 @@ from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from db.user_state import UserState
|
||||
from web.security import token_api_key_auth, get_user_auth, get_user_state
|
||||
from web.security import token_api_key_auth, get_user_state
|
||||
from db import schemas, crud
|
||||
from db.database import get_db_dependency
|
||||
from worker.main import create_sheet_task
|
||||
@@ -24,35 +24,35 @@ def create_sheet(
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(status_code=403, detail="User does not have access to this group.")
|
||||
|
||||
if not user.has_quota_monthly_sheets():
|
||||
raise HTTPException(status_code=429, detail="User has reached their sheet quota.")
|
||||
|
||||
if not user.is_sheet_frequency_allowed(sheet.frequency):
|
||||
raise HTTPException(status_code=422, detail=f"Invalid frequency: {sheet.frequency}. Must be one of {user.sheet_frequency}")
|
||||
if not user.has_quota_monthly_sheets(sheet.group_id):
|
||||
raise HTTPException(status_code=429, detail="User has reached their sheet quota for this group.")
|
||||
|
||||
if not user.is_sheet_frequency_allowed(sheet.group_id, sheet.frequency):
|
||||
raise HTTPException(status_code=422, detail="Invalid frequency selected for this group.")
|
||||
|
||||
try:
|
||||
return crud.create_sheet(db, sheet.id, sheet.name, user.email, sheet.group_id, sheet.frequency)
|
||||
except exc.IntegrityError as e:
|
||||
raise HTTPException(status_code=400, detail="Sheet with this ID already exists.") from e
|
||||
raise HTTPException(status_code=400, detail="Sheet with this ID is already being archived.") from e
|
||||
|
||||
|
||||
@sheet_router.get("/mine", status_code=200, summary="Get the authenticated user's Google Sheets.")
|
||||
def get_user_sheets(
|
||||
email=Depends(get_user_auth),
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> list[schemas.SheetResponse]:
|
||||
return crud.get_user_sheets(db, email)
|
||||
return crud.get_user_sheets(db, user.email)
|
||||
|
||||
|
||||
@sheet_router.delete("/{id}", summary="Delete a Google Sheet by ID.")
|
||||
def delete_sheet(
|
||||
id: str,
|
||||
email=Depends(get_user_auth),
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> schemas.TaskDelete:
|
||||
return JSONResponse({
|
||||
"id": id,
|
||||
"deleted": crud.delete_sheet(db, id, email)
|
||||
"deleted": crud.delete_sheet(db, id, user.email)
|
||||
})
|
||||
|
||||
|
||||
@@ -62,10 +62,6 @@ def archive_user_sheet(
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> schemas.Task:
|
||||
|
||||
#TODO: are we enabling manual triggers?
|
||||
# if not user.can_manually_trigger():
|
||||
# raise HTTPException(status_code=429, detail="User cannot manually trigger archiving tasks.")
|
||||
|
||||
sheet = crud.get_user_sheet(db, user.email, sheet_id=id)
|
||||
if not sheet:
|
||||
@@ -75,6 +71,9 @@ def archive_user_sheet(
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(status_code=403, detail="User does not have access to this group.")
|
||||
|
||||
if not user.can_manually_trigger(sheet.group_id):
|
||||
raise HTTPException(status_code=429, detail="User cannot manually trigger sheet archiving in this group.")
|
||||
|
||||
task = create_sheet_task.delay(schemas.SubmitSheet(sheet_id=id, author_id=user.email, group=sheet.group_id).model_dump_json())
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
@@ -82,7 +81,7 @@ def archive_user_sheet(
|
||||
|
||||
@sheet_router.post("/archive", status_code=201, summary="Trigger an archiving task for any GSheet with an API token.", response_description="task_id for the archiving task.")
|
||||
def archive_sheet(
|
||||
sheet: schemas.SubmitSheet, # TODO: replace with simpler model
|
||||
sheet: schemas.SubmitSheet,
|
||||
auth=Depends(token_api_key_auth)
|
||||
) -> schemas.Task:
|
||||
sheet.author_id = sheet.author_id or "api-endpoint"
|
||||
|
||||
@@ -35,6 +35,7 @@ class GroupPermissions(BaseModel):
|
||||
read_public: bool = False
|
||||
archive_url: bool = False
|
||||
archive_sheet: bool = False
|
||||
manually_trigger_sheet: bool = False
|
||||
sheet_frequency: Set[str] = Field(default_factory=list)
|
||||
max_sheets: int = 0
|
||||
max_archive_lifespan_months: int = 12
|
||||
@@ -85,7 +86,8 @@ class UserGroupModel(BaseModel):
|
||||
raise ValueError(f"Invalid user, it should be an address: {email}")
|
||||
if not v[email]:
|
||||
raise ValueError(f"User {email} has no explicitly listed groups, only include them here if they should be in a group.")
|
||||
return {k.lower().strip(): list(set([g.lower().strip() for g in v])) for k, v in v.items()}
|
||||
# all users belong to the default group
|
||||
return {k.lower().strip(): list(set(["default"] + [g.lower().strip() for g in v])) for k, v in v.items()}
|
||||
|
||||
@field_validator('domains', mode='before')
|
||||
@classmethod
|
||||
|
||||
@@ -37,14 +37,7 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
|
||||
# already exists
|
||||
response = client_with_auth.post("/sheet/create", json=good_data)
|
||||
assert response.status_code == 400
|
||||
assert response.json() == {"detail": "Sheet with this ID already exists."}
|
||||
|
||||
# bad frequency
|
||||
bad_data = good_data.copy()
|
||||
bad_data["frequency"] = "every hour"
|
||||
response = client_with_auth.post("/sheet/create", json=bad_data)
|
||||
assert response.status_code == 422
|
||||
assert "Value error, Invalid frequency: every hour. Must be one of" in response.json()["detail"][0]["msg"]
|
||||
assert response.json() == {"detail": "Sheet with this ID is already being archived."}
|
||||
|
||||
# bad group
|
||||
bad_data = good_data.copy()
|
||||
@@ -66,16 +59,16 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
|
||||
jerry_data["id"] = "jerry-sheet-id"
|
||||
response = client_jerry.post("/sheet/create", json=jerry_data)
|
||||
assert response.status_code == 422
|
||||
assert "Invalid frequency: hourly" in response.json()["detail"]
|
||||
jerry_data["frequency"] = "daily"
|
||||
assert response.json() == {"detail": "Invalid frequency selected for this group."}
|
||||
|
||||
jerry_data["frequency"] = "daily"
|
||||
# success for the first sheet, bad quota on second
|
||||
response = client_jerry.post("/sheet/create", json=jerry_data)
|
||||
assert response.status_code == 201
|
||||
|
||||
response = client_jerry.post("/sheet/create", json=jerry_data)
|
||||
assert response.status_code == 429
|
||||
assert response.json() == {"detail": "User has reached their sheet quota."}
|
||||
assert response.json() == {"detail": "User has reached their sheet quota for this group."}
|
||||
|
||||
|
||||
def test_get_user_sheets_endpoint(client_with_auth, db_session):
|
||||
@@ -155,6 +148,16 @@ def test_delete_sheet_endpoint(client_with_auth, db_session):
|
||||
|
||||
|
||||
class TestArchiveUserSheetEndpoint:
|
||||
@patch("worker.main.create_sheet_task.delay", return_value=TaskResult(id="123-taskid", status="PENDING", result=""))
|
||||
def test_normal_flow(self, m1, client_with_auth, db_session):
|
||||
from db import models
|
||||
db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="spaceship", frequency="hourly"))
|
||||
db_session.commit()
|
||||
r = client_with_auth.post("/sheet/123-sheet-id/archive")
|
||||
assert r.status_code == 201
|
||||
assert r.json() == {"id": "123-taskid"}
|
||||
m1.assert_called_once()
|
||||
|
||||
def test_token_auth(self, client_with_token, test_no_auth):
|
||||
test_no_auth(client_with_token.post, "/sheet/123-sheet-id/archive")
|
||||
|
||||
@@ -171,16 +174,6 @@ class TestArchiveUserSheetEndpoint:
|
||||
assert r.status_code == 403
|
||||
assert r.json() == {"detail": "No access to this sheet."}
|
||||
|
||||
@patch("worker.main.create_sheet_task.delay", return_value=TaskResult(id="123-taskid", status="PENDING", result=""))
|
||||
def test_normal_flow(self, m1, client_with_auth, db_session):
|
||||
from db import models
|
||||
db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="spaceship", frequency="hourly"))
|
||||
db_session.commit()
|
||||
r = client_with_auth.post("/sheet/123-sheet-id/archive")
|
||||
assert r.status_code == 201
|
||||
assert r.json() == {"id": "123-taskid"}
|
||||
m1.assert_called_once()
|
||||
|
||||
def test_user_not_in_group(self, client_with_auth, db_session):
|
||||
from db import models
|
||||
db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="interdimensional", frequency="hourly"))
|
||||
@@ -189,6 +182,14 @@ class TestArchiveUserSheetEndpoint:
|
||||
assert r.status_code == 403
|
||||
assert r.json() == {"detail": "User does not have access to this group."}
|
||||
|
||||
def test_user_cannot_manually_trigger(self, client_with_auth, db_session):
|
||||
from db import models
|
||||
db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="default", frequency="hourly"))
|
||||
db_session.commit()
|
||||
r = client_with_auth.post("/sheet/123-sheet-id/archive")
|
||||
assert r.status_code == 429
|
||||
assert r.json() == {"detail": "User cannot manually trigger sheet archiving in this group."}
|
||||
|
||||
|
||||
class TestTokenArchiveEndpoint:
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ groups:
|
||||
read: ["all"]
|
||||
archive_url: true
|
||||
archive_sheet: true
|
||||
manually_trigger_sheet: true
|
||||
sheet_frequency: ["hourly", "daily"]
|
||||
max_sheets: -1
|
||||
max_archive_lifespan_months: -1
|
||||
@@ -48,6 +49,7 @@ groups:
|
||||
read: ["interdimensional", "animated-characters"]
|
||||
archive_url: true
|
||||
archive_sheet: true
|
||||
manually_trigger_sheet: true
|
||||
sheet_frequency: ["hourly", "daily"]
|
||||
max_sheets: 5
|
||||
max_archive_lifespan_months: 12
|
||||
@@ -75,6 +77,7 @@ groups:
|
||||
permissions:
|
||||
# read: []
|
||||
archive_url: true
|
||||
# manually_trigger_sheet: false
|
||||
# archive_sheet: false
|
||||
# sheet_frequency: []
|
||||
# max_sheets: 0
|
||||
|
||||
Reference in New Issue
Block a user