From 5344cc56e7503abb77f44f93188c8c0fdd9b1595 Mon Sep 17 00:00:00 2001
From: msramalho <19508417+msramalho@users.noreply.github.com>
Date: Thu, 6 Feb 2025 18:41:12 +0000
Subject: [PATCH] introduces group/global usage & permissions, validates in
 endpoints and tests endpoints

---
 src/db/crud.py                                |  13 +-
 src/db/models.py                              |   8 +-
 src/db/schemas.py                             |   6 +-
 src/db/user_state.py                          | 159 +++++++++++++++---
 src/endpoints/default.py                      |  10 +-
 src/endpoints/url.py                          |  13 +-
 ...24ec4b1_rename_sheets_last_archived_col.py |  32 ++++
 src/tests/endpoints/test_sheet.py             |   9 +-
 src/tests/endpoints/test_url.py               |  39 ++++-
 src/worker/main.py                            |  15 +-
 10 files changed, 252 insertions(+), 52 deletions(-)
 create mode 100644 src/migrations/versions/1636724ec4b1_rename_sheets_last_archived_col.py

diff --git a/src/db/crud.py b/src/db/crud.py
index d9e27d9..0ede4f0 100644
--- a/src/db/crud.py
+++ b/src/db/crud.py
@@ -3,7 +3,7 @@ from functools import lru_cache
 from sqlalchemy.orm import Session, load_only
 from sqlalchemy import Column, or_, func
 from loguru import logger
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 
 from core.config import ALLOW_ANY_EMAIL
 from db.database import get_db
@@ -51,7 +51,7 @@ def search_archives_by_email(db: Session, email: str, skip: int = 0, limit: int
 
 
 def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]):
-    db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id)
+    db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id, sheet_id=task.sheet_id)
     db_task.tags = tags
     db_task.urls = urls
     db.add(db_task)
@@ -246,8 +246,15 @@ def get_user_sheet(db: Session, email: str, sheet_id: str) -> models.Sheet:
 
 
 def get_user_sheets(db: Session, email: str) -> list[models.Sheet]:
-    return db.query(models.Sheet).filter(models.Sheet.author_id == email).order_by(models.Sheet.last_archived_at.desc()).all()
+    return db.query(models.Sheet).filter(models.Sheet.author_id == email).order_by(models.Sheet.last_url_archived_at.desc()).all()
 
+def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
+    db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
+    if db_sheet:
+        db_sheet.last_url_archived_at = datetime.now()
+        db.commit()
+        return True
+    return False
 
 def delete_sheet(db: Session, sheet_id: str, email: str) -> bool:
     db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id, models.Sheet.author_id == email).first()
diff --git a/src/db/models.py b/src/db/models.py
index d8b12c8..41d2c3c 100644
--- a/src/db/models.py
+++ b/src/db/models.py
@@ -25,16 +25,15 @@ association_table_user_groups = Table(
     Column("group_id", ForeignKey("groups.id")),
 )
 
+
 # data model tables
-
-
 class Archive(Base):
     __tablename__ = "archives"
 
     id = Column(String, primary_key=True, index=True)
     url = Column(String, index=True)
     result = Column(JSON, default=None)
-    public = Column(Boolean, default=True)  # if public=false, access to group and author
+    public = Column(Boolean, default=True)  # if public=false, access by group and author
     deleted = Column(Boolean, default=False)
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     updated_at = Column(DateTime(timezone=True), onupdate=func.now())
@@ -102,8 +101,9 @@ class Sheet(Base):
     author_id = Column(String, ForeignKey("users.email"))
     group_id = Column(String, ForeignKey("groups.id"), doc="Group ID, user must be in a group to create a sheet.")
     frequency = Column(String, default="daily", doc="Frequency of archiving: hourly, daily, weekly.")
+    # TODO: stats is not needed, is it?
     stats = Column(JSON, default={}, doc="Sheet statistics like total links, total rows, ...")
-    last_archived_at = Column(DateTime(timezone=True), server_default=func.now(), doc="Last time a new link was archived.")
+    last_url_archived_at = Column(DateTime(timezone=True), server_default=func.now(), doc="Last time a new link was archived.")
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     updated_at = Column(DateTime(timezone=True), onupdate=func.now())
 
diff --git a/src/db/schemas.py b/src/db/schemas.py
index 9c26a78..072d4c3 100644
--- a/src/db/schemas.py
+++ b/src/db/schemas.py
@@ -1,6 +1,6 @@
 from typing import Annotated
 from annotated_types import Len
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel
 from datetime import datetime
 
 
@@ -21,6 +21,7 @@ class ArchiveCreate(BaseModel):
     group_id: str | None = None
     tags: set[Tag] | None = set()
     rearchive: bool = True
+    sheet_id: str | None = None
     # urls: list = []
 
 
@@ -97,9 +98,8 @@ class SheetAdd(BaseModel):
 
 class SheetResponse(SheetAdd):
     author_id: str
-    stats: dict | None
-    last_archived_at: datetime | None
     created_at: datetime
+    last_url_archived_at: datetime | None
 
 
 class ArchiveTrigger(BaseModel):
diff --git a/src/db/user_state.py b/src/db/user_state.py
index 2563877..8b3ad54 100644
--- a/src/db/user_state.py
+++ b/src/db/user_state.py
@@ -29,6 +29,11 @@ class UserState:
                 read_public=self.read_public,
                 archive_url=self.archive_url,
                 archive_sheet=self.archive_sheet,
+                # below are relevant only for /url endpoints
+                max_archive_lifespan_months=self.max_archive_lifespan_months,
+                max_monthly_urls=self.max_monthly_urls,
+                max_monthly_mbs=self.max_monthly_mbs,
+                priority=self.priority
             )
             for group in self.user_groups:
                 if not group.permissions: continue
@@ -117,6 +122,34 @@ class UserState:
                 self._sheet_frequency.update(group.permissions.get("sheet_frequency", None))
         return self._sheet_frequency
 
+    @property
+    def max_archive_lifespan_months(self) -> int:
+        if not hasattr(self, '_max_archive_lifespan_months'):
+            self._max_archive_lifespan_months = self._helper_for_grouping_max_numerical_permissions("max_archive_lifespan_months")
+        return self._max_archive_lifespan_months
+    
+    @property
+    def max_monthly_urls(self) -> int:
+        if not hasattr(self, '_max_monthly_urls'):
+            self._max_monthly_urls = self._helper_for_grouping_max_numerical_permissions("max_monthly_urls")
+        return self._max_monthly_urls
+
+    @property
+    def max_monthly_mbs(self) -> int:
+        if not hasattr(self, '_max_monthly_mbs'):
+            self._max_monthly_mbs = self._helper_for_grouping_max_numerical_permissions("max_monthly_mbs")
+        return self._max_monthly_mbs
+
+    @property
+    def priority(self) -> str:
+        if not hasattr(self, '_priority'):
+            self._priority = "low"
+            for group in self.user_groups:
+                if not group.permissions: continue
+                if group.permissions.get("priority", "low") == "high":
+                    self._priority = "high"
+        return self._priority
+
     @property
     def active(self) -> bool:
         """
@@ -125,34 +158,114 @@ class UserState:
         if not hasattr(self, '_active'):
             self._active = bool(self.read or self.read_public or self.archive_url or self.archive_sheet)
         return self._active
+    
+    def _helper_for_grouping_max_numerical_permissions(self, permission_name: str) -> int:
+        """
+        Iterates one of the numerical permissions where -1 means no restrictions and returns either -1 or the maximum value, defaults according to GroupPermissions
+        """
+        default = GroupPermissions.model_fields[permission_name].default
+        max_value = default
+        for group in self.user_groups:
+            if not group.permissions: continue
+            group_value = group.permissions.get(permission_name, default)
+            if group_value == -1:
+                max_value = -1
+                return max_value
+            max_value = max(max_value, group_value)
+        return max_value
 
     def in_group(self, group_id: str) -> bool:
         return group_id in self.user_groups_names
 
+    def usage(self) -> Dict:
+        """
+        returns the monthly quotas for the URLs/MBs and the totals for Sheets
+        """
+        current_month = datetime.now().month
+        current_year = datetime.now().year
+
+        # find and sum all user sheets over this month
+        user_sheets = self.db.query(
+            models.Sheet.group_id,
+            func.count(models.Sheet.id).label('sheet_count')
+        ).filter(models.Sheet.author_id == self.email).group_by(models.Sheet.group_id).all()
+
+        sheets_by_group = {sheet.group_id: sheet.sheet_count for sheet in user_sheets}
+
+        # find and sum all user urls over this month
+        urls_by_group = self.db.query(
+            models.Archive.group_id,
+            func.count(models.Archive.id).label('url_count'),
+            func.coalesce(func.sum(
+                func.coalesce(
+                    func.cast(
+                        func.json_extract(models.Archive.result, '$.metadata.total_bytes'),
+                        sqlalchemy.Integer
+                    ), 0
+                )
+            ), 0).label('total_bytes')
+        ).filter(
+            models.Archive.author_id == self.email,
+            func.extract('month', models.Archive.created_at) == current_month,
+            func.extract('year', models.Archive.created_at) == current_year
+        ).group_by(models.Archive.group_id).all()
+
+        # merge the two queries
+        usage_by_group = {
+            (url.group_id or ""): {
+                "monthly_urls": url.url_count,
+                "monthly_mbs": int(url.total_bytes / 1024 / 1024),
+                "total_sheets": 0
+            }
+            for url in urls_by_group
+        }
+        for group_id, sheet_count in sheets_by_group.items():
+            group_id = group_id or ""
+            if group_id in usage_by_group:
+                usage_by_group[group_id]["total_sheets"] = sheet_count
+            else:
+                usage_by_group[group_id] = {
+                    "monthly_urls": 0,
+                    "monthly_mbs": 0,
+                    "total_sheets": sheet_count
+                }
+
+        # calculate totals
+        total_sheets = sum([sheet.sheet_count for sheet in user_sheets])
+        total_bytes = sum([url.total_bytes for url in urls_by_group])
+        total_urls = sum([url.url_count for url in urls_by_group])
+
+        return {
+            "total_sheets": total_sheets,
+            "monthly_urls": total_urls,
+            "monthly_mbs": int(total_bytes / 1024 / 1024),
+            "groups": usage_by_group
+        }
+
     def has_quota_monthly_sheets(self, group_id: str) -> bool:
         """
         checks if a user has reached their sheet quota for a given group
         """
-        if group_id not in self.permissions: 
+        if group_id not in self.permissions:
             return False
 
         user_sheets = self.db.query(models.Sheet).filter(models.Sheet.author_id == self.email, models.Sheet.group_id == group_id).count()
-        
+
         sheet_quota = self.permissions[group_id].max_sheets
-        if sheet_quota == -1: 
+        if sheet_quota == -1:
             return True
         return user_sheets < sheet_quota
 
-    def has_quota_max_monthly_urls(self) -> bool:
+    def has_quota_max_monthly_urls(self, group_id:str) -> bool:
         """
-        checks if a user has reached their monthly url quota
+        checks if a user has reached their monthly url quota for a group, if global then group should be empty string
         """
         quota = 0
-        for group in self.user_groups:
-            if not group.permissions: continue
-            max_monthly_urls = group.permissions.get("max_monthly_urls", 0)
-            if max_monthly_urls == -1: return True
-            quota = max(quota, max_monthly_urls)
+        if not group_id:
+            quota = self.max_monthly_urls
+        else:
+            if group_id not in self.permissions: return False
+            quota = self.permissions[group_id].max_monthly_urls
 
         current_month = datetime.now().month
         current_year = datetime.now().year
@@ -164,16 +277,16 @@ class UserState:
 
         return user_urls < quota
 
-    def has_quota_max_monthly_mbs(self) -> bool:
+    def has_quota_max_monthly_mbs(self, group_id:str) -> bool:
         """
-        checks if a user has reached their monthly mb quota
+        checks if a user has reached their monthly MBs quota for a group, if global then group should be empty string
         """
         quota = 0
-        for group in self.user_groups:
-            if not group.permissions: continue
-            max_monthly_mbs = group.permissions.get("max_monthly_mbs", 0)
-            if max_monthly_mbs == -1: return True
-            quota = max(quota, max_monthly_mbs)
+        if not group_id:
+            quota = self.max_monthly_mbs
+        else:
+            if group_id not in self.permissions: return False
+            quota = self.permissions[group_id].max_monthly_mbs
 
         current_month = datetime.now().month
         current_year = datetime.now().year
@@ -196,20 +309,20 @@ class UserState:
         user_mbs = int(user_bytes / 1024 / 1024)
         return user_mbs < quota
 
-    def can_manually_trigger(self, group_id:str) -> bool:
+    def can_manually_trigger(self, group_id: str) -> bool:
         """
         checks if a user is allowed to manually trigger a sheet
         """
-        if group_id not in self.permissions: 
+        if group_id not in self.permissions:
             return False
-        
+
         return self.permissions[group_id].manually_trigger_sheet
 
-    def is_sheet_frequency_allowed(self, group_id:str, frequency: str) -> bool:
+    def is_sheet_frequency_allowed(self, group_id: str, frequency: str) -> bool:
         """
         checks if a user is allowed to create a sheet with this frequency for this group
         """
-        if group_id not in self.permissions: 
+        if group_id not in self.permissions:
             return False
-        
+
         return frequency in self.permissions[group_id].sheet_frequency
diff --git a/src/endpoints/default.py b/src/endpoints/default.py
index 6a70a0d..d5f712f 100644
--- a/src/endpoints/default.py
+++ b/src/endpoints/default.py
@@ -39,13 +39,21 @@ async def active(
     return {"active": user.active}
 
 
-# TODO: test
 @default_router.get("/user/permissions", summary="Get the user's global 'all' permissions and the permissions for each group they belong to.")
 def get_user_permissions(
     user: UserState = Depends(get_user_state),
 ) -> Dict[str, GroupPermissions]:
     return user.permissions
 
+@default_router.get("/user/usage", summary="Get the user's monthly URLs/MBs usage along with the total active sheets, breakdown by group.")
+def get_user_usage(
+    user: UserState = Depends(get_user_state),
+):
+    if not user.active:
+        raise HTTPException(status_code=403, detail="User is not active.")
+    return user.usage()
+    
+
 
 @default_router.get('/favicon.ico', include_in_schema=False)
 async def favicon():
diff --git a/src/endpoints/url.py b/src/endpoints/url.py
index 2578b5e..3d0aae1 100644
--- a/src/endpoints/url.py
+++ b/src/endpoints/url.py
@@ -13,6 +13,7 @@ from db import crud, schemas
 from db.database import get_db_dependency
 
 from worker.main import create_archive_task
+from urllib.parse import urlparse
 
 url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
 
@@ -25,14 +26,18 @@ def archive_url(
 ) -> schemas.Task:
     logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {archive.url}")
 
+    parsed_url = urlparse(archive.url)
+    if not all([parsed_url.scheme, parsed_url.netloc]):
+        raise HTTPException(status_code=400, detail="Invalid URL received.")
+
     if email != ALLOW_ANY_EMAIL:
         user = UserState(db, email)
-        if not user.has_quota_max_monthly_urls():
-            raise HTTPException(status_code=429, detail="User has reached their monthly URL quota.")
-        if not user.has_quota_max_monthly_mbs():
-            raise HTTPException(status_code=429, detail="User has reached their monthly MB quota.")
         if archive.group_id and not user.in_group(archive.group_id):
             raise HTTPException(status_code=403, detail="User does not have access to this group.")
+        if not user.has_quota_max_monthly_urls(archive.group_id):
+            raise HTTPException(status_code=429, detail="User has reached their monthly URL quota.")
+        if not user.has_quota_max_monthly_mbs(archive.group_id):
+            raise HTTPException(status_code=429, detail="User has reached their monthly MB quota.")
     
     # TODO: deprecate ArchiveCreate
     backwards_compatible_archive = schemas.ArchiveCreate(
diff --git a/src/migrations/versions/1636724ec4b1_rename_sheets_last_archived_col.py b/src/migrations/versions/1636724ec4b1_rename_sheets_last_archived_col.py
new file mode 100644
index 0000000..6c109f3
--- /dev/null
+++ b/src/migrations/versions/1636724ec4b1_rename_sheets_last_archived_col.py
@@ -0,0 +1,32 @@
+"""rename sheets last_archived col
+
+Revision ID: 1636724ec4b1
+Revises: a23aaf3ae930
+Create Date: 2025-02-05 19:19:01.984396
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '1636724ec4b1'
+down_revision = 'a23aaf3ae930'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    columns = [col['name'] for col in inspector.get_columns('sheets')]
+    if 'last_archived_at' in columns:
+        op.alter_column('sheets', 'last_archived_at', new_column_name='last_url_archived_at')
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    columns = [col['name'] for col in inspector.get_columns('sheets')]
+    if 'last_url_archived_at' in columns:
+        op.alter_column('sheets', 'last_url_archived_at', new_column_name='last_archived_at')
diff --git a/src/tests/endpoints/test_sheet.py b/src/tests/endpoints/test_sheet.py
index 908b9a8..129cd2a 100644
--- a/src/tests/endpoints/test_sheet.py
+++ b/src/tests/endpoints/test_sheet.py
@@ -29,8 +29,7 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
     assert response.status_code == 201
     j = response.json()
     assert datetime.fromisoformat(j.pop("created_at"))
-    assert datetime.fromisoformat(j.pop("last_archived_at"))
-    assert j.pop("stats") == {}
+    assert datetime.fromisoformat(j.pop("last_url_archived_at"))
     assert j.pop("author_id") == 'morty@example.com'
     assert j == good_data
 
@@ -95,16 +94,15 @@ def test_get_user_sheets_endpoint(client_with_auth, db_session):
     assert isinstance(r, list)
     assert len(r) == 2
     assert datetime.fromisoformat(r[0].pop("created_at"))
-    assert datetime.fromisoformat(r[0].pop("last_archived_at"))
+    assert datetime.fromisoformat(r[0].pop("last_url_archived_at"))
     assert datetime.fromisoformat(r[1].pop("created_at"))
-    assert datetime.fromisoformat(r[1].pop("last_archived_at"))
+    assert datetime.fromisoformat(r[1].pop("last_url_archived_at"))
     assert r[0] == {
         'id': '123',
         'author_id': 'morty@example.com',
         'frequency': 'hourly',
         'group_id': 'spaceship',
         'name': 'Test Sheet 1',
-        'stats': {},
     }
     assert r[1] == {
         'id': '456',
@@ -112,7 +110,6 @@ def test_get_user_sheets_endpoint(client_with_auth, db_session):
         'frequency': 'daily',
         'group_id': 'interdimensional',
         'name': 'Test Sheet 2',
-        'stats': {},
     }
 
 
diff --git a/src/tests/endpoints/test_url.py b/src/tests/endpoints/test_url.py
index ee777f5..af198e3 100644
--- a/src/tests/endpoints/test_url.py
+++ b/src/tests/endpoints/test_url.py
@@ -7,36 +7,67 @@ def test_archive_url_unauthenticated(client, test_no_auth):
     test_no_auth(client.post, "/url/archive")
 
 
+@patch("endpoints.url.UserState")
 @patch("worker.main.create_archive_task.delay", return_value=TaskResult(id="123-456-789", status="PENDING", result=""))
-def test_archive_url(m1, client_with_auth):
+def test_archive_url(m1, m2, client_with_auth):
+    m_user_state = MagicMock()
+    m2.return_value = m_user_state
+
     # url is too short
     response = client_with_auth.post("/url/archive", json={"url": "bad"})
     assert response.status_code == 422
     assert response.json()["detail"][0]["msg"] == 'String should have at least 5 characters'
     m1.assert_not_called()
 
+    # url is invalid
+    response = client_with_auth.post("/url/archive", json={"url": "example.com"})
+    assert response.status_code == 400
+    assert response.json()["detail"] == "Invalid URL received."
+
     # valid request
+    m_user_state.has_quota_max_monthly_urls.return_value = True
+    m_user_state.has_quota_max_monthly_mbs.return_value = True
     response = client_with_auth.post("/url/archive", json={"url": "https://example.com"})
     assert response.status_code == 201
     assert response.json() == {'id': '123-456-789'}
-
     m1.assert_called_once()
     called_val = m1.call_args.args[0]
-    assert json.loads(called_val) == {"id": None, "url": "https://example.com", "result": None, "public": True, "author_id": "rick@example.com", "group_id": None, "tags": [], "rearchive": True}
+    assert json.loads(called_val) == {"id": None, "url": "https://example.com", "result": None, "public": True, "author_id": "rick@example.com", "group_id": None, "tags": [], "rearchive": True, "sheet_id":None}
+    m_user_state.has_quota_max_monthly_urls.assert_called_once()
+    m_user_state.has_quota_max_monthly_mbs.assert_called_once()
 
     # user is not in group
+    m_user_state.in_group.return_value = False
     response = client_with_auth.post("/url/archive", json={"url": "https://example.com", "group_id": "new-group"})
     assert response.status_code == 403
     assert response.json()["detail"] == "User does not have access to this group."
+    m_user_state.in_group.assert_called_once_with("new-group")
 
     # user is in group
+    m_user_state.in_group.return_value = True
     response = client_with_auth.post("/url/archive", json={"url": "https://example.com", "group_id": "spaceship"})
     assert response.status_code == 201
     assert response.json() == {'id': '123-456-789'}
-
     assert m1.call_count == 2
     called_val = m1.call_args.args[0]
     assert json.loads(called_val)["group_id"] == "spaceship"
+    m_user_state.in_group.assert_called_with("spaceship")
+
+    # user is over monthly URL quota
+    m_user_state.has_quota_max_monthly_urls.return_value = False
+    m_user_state.has_quota_max_monthly_mbs.return_value = True
+    response = client_with_auth.post("/url/archive", json={"url": "https://example.com", "group_id": "spaceship"})
+    assert response.status_code == 429
+    assert response.json()["detail"] == "User has reached their monthly URL quota."
+    m_user_state.has_quota_max_monthly_urls.assert_called_with("spaceship")
+
+    # user is over monthly MB quota
+    m_user_state.has_quota_max_monthly_urls.return_value = True
+    m_user_state.has_quota_max_monthly_mbs.return_value = False
+    response = client_with_auth.post("/url/archive", json={"url": "https://example.com", "group_id": "spacesuit"})
+    assert response.status_code == 429
+    assert response.json()["detail"] == "User has reached their monthly MB quota."
+    m_user_state.has_quota_max_monthly_mbs.assert_called_with("spacesuit")
 
 @patch("endpoints.url.UserState")
 def test_archive_url_quotas(m1, client_with_auth):
diff --git a/src/worker/main.py b/src/worker/main.py
index ac83a54..2896a23 100644
--- a/src/worker/main.py
+++ b/src/worker/main.py
@@ -19,6 +19,7 @@ from core.logging import log_error
 
 settings = get_settings()
 
+
 celery = Celery(__name__)
 celery.conf.broker_url = settings.CELERY_BROKER_URL
 celery.conf.result_backend = settings.CELERY_RESULT_BACKEND
@@ -48,6 +49,7 @@ def create_archive_task(self, archive_json: str):
                 return Metadata.choose_most_complete([a.result for a in archives])
 
     orchestrator = choose_orchestrator(archive.group_id, archive.author_id)
+    logger.info(f"Using orchestrator {orchestrator=}")
     result = orchestrator.feed_item(Metadata().set_url(url))
 
     try:
@@ -59,7 +61,7 @@ def create_archive_task(self, archive_json: str):
         raise e
     return result.to_dict()
 
-
+#TODO: refactor how user-groups are loaded and orchestrators chosen
 @celery.task(name="create_sheet_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 0})
 def create_sheet_task(self, sheet_json: str):
     sheet = schemas.SubmitSheet.model_validate_json(sheet_json)
@@ -79,7 +81,8 @@ def create_sheet_task(self, sheet_json: str):
             continue
         try:
             #TODO: remove public from sheet in new refactor
-            insert_result_into_db(result, sheet.tags, sheet.public, sheet.group_id, sheet.author_id, models.generate_uuid())
+            #TODO: update the sheets table with the current date if any new archive was done
+            insert_result_into_db(result, sheet.tags, sheet.public, sheet.group_id, sheet.author_id, models.generate_uuid(), sheet.sheet_id)
             stats["archived"] += 1
         except exc.IntegrityError as e:
             logger.warning(f"cached result detected: {e}")
@@ -89,6 +92,10 @@ def create_sheet_task(self, sheet_json: str):
             stats["failed"] += 1
             stats["errors"].append(str(e))
 
+    if stats["archived"] > 0:
+        with get_db() as session:
+            crud.update_sheet_last_url_archived_at(session, sheet.sheet_id)
+
     logger.info(f"SHEET DONE {sheet=}")
     return {"success": True, "sheet": sheet.sheet_name, "sheet_id": sheet.sheet_id, "time": datetime.datetime.now().isoformat(), **stats}
 
@@ -165,7 +172,7 @@ def is_group_invalid_for_user(public: bool, group_id: str, author_id: str):
     return False
 
 
-def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id: str) -> str:
+def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id: str, sheet_id:str="") -> str:
     logger.info(f"INSERTING {public=} {group_id=} {author_id=} {tags=} into {task_id}")
     assert result, f"UNABLE TO archive: {result.get_url() if result else result}"
     with get_db() as session:
@@ -175,7 +182,7 @@ def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_
         # create DB TAGs if needed
         db_tags = [crud.create_tag(session, tag) for tag in tags]
         # insert archive
-        db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=task_id, url=result.get_url(), result=json.loads(result.to_json()), public=public, author_id=author_id, group_id=group_id), tags=db_tags, urls=get_all_urls(result))
+        db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=task_id, url=result.get_url(), result=json.loads(result.to_json()), public=public, author_id=author_id, group_id=group_id, sheet_id=sheet_id), tags=db_tags, urls=get_all_urls(result))
         logger.debug(f"Added {db_task.id=} to database on {db_task.created_at} ({db_task.author_id})")
         return db_task.id