mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-07 19:18:34 +03:00
Add pre-commit with GiHub Action (#56)
* Update pyproject.toml * add pre-commit * Create .pre-commit-config.yaml * Comment out ruff * Update .pre-commit-config.yaml * General formatting * Create format-and-fail.yml * Update ci.yml * Add pre-commit to dev dependencies * Update pyproject.toml
This commit is contained in:
@@ -2,4 +2,4 @@ CHROME_APP_IDS='["1234567890"]'
|
||||
ALLOWED_ORIGINS='["allowed"]'
|
||||
BLOCKED_EMAILS='[]'
|
||||
DATABASE_PATH="sqlite:///./database/auto-archiver.db"
|
||||
API_BEARER_TOKEN=THIS_API_TOKEN_SHOULD_NEVER_BE_USED
|
||||
API_BEARER_TOKEN=THIS_API_TOKEN_SHOULD_NEVER_BE_USED
|
||||
|
||||
@@ -35,4 +35,4 @@ MAIL_SSL_TLS=True
|
||||
|
||||
|
||||
# celery workers config
|
||||
CONCURRENCY=2
|
||||
CONCURRENCY=2
|
||||
|
||||
@@ -5,4 +5,4 @@ BLOCKED_EMAILS='["blocked@example.com"]'
|
||||
|
||||
DATABASE_PATH="sqlite:///auto-archiver.test.db"
|
||||
API_BEARER_TOKEN=this_is_the_test_api_token
|
||||
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
|
||||
USER_GROUPS_FILENAME=app/tests/user-groups.test.yaml
|
||||
|
||||
10
.github/workflows/ci.yml
vendored
10
.github/workflows/ci.yml
vendored
@@ -1,13 +1,9 @@
|
||||
name: CI
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
branches: [ main, dev ]
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
branches: [ main, dev ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -41,4 +37,4 @@ jobs:
|
||||
run: poetry run coverage run -m pytest -v -ra --color=yes app/tests/
|
||||
|
||||
- name: Report coverage
|
||||
run: poetry run coverage report
|
||||
run: poetry run coverage report
|
||||
|
||||
16
.github/workflows/format-and-fail.yml
vendored
Normal file
16
.github/workflows/format-and-fail.yml
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
name: Format and Fail
|
||||
on:
|
||||
push:
|
||||
branches: [ main, dev ]
|
||||
pull_request:
|
||||
branches: [ main, dev ]
|
||||
|
||||
jobs:
|
||||
pre-commit:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- uses: pre-commit/action@v3.0.0
|
||||
79
.pre-commit-config.yaml
Normal file
79
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,79 @@
|
||||
repos:
|
||||
- repo: https://github.com/nbQA-dev/nbQA
|
||||
rev: 1.8.5
|
||||
hooks:
|
||||
- id: nbqa-ruff
|
||||
args:
|
||||
- --fix
|
||||
- --target-version=py311
|
||||
- --ignore=E721,E722
|
||||
- --line-length=80
|
||||
- id: nbqa-black
|
||||
args:
|
||||
- --line-length=80
|
||||
- id: nbqa-isort
|
||||
args:
|
||||
- --float-to-top
|
||||
- --profile=black
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: check-docstring-first
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-json
|
||||
- id: check-case-conflict
|
||||
- id: check-toml
|
||||
- id: check-merge-conflict
|
||||
- id: check-xml
|
||||
- id: check-yaml
|
||||
exclude: app/tests/user-groups.test.broken.yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: check-symlinks
|
||||
- id: mixed-line-ending
|
||||
- id: sort-simple-yaml
|
||||
- id: fix-encoding-pragma
|
||||
args:
|
||||
- --remove
|
||||
- id: pretty-format-json
|
||||
args:
|
||||
- --autofix
|
||||
|
||||
- repo: https://github.com/pre-commit/pygrep-hooks
|
||||
rev: v1.10.0
|
||||
hooks:
|
||||
- id: python-check-blanket-noqa
|
||||
- id: python-check-mock-methods
|
||||
- id: python-no-eval
|
||||
- id: python-no-log-warn
|
||||
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: Run isort to sort imports
|
||||
files: \.py$
|
||||
# To keep consistent with the global isort skip config defined in setup.cfg
|
||||
exclude: ^build/.*$|^.tox/.*$|^venv/.*$
|
||||
args:
|
||||
- --lines-after-imports=2
|
||||
- --profile=black
|
||||
- --line-length=80
|
||||
|
||||
# - repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# rev: v0.4.10
|
||||
# hooks:
|
||||
# - id: ruff
|
||||
# types_or: [python,pyi]
|
||||
# args:
|
||||
# - --fix
|
||||
# - --target-version=py311
|
||||
# - --select=B,C,E,F,W,B9
|
||||
# - --line-length=80
|
||||
# - --ignore=E203,E402,E501,E261
|
||||
# - id: ruff-format
|
||||
# types_or: [ python,pyi]
|
||||
# args:
|
||||
# - --target-version=py311
|
||||
# - --line-length=80
|
||||
2
LICENSE
2
LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
4
Makefile
4
Makefile
@@ -1,3 +1,7 @@
|
||||
.PHONY: lint
|
||||
lint:
|
||||
poetry run pre-commit run --all-files
|
||||
|
||||
.PHONY: clean-dev
|
||||
clean-dev:
|
||||
@echo -n "Are you sure? [yes/N] (this will delete volumes) " && read ans && [ $${ans:-N} = yes ]
|
||||
|
||||
@@ -14,7 +14,7 @@ To properly set up the API you need to install `docker` and to have these files,
|
||||
2. go through the example file and configure it according to your needs.
|
||||
3. you will need to create and reference at least one `secrets/orchestration.yaml` file, you can do so by following the instructions in the [auto-archiver](https://github.com/bellingcat/auto-archiver#installation) that automatically generates one for you. If you use the archive sheets feature you will need to create a `orchestrationsheets-sheets.yaml` file as well that should have the `gsheet_feeder` and `gsheet_db` enabled and configured, the auto-archiver has [extensive documentation](https://auto-archiver.readthedocs.io/en/latest/) on how to set this up.
|
||||
|
||||
Do not commit those files, they are .gitignored by default.
|
||||
Do not commit those files, they are .gitignored by default.
|
||||
We also advise you to keep any sensitive files in the `secrets/` folder which is pinned and gitignored.
|
||||
|
||||
We have examples for both of those files (`.env.example` and `user-groups.example.yaml`), and here's how to set them up whether you're in development or production:
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from logging.config import fileConfig
|
||||
from sqlalchemy import engine_from_config
|
||||
from sqlalchemy import pool
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 1636724ec4b1
|
||||
Create Date: 2025-02-08 15:22:20.392522
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: a23aaf3ae930
|
||||
Create Date: 2025-02-05 19:19:01.984396
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 02b2f6d17ed0
|
||||
Create Date: 2025-02-11 21:53:23.293274
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: fa012ec405b8
|
||||
Create Date: 2024-11-04 11:12:30.237299
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
"""modify archive url to have uuid id instead of url unique constraint
|
||||
|
||||
Revision ID: 9369a264945b
|
||||
Revises:
|
||||
Revises:
|
||||
Create Date: 2023-12-20 17:24:59.320691
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '9369a264945b'
|
||||
down_revision = None
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 89121d2c96d8
|
||||
Create Date: 2025-02-04 12:19:20.753570
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 93a611e4c066
|
||||
Create Date: 2024-10-31 09:36:50.360710
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# TODO: code in this file should eventually be moved to the auto-archiver code base
|
||||
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Media, Metadata
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
|
||||
db_urls = []
|
||||
for m in result.media:
|
||||
@@ -29,4 +31,3 @@ def convert_if_media(media):
|
||||
except Exception as e:
|
||||
logger.debug(f"error parsing {media} : {e}")
|
||||
return False
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
|
||||
import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db import worker_crud
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
from functools import lru_cache
|
||||
from sqlalchemy import Engine, create_engine, event, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, AsyncEngine, async_sessionmaker
|
||||
from functools import lru_cache
|
||||
|
||||
from sqlalchemy import Engine, create_engine, event, text
|
||||
from sqlalchemy.ext.asyncio import (
|
||||
AsyncEngine,
|
||||
AsyncSession,
|
||||
async_sessionmaker,
|
||||
create_async_engine,
|
||||
)
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
@@ -1,8 +1,17 @@
|
||||
from sqlalchemy import Column, String, JSON, DateTime, Boolean, Table, ForeignKey
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy.orm import relationship, declarative_base
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
String,
|
||||
Table,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base, relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime
|
||||
|
||||
from app.shared.db import models
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
# TODO: isolate database operations away from worker and into WEB
|
||||
# ONLY WORKER
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import traceback
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@@ -6,7 +7,7 @@ from loguru import logger
|
||||
logger.add("logs/api_logs.log", retention="30 days")
|
||||
logger.add("logs/error_logs.log", retention="30 days", level="ERROR")
|
||||
|
||||
|
||||
|
||||
def log_error(e: Exception, traceback_str: str = None, extra:str = ""):
|
||||
if not traceback_str: traceback_str = traceback.format_exc()
|
||||
if extra: extra = f"{extra}\n"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from annotated_types import Len
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class SubmitSheet(BaseModel):
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
|
||||
from functools import lru_cache
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from typing import Annotated, Set
|
||||
|
||||
from annotated_types import Len
|
||||
from fastapi_mail import ConnectionConfig
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from typing import Annotated, Set
|
||||
from annotated_types import Len
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
|
||||
|
||||
model_config = SettingsConfigDict(env_file=os.environ.get("ENVIRONMENT_FILE") , env_file_encoding='utf-8', extra='ignore', str_strip_whitespace=True)
|
||||
|
||||
# general
|
||||
@@ -37,14 +38,14 @@ class Settings(BaseSettings):
|
||||
if self.REDIS_PASSWORD:
|
||||
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
||||
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
||||
|
||||
|
||||
# cronjobs
|
||||
CRON_ARCHIVE_SHEETS: bool = False
|
||||
CRON_DELETE_STALE_SHEETS: bool = False
|
||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
|
||||
DELETE_SCHEDULED_ARCHIVES_CHECK_EVERY_N_DAYS: int = 7
|
||||
|
||||
|
||||
# observability
|
||||
REPEAT_COUNT_METRICS_SECONDS: int = 30
|
||||
|
||||
@@ -73,4 +74,4 @@ class Settings(BaseSettings):
|
||||
|
||||
@lru_cache
|
||||
def get_settings():
|
||||
return Settings()
|
||||
return Settings()
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
|
||||
from functools import lru_cache
|
||||
from celery import Celery
|
||||
import redis
|
||||
|
||||
from celery import Celery
|
||||
|
||||
import redis
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Set
|
||||
|
||||
import yaml
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, computed_field, field_validator, Field, model_validator
|
||||
from typing import Dict, List, Set
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
Field,
|
||||
computed_field,
|
||||
field_validator,
|
||||
model_validator,
|
||||
)
|
||||
from typing_extensions import Self
|
||||
|
||||
|
||||
|
||||
@@ -7,4 +7,4 @@ def fnv1a_hash_mod(s: str, modulo:int) -> int:
|
||||
hash ^= ord(char)
|
||||
hash *= fnv_prime
|
||||
hash &= 0xFFFFFFFF # Keep it 32-bit
|
||||
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo
|
||||
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import os
|
||||
from typing import AsyncGenerator
|
||||
from fastapi.testclient import TestClient
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, AsyncEngine
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
|
||||
|
||||
from app.shared.settings import Settings
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db.user_state import UserState
|
||||
|
||||
|
||||
@@ -65,10 +67,11 @@ def db_session(test_db):
|
||||
|
||||
@pytest_asyncio.fixture()
|
||||
async def async_test_db(get_settings: Settings):
|
||||
import asyncio
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import make_async_engine
|
||||
from app.web.db.crud import get_user_group_names
|
||||
import asyncio
|
||||
|
||||
get_user_group_names.cache_clear()
|
||||
engine = await make_async_engine(get_settings.ASYNC_DATABASE_PATH)
|
||||
@@ -108,8 +111,8 @@ async def async_db_session(async_test_db: AsyncEngine) -> AsyncGenerator[AsyncSe
|
||||
|
||||
@pytest.fixture()
|
||||
def app(db_session):
|
||||
from app.web.main import app_factory
|
||||
from app.web.db import crud
|
||||
from app.web.main import app_factory
|
||||
app = app_factory()
|
||||
crud.upsert_user_groups(db_session)
|
||||
return app
|
||||
@@ -123,7 +126,11 @@ def client(app):
|
||||
|
||||
@pytest.fixture()
|
||||
def app_with_auth(app, db_session):
|
||||
from app.web.security import get_token_or_user_auth, get_user_auth, get_user_state
|
||||
from app.web.security import (
|
||||
get_token_or_user_auth,
|
||||
get_user_auth,
|
||||
get_user_state,
|
||||
)
|
||||
app.dependency_overrides[get_token_or_user_auth] = lambda: "rick@example.com"
|
||||
app.dependency_overrides[get_user_auth] = lambda: "morty@example.com"
|
||||
app.dependency_overrides[get_user_state] = lambda: UserState(db_session, "MORTY@example.com")
|
||||
@@ -138,7 +145,7 @@ def client_with_auth(app_with_auth):
|
||||
|
||||
@pytest.fixture()
|
||||
def app_with_token(app):
|
||||
from app.web.security import token_api_key_auth, get_token_or_user_auth
|
||||
from app.web.security import get_token_or_user_auth, token_api_key_auth
|
||||
app.dependency_overrides[token_api_key_auth] = lambda: ALLOW_ANY_EMAIL
|
||||
app.dependency_overrides[get_token_or_user_auth] = lambda: ALLOW_ANY_EMAIL
|
||||
return app
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
|
||||
}
|
||||
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ configurations:
|
||||
gsheet_feeder:
|
||||
service_account: "app/tests/fake_service_account.json"
|
||||
cli_feeder:
|
||||
urls:
|
||||
urls:
|
||||
- "url1"
|
||||
hash_enricher:
|
||||
algorithm: "SHA-256"
|
||||
|
||||
@@ -3,4 +3,4 @@ def test_generate_uuid():
|
||||
|
||||
assert generate_uuid() != generate_uuid()
|
||||
assert len(generate_uuid()) == 36
|
||||
assert generate_uuid().count("-") == 4
|
||||
assert generate_uuid().count("-") == 4
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
from app.shared.db import models
|
||||
from app.shared.db import worker_crud, models
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.tests.web.db.test_crud import test_data
|
||||
|
||||
|
||||
def test_update_sheet_last_url_archived_at(db_session):
|
||||
|
||||
# Create test sheet
|
||||
@@ -19,7 +18,7 @@ def test_update_sheet_last_url_archived_at(db_session):
|
||||
db_session.refresh(test_sheet)
|
||||
assert isinstance(test_sheet.last_url_archived_at, datetime)
|
||||
assert test_sheet.last_url_archived_at > before
|
||||
|
||||
|
||||
# Test non-existent sheet
|
||||
assert worker_crud.update_sheet_last_url_archived_at(db_session, "non-existent-sheet") is False
|
||||
|
||||
@@ -73,8 +72,8 @@ def test_create_tag(db_session):
|
||||
|
||||
|
||||
def test_create_task(db_session):
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared import schemas
|
||||
from app.shared.db import worker_crud
|
||||
|
||||
task = schemas.ArchiveCreate(
|
||||
id="archive-id-456-101",
|
||||
@@ -114,4 +113,4 @@ def test_create_task(db_session):
|
||||
assert nt.group_id == "spaceship"
|
||||
assert len(nt.tags) == 0
|
||||
assert len(nt.urls) == 0
|
||||
assert nt.created_at is not None
|
||||
assert nt.created_at is not None
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from app.shared.business_logic import get_store_archive_until, get_store_archive_until_or_never
|
||||
|
||||
from app.shared.business_logic import (
|
||||
get_store_archive_until,
|
||||
get_store_archive_until_or_never,
|
||||
)
|
||||
|
||||
|
||||
class Test_get_store_archive_until:
|
||||
|
||||
@@ -11,7 +11,7 @@ def test_fnv1a_hash_mod():
|
||||
|
||||
# Test different modulos
|
||||
hash1 = fnv1a_hash_mod("test", 5)
|
||||
hash2 = fnv1a_hash_mod("test", 10)
|
||||
hash2 = fnv1a_hash_mod("test", 10)
|
||||
assert 0 <= hash1 < 5
|
||||
assert 0 <= hash2 < 10
|
||||
|
||||
@@ -28,4 +28,4 @@ def test_fnv1a_hash_mod():
|
||||
assert 0 <= fnv1a_hash_mod("测试", 10) < 10
|
||||
|
||||
# Test modulo = 1 edge case
|
||||
assert fnv1a_hash_mod("test", 1) == 0
|
||||
assert fnv1a_hash_mod("test", 1) == 0
|
||||
|
||||
@@ -3,4 +3,4 @@ This is just an invalid yaml for testing
|
||||
|
||||
still broken: True
|
||||
- one
|
||||
- two
|
||||
- two
|
||||
|
||||
@@ -84,4 +84,4 @@ groups:
|
||||
# max_archive_lifespan_months: 12
|
||||
max_monthly_urls: 1
|
||||
# max_monthly_mbs: 50
|
||||
priority: "low"
|
||||
priority: "low"
|
||||
|
||||
@@ -3,10 +3,12 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.settings import Settings
|
||||
|
||||
from app.web.db import crud
|
||||
|
||||
|
||||
authors = ["rick@example.com", "morty@example.com", "jerry@example.com"]
|
||||
|
||||
|
||||
@@ -373,6 +375,7 @@ async def test_get_sheets_by_id_hash(async_db_session):
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_stale_sheets(async_db_session):
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sqlalchemy.sql import select
|
||||
|
||||
now = datetime.now()
|
||||
@@ -435,4 +438,4 @@ async def test_delete_stale_sheets(async_db_session):
|
||||
|
||||
# Running again should not delete anything
|
||||
deleted = await crud.delete_stale_sheets(async_db_session, 7)
|
||||
assert len(deleted) == 0
|
||||
assert len(deleted) == 0
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from unittest.mock import MagicMock, PropertyMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from unittest.mock import MagicMock
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.shared.schemas import Usage, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import VERSION
|
||||
from app.tests.web.db.test_crud import test_data
|
||||
from app.web.config import VERSION
|
||||
|
||||
|
||||
def test_endpoint_home(client_with_auth):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
@@ -45,8 +45,8 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
|
||||
assert response.json() == {"detail": "User does not have access to this group."}
|
||||
|
||||
# switch to jerry who's got less quota/permissions
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
app_with_auth.dependency_overrides[get_user_state] = lambda: UserState(db_session, "jerry@example.com")
|
||||
client_jerry = TestClient(app_with_auth)
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import shutil
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
def test_lifespan(app):
|
||||
with TestClient(app) as client:
|
||||
@@ -25,7 +25,7 @@ def test_logging_middleware(m1, client_with_auth):
|
||||
client_with_auth.delete("/url/123")
|
||||
# creates one empty and one from above
|
||||
assert len(EXCEPTION_COUNTER.collect()[0].samples) == 2
|
||||
|
||||
|
||||
|
||||
def test_serve_local_archive_logic(get_settings):
|
||||
# create a test file first
|
||||
@@ -38,7 +38,7 @@ def test_serve_local_archive_logic(get_settings):
|
||||
get_settings.SERVE_LOCAL_ARCHIVE = "/app/local_archive_test"
|
||||
from app.web.main import app_factory
|
||||
app = app_factory(get_settings)
|
||||
|
||||
|
||||
# test
|
||||
client = TestClient(app)
|
||||
r = client.get("/app/local_archive_test/temp.txt")
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
from fastapi.security import HTTPAuthorizationCredentials
|
||||
import pytest
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
|
||||
@@ -108,8 +108,8 @@ async def test_authenticate_user_exception():
|
||||
|
||||
|
||||
def test_get_user_state():
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
mock_session = Mock()
|
||||
test_email = "test@example.com"
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from datetime import datetime
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared import schemas
|
||||
from auto_archiver.core import Media, Metadata
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
class Test_create_archive_task():
|
||||
URL = "https://example-live.com"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from app.web.main import app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
@@ -5,7 +5,7 @@ API_DESCRIPTION = """
|
||||
|
||||
**Usage notes:**
|
||||
- The API requires a Bearer token for most operations, which you can obtain by logging in with your Google account.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- Once you submit a URL or Sheet for archiving, the API will return a task_id that you can use to check the status of the archiving process. It works asynchronously.
|
||||
"""
|
||||
BREAKING_CHANGES = {"minVersion": "0.4.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
from sqlalchemy import Column, or_, func, select
|
||||
from loguru import logger
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from functools import lru_cache
|
||||
|
||||
from cachetools import LRUCache, cached
|
||||
from cachetools.keys import hashkey
|
||||
from loguru import logger
|
||||
from sqlalchemy import Column, func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.db import models
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.user_groups import UserGroups
|
||||
from app.shared.utils.misc import fnv1a_hash_mod
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -117,7 +118,7 @@ async def get_group_priority_async(db: AsyncSession, group_id: str) -> dict:
|
||||
@cached(cache=LRUCache(maxsize=128), key=lambda db, email: hashkey(email))
|
||||
def get_user_group_names(db: Session, email: str) -> list[str]:
|
||||
"""
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
"""
|
||||
# TODO: the read: [group1, group2] permissions don't currently work
|
||||
if not email or not len(email) or "@" not in email: return []
|
||||
@@ -173,7 +174,7 @@ def upsert_user_groups(db: Session):
|
||||
def display_email_pii(email: str):
|
||||
return f"'{email[0:3]}...@{email.split('@')[1]}'"
|
||||
"""
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
along with new participation of users in groups
|
||||
"""
|
||||
filename = get_settings().USER_GROUPS_FILENAME
|
||||
@@ -192,6 +193,7 @@ def upsert_user_groups(db: Session):
|
||||
for group in explicit_groups:
|
||||
group_domains[group].add(domain)
|
||||
import json
|
||||
|
||||
# upsert groups and save a map of groupid -> dbobject
|
||||
for group_id, g in ug.groups.items():
|
||||
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
|
||||
from typing import Dict, Set
|
||||
import sqlalchemy
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
from datetime import datetime
|
||||
from typing import Dict, Set
|
||||
|
||||
import sqlalchemy
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.shared.schemas import Usage, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.web.db import crud
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from app.web.config import VERSION, BREAKING_CHANGES
|
||||
from app.shared.schemas import ActiveUser, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import BREAKING_CHANGES, VERSION
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
from app.shared.user_groups import GroupInfo
|
||||
|
||||
|
||||
default_router = APIRouter()
|
||||
|
||||
@@ -42,7 +44,7 @@ def get_user_usage(
|
||||
if not user.active:
|
||||
raise HTTPException(status_code=403, detail="User is not active.")
|
||||
return user.usage()
|
||||
|
||||
|
||||
|
||||
|
||||
@default_router.get('/favicon.ico', include_in_schema=False)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import json
|
||||
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.shared.db import models
|
||||
from app.shared.log import log_error
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared import schemas
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
|
||||
@@ -78,4 +78,4 @@ def archive_user_sheet(
|
||||
group_queue = user.priority_group(sheet.group_id)
|
||||
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
|
||||
@@ -3,10 +3,10 @@ from fastapi import APIRouter, Depends
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.shared import schemas
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.web.utils.misc import custom_jsonable_encoder
|
||||
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared.db.database import get_db_dependency
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
celery = get_celery()
|
||||
@@ -47,7 +47,7 @@ def archive_url(
|
||||
else:
|
||||
archive_create.author_id = archive.author_id or email
|
||||
group_queue = convert_priority_to_queue_dict("high")
|
||||
|
||||
|
||||
|
||||
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
|
||||
task_response = schemas.Task(id=task.id)
|
||||
@@ -74,8 +74,8 @@ def search_by_url(
|
||||
|
||||
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
|
||||
def delete_archive(
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> schemas.DeleteResponse:
|
||||
logger.info(f"deleting url archive task {id} request by {user.email}")
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
import datetime
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import alembic.config
|
||||
from fastapi import FastAPI
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
from fastapi_utils.tasks import repeat_every
|
||||
from loguru import logger
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import (
|
||||
get_db,
|
||||
get_db_async,
|
||||
make_engine,
|
||||
wal_checkpoint,
|
||||
)
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.middleware import increase_exceptions_counter
|
||||
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
||||
from app.web.utils.metrics import (
|
||||
measure_regular_metrics,
|
||||
redis_subscribe_worker_exceptions,
|
||||
)
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -183,4 +193,4 @@ async def delete_stale_sheets():
|
||||
async def generate_users_export_csv():
|
||||
#TODO: implement a cronjob that regularly requested user data to a CSV file
|
||||
# see https://colab.research.google.com/drive/1QDbo3QXHPBdiTuANlA1AWVvN-rqxuCPa?authuser=0#scrollTo=4nPXeSdK8RBT
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
import os
|
||||
from fastapi import FastAPI, Depends
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from fastapi import Depends, FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from loguru import logger
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.shared.task_messaging import get_celery
|
||||
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.web.config import VERSION, API_DESCRIPTION
|
||||
from app.web.events import lifespan
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.config import API_DESCRIPTION, VERSION
|
||||
from app.web.endpoints.default import default_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.sheet import sheet_router
|
||||
from app.web.endpoints.task import task_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.events import lifespan
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -57,4 +56,4 @@ def app_factory(settings = get_settings()):
|
||||
logger.warning(f"MOUNTing local archive, use this in development only {settings.SERVE_LOCAL_ARCHIVE}")
|
||||
app.mount(settings.SERVE_LOCAL_ARCHIVE, StaticFiles(directory=local_dir), name=settings.SERVE_LOCAL_ARCHIVE)
|
||||
|
||||
return app
|
||||
return app
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
|
||||
import traceback
|
||||
from loguru import logger
|
||||
|
||||
from fastapi import Request
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.log import log_error
|
||||
from app.web.utils.metrics import EXCEPTION_COUNTER
|
||||
|
||||
@@ -25,7 +27,7 @@ async def increase_exceptions_counter(e: Exception, location:str="cronjob"):
|
||||
last_trace = traceback.extract_tb(e.__traceback__)[-1]
|
||||
_file, _line, func_name, _text = last_trace
|
||||
location = func_name
|
||||
except Exception as e:
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to get function name from cronjob exception traceback: {e}")
|
||||
EXCEPTION_COUNTER.labels(type=e.__class__.__name__, location=location).inc()
|
||||
log_error(e)
|
||||
log_error(e)
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import secrets
|
||||
|
||||
import requests
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from loguru import logger
|
||||
import requests, secrets
|
||||
from fastapi import HTTPException, status, Depends
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.settings import get_settings
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db.user_state import UserState
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
bearer_security = HTTPBearer()
|
||||
|
||||
@@ -80,4 +83,4 @@ def authenticate_user(access_token):
|
||||
|
||||
|
||||
def get_user_state(email:str=Depends(get_user_auth), db:Session=Depends(get_db_dependency)):
|
||||
return UserState(db, email)
|
||||
return UserState(db, email)
|
||||
|
||||
@@ -2,12 +2,13 @@ import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_redis
|
||||
from app.web.db import crud
|
||||
|
||||
|
||||
# Custom metrics
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
|
||||
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
import datetime
|
||||
import json
|
||||
import traceback
|
||||
|
||||
import traceback, datetime
|
||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||
from celery.signals import task_failure
|
||||
from loguru import logger
|
||||
from sqlalchemy import exc
|
||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.task_messaging import get_celery, get_redis
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.log import log_error
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared.log import log_error
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.task_messaging import get_celery, get_redis
|
||||
from app.worker.worker_log import setup_celery_logger
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
celery = get_celery("worker")
|
||||
@@ -26,7 +27,7 @@ USER_GROUPS_FILENAME = settings.USER_GROUPS_FILENAME
|
||||
setup_celery_logger(celery)
|
||||
|
||||
# TODO: these are temporary PATCHES for new aa's functionality
|
||||
# logger.add("app/worker/worker_log.log", level="DEBUG")
|
||||
# logger.add("app/worker/worker_log.log", level="DEBUG")
|
||||
logger.remove = lambda x: print(f"logger.remove({x})")
|
||||
|
||||
# TODO: after release, as it requires updating past entries with sheet_id where tag is used, drop tags
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from loguru import logger
|
||||
from celery import Celery
|
||||
import sys
|
||||
|
||||
from celery import Celery
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
|
||||
|
||||
celery = get_celery("worker")
|
||||
|
||||
def setup_celery_logger(celery):
|
||||
@@ -22,7 +24,7 @@ def setup_celery_logger(celery):
|
||||
if message.strip():
|
||||
logger.info(message.strip())
|
||||
# Required to prevent issues with buffered output
|
||||
def flush(self): pass
|
||||
def flush(self): pass
|
||||
def isatty(self): return False
|
||||
|
||||
sys.stdout = InterceptHandler()
|
||||
|
||||
@@ -12,7 +12,7 @@ services:
|
||||
- ALLOWED_ORIGINS=["http://localhost:8000","http://localhost:8004","http://localhost:8081","chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp"]
|
||||
- USER_GROUPS_FILENAME=/aa-api/app/user-groups.dev.yaml
|
||||
- DATABASE_PATH=sqlite:////aa-api/database/auto-archiver.db
|
||||
|
||||
|
||||
|
||||
worker:
|
||||
# command: watchmedo auto-restart --patterns="*.py" --recursive --ignore-directories -- celery -- --app=app.worker.main.celery worker -Q high_priority,low_priority --concurrency=${CONCURRENCY} --max-tasks-per-child=100
|
||||
|
||||
@@ -5,7 +5,7 @@ volumes:
|
||||
name: "auto-archiver-api"
|
||||
services:
|
||||
web:
|
||||
build:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: web.Dockerfile
|
||||
restart: always
|
||||
@@ -29,7 +29,7 @@ services:
|
||||
retries: 3
|
||||
|
||||
worker:
|
||||
build:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: worker.Dockerfile
|
||||
restart: always
|
||||
@@ -68,4 +68,4 @@ services:
|
||||
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
retries: 3
|
||||
|
||||
129
poetry.lock
generated
129
poetry.lock
generated
@@ -616,6 +616,18 @@ files = [
|
||||
[package.dependencies]
|
||||
pycparser = "*"
|
||||
|
||||
[[package]]
|
||||
name = "cfgv"
|
||||
version = "3.4.0"
|
||||
description = "Validate configuration and produce human readable error messages."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
|
||||
{file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.4.1"
|
||||
@@ -959,6 +971,18 @@ calendars = ["convertdate (>=2.2.1)", "hijridate"]
|
||||
fasttext = ["fasttext (>=0.9.1)", "numpy (>=1.19.3,<2)"]
|
||||
langdetect = ["langdetect (>=1.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "distlib"
|
||||
version = "0.3.9"
|
||||
description = "Distribution utilities"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"},
|
||||
{file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dnspython"
|
||||
version = "2.7.0"
|
||||
@@ -1097,6 +1121,23 @@ future = "*"
|
||||
[package.extras]
|
||||
dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "filelock"
|
||||
version = "3.17.0"
|
||||
description = "A platform independent file lock."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"},
|
||||
{file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"]
|
||||
testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"]
|
||||
typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""]
|
||||
|
||||
[[package]]
|
||||
name = "future"
|
||||
version = "1.0.0"
|
||||
@@ -1409,6 +1450,21 @@ http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "identify"
|
||||
version = "2.6.8"
|
||||
description = "File identification library for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "identify-2.6.8-py2.py3-none-any.whl", hash = "sha256:83657f0f766a3c8d0eaea16d4ef42494b39b34629a4b3192a9d020d349b3e255"},
|
||||
{file = "identify-2.6.8.tar.gz", hash = "sha256:61491417ea2c0c5c670484fd8abbb34de34cdae1e5f39a73ee65e48e4bb663fc"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
license = ["ukkonen"]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.10"
|
||||
@@ -1724,6 +1780,18 @@ files = [
|
||||
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodeenv"
|
||||
version = "1.9.1"
|
||||
description = "Node.js virtual environment builder"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"},
|
||||
{file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "2.1.3"
|
||||
@@ -1981,6 +2049,23 @@ tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "ole
|
||||
typing = ["typing-extensions ; python_version < \"3.10\""]
|
||||
xmp = ["defusedxml"]
|
||||
|
||||
[[package]]
|
||||
name = "platformdirs"
|
||||
version = "4.3.6"
|
||||
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
|
||||
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
|
||||
type = ["mypy (>=1.11.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.5.0"
|
||||
@@ -1997,6 +2082,25 @@ files = [
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pre-commit"
|
||||
version = "4.1.0"
|
||||
description = "A framework for managing and maintaining multi-language pre-commit hooks."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "pre_commit-4.1.0-py2.py3-none-any.whl", hash = "sha256:d29e7cb346295bcc1cc75fc3e92e343495e3ea0196c9ec6ba53f49f10ab6ae7b"},
|
||||
{file = "pre_commit-4.1.0.tar.gz", hash = "sha256:ae3f018575a588e30dfddfab9a05448bfbd6b73d78709617b5a2b853549716d4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
cfgv = ">=2.0.0"
|
||||
identify = ">=1.0.0"
|
||||
nodeenv = ">=0.11.1"
|
||||
pyyaml = ">=5.1"
|
||||
virtualenv = ">=20.10.0"
|
||||
|
||||
[[package]]
|
||||
name = "prometheus-client"
|
||||
version = "0.21.1"
|
||||
@@ -2557,7 +2661,7 @@ version = "6.0.2"
|
||||
description = "YAML parser and emitter for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["web"]
|
||||
groups = ["dev", "web"]
|
||||
files = [
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
|
||||
@@ -3415,6 +3519,27 @@ files = [
|
||||
{file = "vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "virtualenv"
|
||||
version = "20.29.2"
|
||||
description = "Virtual Python Environment builder"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a"},
|
||||
{file = "virtualenv-20.29.2.tar.gz", hash = "sha256:fdaabebf6d03b5ba83ae0a02cfe96f48a716f4fae556461d180825866f75b728"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
distlib = ">=0.3.7,<1"
|
||||
filelock = ">=3.12.2,<4"
|
||||
platformdirs = ">=3.9.1,<5"
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
|
||||
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
|
||||
|
||||
[[package]]
|
||||
name = "vk-api"
|
||||
version = "11.9.9"
|
||||
@@ -3688,4 +3813,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "11d734f2ee32206214a7ecb8dc3ec8d19a7b6281ee98b509a5bb8bdb647c674a"
|
||||
content-hash = "c4a5c50ac109c9912992ca86d2b5ec712c6bcfc84838bf42f90208b02cc27b3c"
|
||||
|
||||
@@ -52,4 +52,4 @@ pytest = ">=8.3.4,<9.0.0"
|
||||
httpx = ">=0.28.1,<0.29.0"
|
||||
coverage = ">=7.6.11,<8.0.0"
|
||||
pytest-asyncio = ">=0.25.3,<0.26.0"
|
||||
|
||||
pre-commit = "^4.1.0"
|
||||
|
||||
@@ -59,4 +59,3 @@ groups:
|
||||
permissions:
|
||||
read: ["default"]
|
||||
read_public: true
|
||||
|
||||
@@ -30,4 +30,4 @@ COPY alembic.ini ./
|
||||
COPY ./app/ ./app/
|
||||
COPY user-groups.* ./app/
|
||||
|
||||
ENTRYPOINT ["./poetry-venv/bin/poetry", "run"]
|
||||
ENTRYPOINT ["./poetry-venv/bin/poetry", "run"]
|
||||
|
||||
Reference in New Issue
Block a user