diff --git a/.gitignore b/.gitignore index e937aa2..7e92e36 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +user-groups.dev.yaml +user-groups.yaml orchestration.yaml my-archives *.pyc @@ -23,4 +25,5 @@ local_archive local_archive_test *db-wal *db-shm -copy-files.sh \ No newline at end of file +copy-files.sh +temp/ \ No newline at end of file diff --git a/README.md b/README.md index 18c553f..b292107 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,43 @@ [![CI](https://github.com/bellingcat/auto-archiver-api/workflows/CI/badge.svg)](https://github.com/bellingcat/auto-archiver-api/actions/workflows/ci.yaml) -An api that uses celery workers to process URL archive requests via [bellingcat/auto-archiver](https://github.com/bellingcat/auto-archiver), it allows authentication via Google OAuth Apps and enables CORS, everything runs on docker but development can be done without docker (except for redis). +A web API that uses celery workers to process URL archive requests via [bellingcat/auto-archiver](https://github.com/bellingcat/auto-archiver), it allows authentication via Google OAuth Apps and enables CORS, everything runs on docker but development can be done without docker (except for redis). +### setup +To properly set up the API you need to install `docker` and to edit 2 files: +1. a `.env` to configure the API, stays at the root level +2. a `user-groups.yaml` to manage user permissions +Do not commit those files, they are .gitignored by default. 
+ +We have examples for both of those, and here's how to set them up whether you're in development or production: + +#### setup for DEVELOPMENT +```bash +# copy .env.example to .env.dev and modify it according to your needs +cp .env.example .env.dev +# copy the user-groups.example.yaml and modify it accordingly +cp user-groups.example.yaml user-groups.dev.yaml +# run the APP, make sure VPNs are off +make dev +# check it's running by calling the health endpoint +curl 'http://localhost:8004/health' +# > {"status":"ok"} +``` +Now go to http://localhost:8004/docs#/ and you should see the API documentation. + +#### setup for PRODUCTION +```bash +# copy .env.example to .env.prod and modify it according to your needs +cp .env.example .env.prod +# copy the user-groups.example.yaml and modify it accordingly +cp user-groups.example.yaml user-groups.yaml +# deploy the app +make prod +# check it's running by calling the health endpoint +curl 'http://localhost:8004/health' +# > {"status":"ok"} +``` +Now go to http://localhost:8004/docs#/ and you should see the API documentation. ## User, Domains, Groups, and permissions management there are 2 ways to access the API @@ -97,6 +132,16 @@ orchestrators: ## Database migrations check https://alembic.sqlalchemy.org/en/latest/tutorial.html#the-migration-environment +```bash +# set the env variables +export ENVIRONMENT_FILE=.env.alembic +# create a new migration with description in app/migrations +poetry run alembic revision -m "create account table" +# perform all migrations +poetry run alembic upgrade head +# downgrade by one migration +poetry run alembic downgrade -1 +``` * create migrations with `alembic revision -m "create account table"` * if running in the normal pipenv environment use `PIPENV_DOTENV_LOCATION=.env.alembic pipenv run` followed by: @@ -127,16 +172,12 @@ curl -XPOST -H "Authorization: Bearer GOOGLE_OAUTH_TOKEN" -H "Content-type: appl ### Testing ```bash -# can be done from top level but let's do it from the src folder for consistency 
with CI etc -cd src +# set the testing environment variables +export ENVIRONMENT_FILE=.env.test # run tests and generate coverage -PYTHONPATH=. PIPENV_DOTENV_LOCATION=.env.test pipenv run coverage run -m pytest -vv --disable-warnings --color=yes tests/ && pipenv run coverage html - +poetry run coverage run -m pytest -vv --disable-warnings --color=yes app/tests/ # get coverage report in command line -pipenv run coverage report - -# get coverage HTML -pipenv run coverage html - -# > open/run server on htmlcov/index.html to navigate through line coverage +poetry run coverage report +# get coverage report in HTML format +poetry run coverage html ``` diff --git a/app/example.user-groups.yaml b/app/example.user-groups.yaml deleted file mode 100644 index 707e39f..0000000 --- a/app/example.user-groups.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# email-level group access -users: - email1@example.com: - - group1 - - group2 - email2@example.com: - - group2 - email3@example-no-group.com: - -# domain-level group access (taken from the emails) -domains: - example.com: - - group3 - -orchestrators: - group1: secrets/orchestration-group1.yaml - group2: secrets/orchestration-group2.yaml - default: secrets/orchestration-default.yaml \ No newline at end of file diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 82e81e3..2151953 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -4,7 +4,8 @@ services: restart: "no" env_file: .env.dev volumes: - - ./app:/aa-api/app # for --reload to work + - ./app/web:/aa-api/app/web # for --reload to work + - ./app/shared:/aa-api/app/shared # for --reload to work environment: - ENVIRONMENT_FILE=.env.dev - SERVE_LOCAL_ARCHIVE=/aa-api/app/local_archive # See orchestration.yaml local_storage.save_to @@ -18,7 +19,8 @@ services: restart: "no" env_file: .env.dev volumes: - - ./app:/aa-api/app # for watchmedo + - ./app/worker:/aa-api/app/worker # for watchmedo to work + - ./app/shared:/aa-api/app/shared # for watchmedo to work 
redis: restart: "no" diff --git a/docker-compose.yml b/docker-compose.yml index 90b5f28..7730960 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: volumes: - ./logs:/aa-api/logs - ./database:/aa-api/database + - ./secrets:/aa-api/secrets depends_on: - redis healthcheck: @@ -37,6 +38,7 @@ services: volumes: - ./logs:/aa-api/logs - ./database:/aa-api/database + - ./secrets:/aa-api/secrets - /var/run/docker.sock:/var/run/docker.sock - crawls:/crawls # BROWSERTRIX_HOME_HOST:BROWSERTRIX_HOME_CONTAINER, do not change /crawls environment: diff --git a/app/user-groups.example.yaml b/user-groups.example.yaml similarity index 100% rename from app/user-groups.example.yaml rename to user-groups.example.yaml diff --git a/web.Dockerfile b/web.Dockerfile index 14a751b..d142877 100644 --- a/web.Dockerfile +++ b/web.Dockerfile @@ -13,11 +13,10 @@ RUN pip install --no-cache-dir poetry COPY pyproject.toml poetry.lock README.md . RUN poetry install --with web --no-interaction --no-ansi --no-cache -# Copy the application code +# Copy the application code and configurations COPY alembic.ini ./ -COPY .env* ./app/ -COPY ./secrets/ ./secrets/ COPY ./app/ ./app/ +COPY user-groups.* ./app/ # Run the FastAPI app with Uvicorn ENTRYPOINT ["poetry", "run"] diff --git a/worker.Dockerfile b/worker.Dockerfile index 4c70730..99ddf8a 100644 --- a/worker.Dockerfile +++ b/worker.Dockerfile @@ -27,8 +27,7 @@ RUN ./poetry-venv/bin/poetry install --without dev --no-root --no-cache # copy source code and .env files over COPY alembic.ini ./ -COPY .env* ./app/ -COPY ./secrets/ ./secrets/ COPY ./app/ ./app/ +COPY user-groups.* ./app/ ENTRYPOINT ["./poetry-venv/bin/poetry", "run"] \ No newline at end of file