added database migrations + soft delete

This commit is contained in:
msramalho
2023-02-27 13:12:22 +01:00
parent 39da4e4eb7
commit 56c18dd96b
16 changed files with 304 additions and 20 deletions

View File

@@ -17,6 +17,12 @@ cd /src
orchestration must be from the console(?) orchestration must be from the console(?)
* turn off VPNs if connection to docker is not working * turn off VPNs if connection to docker is not working
## Database migrations
Migrations are managed with Alembic; see the tutorial for background: https://alembic.sqlalchemy.org/en/latest/tutorial.html#the-migration-environment
* create migrations with `alembic revision -m "create account table"`
* migrate to most recent with `alembic upgrade head`
* downgrade with `alembic downgrade -1`
## Release ## Release
Copy `.env` and `src/.env` to deployment, along with the contents of `secrets/` including `secrets/orchestration.yaml`. Copy `.env` and `src/.env` to deployment, along with the contents of `secrets/` including `secrets/orchestration.yaml`.
@@ -26,4 +32,5 @@ Then `docker compose up -d`.
#### updating packages/app/access #### updating packages/app/access
If pipenv packages are updated: `pipenv lock --requirements -r > requirements.txt` (manually comment line `-i https://pypi.org/simple`) and then `docker compose down` + `docker compose up --build -d` to build images with new packages. If pipenv packages are updated: `pipenv lock --requirements -r > requirements.txt` (manually comment line `-i https://pypi.org/simple`) and then `docker compose down` + `docker compose up --build -d` to build images with new packages.
New users should be added to the `src/.env` file `ALLOWED_EMAILS` prop New users should be added to the `src/.env` file `ALLOWED_EMAILS` prop

View File

@@ -46,6 +46,7 @@ services:
build: ./src build: ./src
restart: always restart: always
command: flower --app=worker.celery --port=5555 --broker=redis://:${REDIS_PASSWORD}@redis:6379/0 --basic_auth=${FLOWER_USERNAME}:${FLOWER_PASSWORD} command: flower --app=worker.celery --port=5555 --broker=redis://:${REDIS_PASSWORD}@redis:6379/0 --basic_auth=${FLOWER_USERNAME}:${FLOWER_PASSWORD}
env_file: src/.env
ports: ports:
- 5556:5555 - 5556:5555
environment: environment:

View File

@@ -3,4 +3,5 @@ GOOGLE_CHROME_APP_ID=0000000000000000000000000000000000.apps.googleusercontent.c
ALLOWED_EMAILS=email1,email2 ALLOWED_EMAILS=email1,email2
ORCHESTRATION_CONFIG_DEFAULT=secrets/orchestration.yaml ORCHESTRATION_CONFIG_DEFAULT=secrets/orchestration.yaml
# optional # optional
# ORCHESTRATION_CONFIG_BELLINGCAT=secrets/orchestration-bcat.yaml # ORCHESTRATION_CONFIG_BELLINGCAT=secrets/orchestration-bcat.yaml
DATABASE_PATH="sqlite:///./auto-archiver.db"

View File

@@ -18,6 +18,7 @@ python-dotenv = "*"
loguru = "*" loguru = "*"
sqlalchemy = "*" sqlalchemy = "*"
auto-archiver = "*" auto-archiver = "*"
alembic = "*"
[dev-packages] [dev-packages]

22
src/Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "3cd66e4888849b1bdd4ab32b2cf89ccff2260648da860c9890cb9ff9012ce451" "sha256": "8fe474e7b0a87b22d9b1f772066d0a26927e38ad081f25ef2afefe27f0b922dc"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@@ -32,6 +32,14 @@
"index": "pypi", "index": "pypi",
"version": "==0.18.0" "version": "==0.18.0"
}, },
"alembic": {
"hashes": [
"sha256:4d3bd32ecdbb7bbfb48a9fe9e6d6fd6a831a1b59d03e26e292210237373e7db5",
"sha256:6f1c2207369bf4f49f952057a33bb017fbe5c148c2a773b46906b806ea6e825f"
],
"index": "pypi",
"version": "==1.9.4"
},
"amqp": { "amqp": {
"hashes": [ "hashes": [
"sha256:70cdb10628468ff14e57ec2f751c7aa9e48e7e3651cfd62d431213c0c4e58f21", "sha256:70cdb10628468ff14e57ec2f751c7aa9e48e7e3651cfd62d431213c0c4e58f21",
@@ -805,6 +813,14 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.9.2" "version": "==4.9.2"
}, },
"mako": {
"hashes": [
"sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818",
"sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"
],
"markers": "python_version >= '3.7'",
"version": "==1.2.4"
},
"markdown-it-py": { "markdown-it-py": {
"hashes": [ "hashes": [
"sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30", "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30",
@@ -1172,7 +1188,7 @@
"sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537", "sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537",
"sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2" "sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2"
], ],
"markers": "python_version >= '3.6' and python_version < '4'", "markers": "python_version >= '3.6' and python_version < '4.0'",
"version": "==0.8.1" "version": "==0.8.1"
}, },
"pytz": { "pytz": {
@@ -1375,7 +1391,7 @@
"sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7",
"sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"
], ],
"markers": "python_version >= '3.6' and python_version < '4'", "markers": "python_version >= '3.6' and python_version < '4.0'",
"version": "==4.9" "version": "==4.9"
}, },
"s3transfer": { "s3transfer": {

106
src/alembic.ini Normal file
View File

@@ -0,0 +1,106 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
script_location = migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =
# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to migrations/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
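# sqlalchemy.url is not set here: migrations/env.py sets it at runtime from the DATABASE_PATH environment variable (see .env)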
# sqlalchemy.url = driver://user:pass@localhost/dbname
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@@ -15,10 +15,6 @@ def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100):
def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100): def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100):
return base_query(db).filter(models.Task.author==email).offset(skip).limit(limit).all() return base_query(db).filter(models.Task.author==email).offset(skip).limit(limit).all()
def base_query(db:Session):
# allow only some fields to be returned, for example author should remain hidden
return db.query(models.Task).options(load_only(models.Task.id, models.Task.created_at, models.Task.url, models.Task.result))
def create_task(db: Session, task: schemas.TaskCreate): def create_task(db: Session, task: schemas.TaskCreate):
db_task = models.Task(id=task.id, url=task.url, author=task.author, result=task.result) db_task = models.Task(id=task.id, url=task.url, author=task.author, result=task.result)
db.add(db_task) db.add(db_task)
@@ -26,10 +22,22 @@ def create_task(db: Session, task: schemas.TaskCreate):
db.refresh(db_task) db.refresh(db_task)
return db_task return db_task
# TODO: implement soft delete so that S3 content can be found ant not dangling # def delete_task(db: Session, task_id: str, email:str)->bool:
def delete_task(db: Session, task_id: str, email:str)->bool: # db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first()
db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first() # if db_task:
# db.delete(db_task)
# db.commit()
# return db_task is not None
def soft_delete_task(db: Session, task_id: str, email:str)->bool:
db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email, models.Task.deleted==False).first()
if db_task: if db_task:
db.delete(db_task) db_task.deleted = True
db.commit() db.commit()
return db_task is not None return db_task is not None
def base_query(db:Session):
    """Build the base Task query shared by the read helpers.

    Only id, created_at, url and result are eagerly loaded (for example,
    author should remain hidden), and soft-deleted tasks are filtered out.
    """
    exposed_columns = (
        models.Task.id,
        models.Task.created_at,
        models.Task.url,
        models.Task.result,
    )
    restricted = db.query(models.Task).options(load_only(*exposed_columns))
    return restricted.filter(models.Task.deleted == False)

View File

@@ -1,9 +1,12 @@
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
import os
SQLALCHEMY_DATABASE_URL = "sqlite:///./auto-archiver.db" SQLALCHEMY_DATABASE_URL = os.environ.get("DATABASE_PATH")#"sqlite:///./auto-archiver.db"
# SQLALCHEMY_DATABASE_URL = "postgresql://user:password@postgresserver/db" # SQLALCHEMY_DATABASE_URL = "postgresql://user:password@postgresserver/db"
print("-"*50)
print(SQLALCHEMY_DATABASE_URL)
engine = create_engine( engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False} SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}

View File

@@ -1,4 +1,4 @@
from sqlalchemy import Column, String, JSON, DateTime from sqlalchemy import Column, String, JSON, DateTime, Boolean
from sqlalchemy.sql import func from sqlalchemy.sql import func
from .database import Base from .database import Base
@@ -12,5 +12,7 @@ class Task(Base):
result = Column(JSON, default=None) result = Column(JSON, default=None)
created_at = Column(DateTime(timezone=True), server_default=func.now()) created_at = Column(DateTime(timezone=True), server_default=func.now())
# updated_at = Column(DateTime(timezone=True), onupdate=func.now()) # updated_at = Column(DateTime(timezone=True), onupdate=func.now())
deleted = Column(Boolean, default=False)
# items = relationship("Item", back_populates="owner") # items = relationship("Item", back_populates="owner")

View File

@@ -10,6 +10,7 @@ class TaskCreate(BaseModel):
class Task(TaskCreate): class Task(TaskCreate):
created_at: datetime created_at: datetime
deleted: bool
class Config: class Config:
orm_mode = True orm_mode = True

View File

@@ -26,7 +26,7 @@ assert len(GOOGLE_CHROME_APP_ID)>10, "GOOGLE_CHROME_APP_ID env variable not set"
ALLOWED_EMAILS = set(os.environ.get("ALLOWED_EMAILS", "").split(",")) ALLOWED_EMAILS = set(os.environ.get("ALLOWED_EMAILS", "").split(","))
assert len(GOOGLE_CHROME_APP_ID)>=1, "at least one ALLOWED_EMAILS is required from the env variable" assert len(GOOGLE_CHROME_APP_ID)>=1, "at least one ALLOWED_EMAILS is required from the env variable"
ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",") ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",")
VERSION = "0.1.5" VERSION = "0.1.6"
app = FastAPI() app = FastAPI()
app.add_middleware( app.add_middleware(
@@ -95,7 +95,7 @@ def get_status(task_id, access_token:str, db: Session = Depends(get_db)):
logger.info(f"deleting task {task_id} request by {email}") logger.info(f"deleting task {task_id} request by {email}")
return JSONResponse({ return JSONResponse({
"id": task_id, "id": task_id,
"deleted": crud.delete_task(db, task_id, email) "deleted": crud.soft_delete_task(db, task_id, email)
}) })
@@ -103,11 +103,14 @@ def get_status(task_id, access_token:str, db: Session = Depends(get_db)):
def home(): def home():
return JSONResponse({"status": "good", "version": VERSION}) return JSONResponse({"status": "good", "version": VERSION})
import alembic.config
@app.on_event("startup") @app.on_event("startup")
async def on_startup(): async def on_startup():
# # Not needed if you setup a migration system like Alembic # # Not needed if you setup a migration system like Alembic
# await create_db_and_tables()https://github.com/bellingcat/auto-archiver/tree/dockerize # await create_db_and_tables()https://github.com/bellingcat/auto-archiver/tree/dockerize
models.Base.metadata.create_all(bind=engine) models.Base.metadata.create_all(bind=engine)
alembic.config.main(argv=['--raiseerr', 'upgrade', 'head'])
#### helper methods #### helper methods
def authenticate_user(access_token): def authenticate_user(access_token):

1
src/migrations/README Normal file
View File

@@ -0,0 +1 @@
Generic single-database configuration.

81
src/migrations/env.py Normal file
View File

@@ -0,0 +1,81 @@
from logging.config import fileConfig
import os
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into os.environ so that
# DATABASE_PATH can be read below.
load_dotenv()

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# The database URL is not stored in alembic.ini; it is read from the
# DATABASE_PATH environment variable. Fail early with a clear message instead
# of passing None to the underlying ConfigParser, which rejects non-string
# values with an unhelpful TypeError.
database_url = os.environ.get("DATABASE_PATH")
if not database_url:
    raise RuntimeError("DATABASE_PATH env variable not set, it is required to run database migrations")
# Option values go through ConfigParser interpolation, so a raw '%' (e.g. from
# a URL-encoded password) must be escaped as '%%'.
config.set_main_option('sqlalchemy.url', database_url.replace('%', '%%'))

# Interpret the config file for Python logging.
# disable_existing_loggers=False keeps loggers that were configured before
# this point (e.g. by the application) enabled.
if config.config_file_name is not None:
    fileConfig(config.config_file_name, disable_existing_loggers=False)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = None
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone, so
    no Engine is created and no DBAPI needs to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    offline_options = dict(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the main
    ini section (using ``NullPool``), and a connection from that Engine is
    associated with the migration context.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as live_connection:
        context.configure(connection=live_connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
# Pick the migration runner that matches the mode alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

View File

@@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,27 @@
"""created tasks.deleted column
Revision ID: ae468b023078
Revises:
Create Date: 2023-02-27 12:40:24.146786
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'ae468b023078'
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the boolean ``deleted`` column to the ``tasks`` table.

    The column is NOT NULL with a server-side default of false, so existing
    rows are populated with false.

    The migration is a no-op when the column is already present. That happens
    when the ``tasks`` table was created straight from the current models
    (e.g. by ``Base.metadata.create_all``) before any migration ran; adding
    the column again would fail with a duplicate-column error.
    """
    from alembic import context  # local import: only needed for the mode check

    # Inspection needs a live connection, so it is skipped in offline (--sql) mode.
    if not context.is_offline_mode():
        inspector = sa.inspect(op.get_bind())
        if inspector.has_table('tasks'):
            existing_columns = {column['name'] for column in inspector.get_columns('tasks')}
            if 'deleted' in existing_columns:
                return

    op.add_column(
        'tasks',
        sa.Column('deleted', sa.Boolean, default=False, nullable=False, server_default=sa.sql.expression.false()),
    )
def downgrade() -> None:
    """Remove the ``deleted`` column from the ``tasks`` table.

    Batch mode is used because SQLite versions before 3.35 do not support
    ``ALTER TABLE ... DROP COLUMN``; on other backends batch mode emits the
    regular ALTER statement.
    """
    with op.batch_alter_table('tasks') as batch_op:
        batch_op.drop_column('deleted')

View File

@@ -8,6 +8,7 @@
# -i https://pypi.org/simple # -i https://pypi.org/simple
aiofiles==0.6.0 aiofiles==0.6.0
aiosqlite==0.18.0 aiosqlite==0.18.0
alembic==1.9.4
amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
anyio==3.6.2; python_full_version >= '3.6.2' anyio==3.6.2; python_full_version >= '3.6.2'
argparse==1.4.0 argparse==1.4.0
@@ -58,6 +59,7 @@ jmespath==1.0.1; python_version >= '3.7'
kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
loguru==0.6.0 loguru==0.6.0
lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
mako==1.2.4; python_version >= '3.7'
markdown-it-py==2.2.0; python_version >= '3.7' markdown-it-py==2.2.0; python_version >= '3.7'
markupsafe==2.1.2; python_version >= '3.7' markupsafe==2.1.2; python_version >= '3.7'
marshmallow-enum==1.5.1 marshmallow-enum==1.5.1
@@ -86,7 +88,7 @@ pytest==6.2.4
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
python-dotenv==1.0.0 python-dotenv==1.0.0
python-slugify==8.0.1; python_version >= '3.7' python-slugify==8.0.1; python_version >= '3.7'
python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4' python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4.0'
pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
pytz==2022.7.1 pytz==2022.7.1
pyyaml==6.0; python_version >= '3.6' pyyaml==6.0; python_version >= '3.6'
@@ -96,7 +98,7 @@ requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0
requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
requests==2.28.2 requests==2.28.2
rich==13.3.1; python_version >= '3.7' rich==13.3.1; python_version >= '3.7'
rsa==4.9; python_version >= '3.6' and python_version < '4' rsa==4.9; python_version >= '3.6' and python_version < '4.0'
s3transfer==0.6.0; python_version >= '3.7' s3transfer==0.6.0; python_version >= '3.7'
selenium==4.8.2; python_version >= '3.7' selenium==4.8.2; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'