diff --git a/README.md b/README.md index 47d2130..180ebe7 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,12 @@ cd /src orchestration must be from the console(?) * turn off VPNs if connection to docker is not working +## Database migrations +See the [Alembic tutorial](https://alembic.sqlalchemy.org/en/latest/tutorial.html#the-migration-environment) for how the migration environment works. Run the commands below from `src/`, where `alembic.ini` lives. + +* create a migration with `alembic revision -m "create account table"` +* upgrade to the most recent revision with `alembic upgrade head` +* downgrade one revision with `alembic downgrade -1` ## Release Copy `.env` and `src/.env` to deployment, along with the contents of `secrets/` including `secrets/orchestration.yaml`. @@ -26,4 +32,5 @@ Then `docker compose up -d`. #### updating packages/app/access If pipenv packages are updated: `pipenv lock --requirements -r > requirements.txt` (manually comment line `-i https://pypi.org/simple`) and then `docker compose down` + `docker compose up --build -d` to build images with new packages. -New users should be added to the `src/.env` file `ALLOWED_EMAILS` prop \ No newline at end of file +New users should be added to the `src/.env` file `ALLOWED_EMAILS` prop + diff --git a/docker-compose.yml b/docker-compose.yml index 7318a02..ad63945 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -46,6 +46,7 @@ services: build: ./src restart: always command: flower --app=worker.celery --port=5555 --broker=redis://:${REDIS_PASSWORD}@redis:6379/0 --basic_auth=${FLOWER_USERNAME}:${FLOWER_PASSWORD} + env_file: src/.env ports: - 5556:5555 environment: diff --git a/src/.example.env b/src/.example.env index 1b7f28d..08675e7 100644 --- a/src/.example.env +++ b/src/.example.env @@ -3,4 +3,5 @@ GOOGLE_CHROME_APP_ID=0000000000000000000000000000000000.apps.googleusercontent.c ALLOWED_EMAILS=email1,email2 ORCHESTRATION_CONFIG_DEFAULT=secrets/orchestration.yaml # optional -# ORCHESTRATION_CONFIG_BELLINGCAT=secrets/orchestration-bcat.yaml \ No newline at end of file +# ORCHESTRATION_CONFIG_BELLINGCAT=secrets/orchestration-bcat.yaml 
+DATABASE_PATH="sqlite:///./auto-archiver.db" \ No newline at end of file diff --git a/src/Pipfile b/src/Pipfile index ed0e47a..a084b1a 100644 --- a/src/Pipfile +++ b/src/Pipfile @@ -18,6 +18,7 @@ python-dotenv = "*" loguru = "*" sqlalchemy = "*" auto-archiver = "*" +alembic = "*" [dev-packages] diff --git a/src/Pipfile.lock b/src/Pipfile.lock index 7c2fb2a..1a2c05e 100644 --- a/src/Pipfile.lock +++ b/src/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "3cd66e4888849b1bdd4ab32b2cf89ccff2260648da860c9890cb9ff9012ce451" + "sha256": "8fe474e7b0a87b22d9b1f772066d0a26927e38ad081f25ef2afefe27f0b922dc" }, "pipfile-spec": 6, "requires": { @@ -32,6 +32,14 @@ "index": "pypi", "version": "==0.18.0" }, + "alembic": { + "hashes": [ + "sha256:4d3bd32ecdbb7bbfb48a9fe9e6d6fd6a831a1b59d03e26e292210237373e7db5", + "sha256:6f1c2207369bf4f49f952057a33bb017fbe5c148c2a773b46906b806ea6e825f" + ], + "index": "pypi", + "version": "==1.9.4" + }, "amqp": { "hashes": [ "sha256:70cdb10628468ff14e57ec2f751c7aa9e48e7e3651cfd62d431213c0c4e58f21", @@ -805,6 +813,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==4.9.2" }, + "mako": { + "hashes": [ + "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818", + "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.4" + }, "markdown-it-py": { "hashes": [ "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30", @@ -1172,7 +1188,7 @@ "sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537", "sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2" ], - "markers": "python_version >= '3.6' and python_version < '4'", + "markers": "python_version >= '3.6' and python_version < '4.0'", "version": "==0.8.1" }, "pytz": { @@ -1375,7 +1391,7 @@ "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", 
"sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" ], - "markers": "python_version >= '3.6' and python_version < '4'", + "markers": "python_version >= '3.6' and python_version < '4.0'", "version": "==4.9" }, "s3transfer": { diff --git a/src/alembic.ini b/src/alembic.ini new file mode 100644 index 0000000..62db386 --- /dev/null +++ b/src/alembic.ini @@ -0,0 +1,106 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = migrations + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python-dateutil library that can be +# installed by adding `alembic[tz]` to the pip requirements +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. 
+# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# passed from .env +# sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/src/db/crud.py b/src/db/crud.py index b3e4c0d..9447e13 100644 --- a/src/db/crud.py +++ b/src/db/crud.py @@ -15,10 +15,6 @@ def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100): def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100): return base_query(db).filter(models.Task.author==email).offset(skip).limit(limit).all() -def base_query(db:Session): - # allow only some fields to be returned, for example author should remain hidden - return db.query(models.Task).options(load_only(models.Task.id, models.Task.created_at, models.Task.url, models.Task.result)) - def create_task(db: Session, task: schemas.TaskCreate): db_task = models.Task(id=task.id, url=task.url, author=task.author, result=task.result) db.add(db_task) @@ -26,10 +22,22 @@ def create_task(db: Session, task: schemas.TaskCreate): db.refresh(db_task) return db_task -# TODO: implement soft delete so that S3 content can be found ant not dangling -def delete_task(db: Session, task_id: str, email:str)->bool: - db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first() +# def delete_task(db: Session, task_id: 
str, email:str)->bool: +# db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first() +# if db_task: +# db.delete(db_task) +# db.commit() +# return db_task is not None + +def soft_delete_task(db: Session, task_id: str, email:str)->bool: + db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email, models.Task.deleted==False).first() if db_task: - db.delete(db_task) + db_task.deleted = True db.commit() - return db_task is not None \ No newline at end of file + return db_task is not None + +def base_query(db:Session): + # allow only some fields to be returned, for example author should remain hidden + return db.query(models.Task)\ + .options(load_only(models.Task.id, models.Task.created_at, models.Task.url, models.Task.result))\ + .filter(models.Task.deleted == False) diff --git a/src/db/database.py b/src/db/database.py index 416fc5e..55c47ea 100644 --- a/src/db/database.py +++ b/src/db/database.py @@ -1,9 +1,10 @@ from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker +import os -SQLALCHEMY_DATABASE_URL = "sqlite:///./auto-archiver.db" +SQLALCHEMY_DATABASE_URL = os.environ["DATABASE_PATH"] # required (see src/.example.env); a missing variable fails fast with a KeyError that names it # SQLALCHEMY_DATABASE_URL = "postgresql://user:password@postgresserver/db" engine = create_engine( SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False} diff --git a/src/db/models.py b/src/db/models.py index 78d9df9..5f58fc4 100644 --- a/src/db/models.py +++ b/src/db/models.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, String, JSON, DateTime +from sqlalchemy import Column, String, JSON, DateTime, Boolean from sqlalchemy.sql import func from .database import Base @@ -12,5 +12,7 @@ class Task(Base): result = Column(JSON, default=None) created_at = Column(DateTime(timezone=True), server_default=func.now()) # updated_at = 
Column(DateTime(timezone=True), onupdate=func.now()) + deleted = Column(Boolean, default=False) + # items = relationship("Item", back_populates="owner") diff --git a/src/db/schemas.py b/src/db/schemas.py index c7cbf93..03c5d32 100644 --- a/src/db/schemas.py +++ b/src/db/schemas.py @@ -10,6 +10,7 @@ class TaskCreate(BaseModel): class Task(TaskCreate): created_at: datetime + deleted: bool class Config: orm_mode = True \ No newline at end of file diff --git a/src/main.py b/src/main.py index a352be0..873afbc 100644 --- a/src/main.py +++ b/src/main.py @@ -26,7 +26,7 @@ assert len(GOOGLE_CHROME_APP_ID)>10, "GOOGLE_CHROME_APP_ID env variable not set" ALLOWED_EMAILS = set(os.environ.get("ALLOWED_EMAILS", "").split(",")) assert len(GOOGLE_CHROME_APP_ID)>=1, "at least one ALLOWED_EMAILS is required from the env variable" ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",") -VERSION = "0.1.5" +VERSION = "0.1.6" app = FastAPI() app.add_middleware( @@ -95,7 +95,7 @@ def get_status(task_id, access_token:str, db: Session = Depends(get_db)): logger.info(f"deleting task {task_id} request by {email}") return JSONResponse({ "id": task_id, - "deleted": crud.delete_task(db, task_id, email) + "deleted": crud.soft_delete_task(db, task_id, email) }) @@ -103,11 +103,14 @@ def get_status(task_id, access_token:str, db: Session = Depends(get_db)): def home(): return JSONResponse({"status": "good", "version": VERSION}) + +import alembic.config @app.on_event("startup") async def on_startup(): # # Not needed if you setup a migration system like Alembic # await create_db_and_tables()https://github.com/bellingcat/auto-archiver/tree/dockerize models.Base.metadata.create_all(bind=engine) + alembic.config.main(argv=['--raiseerr', 'upgrade', 'head']) #### helper methods def authenticate_user(access_token): diff --git a/src/migrations/README b/src/migrations/README new file mode 100644 
index 0000000..98e4f9c --- /dev/null +++ b/src/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/src/migrations/env.py b/src/migrations/env.py new file mode 100644 index 0000000..baf6ff6 --- /dev/null +++ b/src/migrations/env.py @@ -0,0 +1,81 @@ +from logging.config import fileConfig +import os +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context +from dotenv import load_dotenv + +load_dotenv() + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config +config.set_main_option('sqlalchemy.url', os.environ["DATABASE_PATH"]) # required; a missing variable raises a KeyError that names it +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name, disable_existing_loggers=False) # False keeps loggers that already exist (e.g. the app's loguru setup) enabled + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. 
+ + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/src/migrations/script.py.mako b/src/migrations/script.py.mako new file mode 100644 index 0000000..55df286 --- /dev/null +++ b/src/migrations/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/src/migrations/versions/ae468b023078_created_tasks_deleted_column.py b/src/migrations/versions/ae468b023078_created_tasks_deleted_column.py new file mode 100644 index 0000000..1f43f19 --- /dev/null +++ b/src/migrations/versions/ae468b023078_created_tasks_deleted_column.py @@ -0,0 +1,31 @@ +"""created tasks.deleted column + +Revision ID: ae468b023078 +Revises: +Create Date: 2023-02-27 12:40:24.146786 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'ae468b023078' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # The app runs Base.metadata.create_all() before `alembic upgrade head`, so on a fresh database tasks.deleted already exists; skip it instead of failing on a duplicate column. + existing_columns = {col['name'] for col in sa.inspect(op.get_bind()).get_columns('tasks')} + if 'deleted' in existing_columns: + return + op.add_column( + 'tasks', + sa.Column('deleted', sa.Boolean, default=False, nullable=False, server_default=sa.sql.expression.false()), + ) + + +def downgrade() -> None: + op.drop_column('tasks', 'deleted') diff --git a/src/requirements.txt b/src/requirements.txt index 3c7e96d..4695367 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -8,6 +8,7 @@ # -i https://pypi.org/simple aiofiles==0.6.0 aiosqlite==0.18.0 +alembic==1.9.4 amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' anyio==3.6.2; python_full_version >= '3.6.2' argparse==1.4.0 @@ -58,6 +59,7 @@ jmespath==1.0.1; python_version >= '3.7' kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' loguru==0.6.0 lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' +mako==1.2.4; python_version >= '3.7' markdown-it-py==2.2.0; python_version >= '3.7' markupsafe==2.1.2; python_version >= '3.7' marshmallow-enum==1.5.1 @@ -86,7 +88,7 @@ pytest==6.2.4 python-dateutil==2.8.2; python_version >= '2.7' 
and python_version not in '3.0, 3.1, 3.2, 3.3' python-dotenv==1.0.0 python-slugify==8.0.1; python_version >= '3.7' -python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4' +python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4.0' pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' pytz==2022.7.1 pyyaml==6.0; python_version >= '3.6' @@ -96,7 +98,7 @@ requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0 requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' requests==2.28.2 rich==13.3.1; python_version >= '3.7' -rsa==4.9; python_version >= '3.6' and python_version < '4' +rsa==4.9; python_version >= '3.6' and python_version < '4.0' s3transfer==0.6.0; python_version >= '3.7' selenium==4.8.2; python_version >= '3.7' six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'