#!/usr/bin/env python3
"""
Minimal web UI for auto-archiver cloud deployments.

Provides:
  - GET  /              → HTML form to submit URLs for archiving
  - POST /archive       → Runs auto-archiver on submitted URLs
  - GET  /results       → Lists archived files available for download
  - GET  /files/{path}  → Serves archived files
  - GET  /status        → Health check
  - GET/POST /login     → Password login (only when AUTH_PASSWORD is set)
  - GET  /logout        → Drops the current session
"""

import asyncio
import html
import os
import secrets
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import quote

from fastapi import Depends, FastAPI, Form, HTTPException, Request, status
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse

AUTH_PASSWORD = os.environ.get("AUTH_PASSWORD", "")
ARCHIVE_DIR = Path("/app/local_archive")
CONFIG_PATH = Path("/app/secrets/orchestration.yaml")
COOKIE_NAME = "aa_session"

# In-memory session tokens (reset on restart, which is fine for this use case)
_valid_sessions: set[str] = set()

# In-memory job log, newest first
_jobs: list[dict] = []

# Strong references to running archive tasks. The event loop only keeps weak
# references, so a task nobody holds can be garbage-collected mid-run.
_background_tasks: set[asyncio.Task] = set()

app = FastAPI(title="Auto Archiver", docs_url=None, redoc_url=None)


# ── Auth helpers ──────────────────────────────────────────────────────


def _check_auth(request: Request) -> None:
    """Dependency: redirect to /login if auth is enabled and session is missing.

    Raises:
        HTTPException: 303 redirect to ``/login`` when the session cookie is
            absent or unknown.
    """
    if not AUTH_PASSWORD:
        return  # auth disabled
    token = request.cookies.get(COOKIE_NAME, "")
    if token not in _valid_sessions:
        # 303 (not 307): the browser must follow up with a GET. A 307 would
        # replay a POST /archive body against /login.
        raise HTTPException(
            status_code=status.HTTP_303_SEE_OTHER,
            headers={"Location": "/login"},
        )


# ── Pages ─────────────────────────────────────────────────────────────
# NOTE: these templates are rendered with str.format(); any literal brace
# added to them (e.g. in inline CSS) must be doubled.

LOGIN_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Auto Archiver – Login</title>
</head>
<body>
<h2>🔐 Auto Archiver</h2>
<form method="post" action="/login">
<input type="password" name="password" placeholder="Password" autofocus required>
<button type="submit">Login</button>
</form>
{error}
</body>
</html>
"""

MAIN_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Auto Archiver</title>
</head>
<body>
<h1>Auto Archiver</h1>
<p><a href="/results">Archived files</a> {logout}</p>
<form method="post" action="/archive">
<textarea name="urls" rows="6" cols="80" placeholder="One URL per line" required></textarea>
<br>
<button type="submit">Archive</button>
</form>
{jobs_html}
</body>
</html>
"""

RESULTS_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Auto Archiver – Files</title>
</head>
<body>
<h2>📁 Archived Files</h2>
<p><a href="/">← Back</a></p>
{file_list}
</body>
</html>
"""

_NO_FILES_HTML = "<p>No archived files yet.</p>"


# ── Routes ────────────────────────────────────────────────────────────


@app.get("/login", response_class=HTMLResponse)
async def login_page():
    """Show the login form, or go straight to the index when auth is off."""
    if not AUTH_PASSWORD:
        return RedirectResponse("/", status_code=302)
    return LOGIN_HTML.format(error="")


@app.post("/login")
async def login_submit(password: str = Form(...)):
    """Validate the submitted password and start a cookie-backed session."""
    if not AUTH_PASSWORD:
        return RedirectResponse("/", status_code=302)
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not secrets.compare_digest(password.encode(), AUTH_PASSWORD.encode()):
        return HTMLResponse(
            LOGIN_HTML.format(error='<p class="error">Wrong password.</p>'),
            status_code=401,
        )
    token = secrets.token_urlsafe(32)
    _valid_sessions.add(token)
    resp = RedirectResponse("/", status_code=302)
    resp.set_cookie(
        COOKIE_NAME, token, httponly=True, samesite="lax", max_age=86400 * 30
    )
    return resp


@app.get("/", response_class=HTMLResponse)
async def index(request: Request, _=Depends(_check_auth)):
    """Render the submission form together with the recent-jobs table."""
    logout_link = '<a href="/logout">Logout</a>' if AUTH_PASSWORD else ""
    return MAIN_HTML.format(logout=logout_link, jobs_html=_render_jobs())


@app.post("/archive")
async def archive(request: Request, urls: str = Form(...), _=Depends(_check_auth)):
    """Queue an archiving job for the newline-separated URLs in ``urls``.

    Raises:
        HTTPException: 400 when no URL is given, or when an entry starts with
            ``-`` (it would be parsed as a command-line option by the
            auto-archiver subprocess).
    """
    url_list = [u.strip() for u in urls.splitlines() if u.strip()]
    if not url_list:
        raise HTTPException(400, "No URLs provided")
    if any(u.startswith("-") for u in url_list):
        raise HTTPException(400, "Invalid URL")

    job = {
        "id": len(_jobs) + 1,
        "urls": url_list,
        "status": "running",
        "started": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
        "output": "",
    }
    _jobs.insert(0, job)

    # Run in background so the user sees the page immediately. Keep a
    # reference until the task finishes so it cannot be garbage-collected.
    task = asyncio.create_task(_run_archive(job))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return RedirectResponse("/", status_code=303)


@app.get("/results", response_class=HTMLResponse)
async def results(request: Request, _=Depends(_check_auth)):
    """List the most recently modified archived files as download links."""
    if not ARCHIVE_DIR.exists():
        return RESULTS_HTML.format(file_list=_NO_FILES_HTML)

    files = _list_archived_files()
    if not files:
        return RESULTS_HTML.format(file_list=_NO_FILES_HTML)

    items = []
    for f in files:
        rel = f.relative_to(ARCHIVE_DIR)
        # Percent-encode the link target; HTML-escape the visible label.
        href = "/files/" + quote(rel.as_posix())
        items.append(f'<li><a href="{href}">{html.escape(str(rel))}</a></li>')
    return RESULTS_HTML.format(file_list="<ul>\n" + "\n".join(items) + "\n</ul>")


@app.get("/files/{path:path}")
async def serve_file(path: str, request: Request, _=Depends(_check_auth)):
    """Serve one archived file.

    Raises:
        HTTPException: 403 when the resolved path escapes ``ARCHIVE_DIR``,
            404 when it is not an existing regular file.
    """
    # Security: check containment BEFORE touching the file, so the response
    # does not reveal whether a path outside ARCHIVE_DIR exists.
    try:
        full = (ARCHIVE_DIR / path).resolve()
        full.relative_to(ARCHIVE_DIR.resolve())
    except ValueError:
        raise HTTPException(403, "Forbidden") from None
    if not full.is_file():
        raise HTTPException(404, "File not found")
    return FileResponse(full)


@app.get("/status")
async def health():
    """Health check; intentionally unauthenticated."""
    return {"status": "ok"}


@app.get("/logout")
async def logout(request: Request):
    """Invalidate the current session and return to the login page."""
    token = request.cookies.get(COOKIE_NAME, "")
    _valid_sessions.discard(token)
    resp = RedirectResponse("/login", status_code=302)
    resp.delete_cookie(COOKIE_NAME)
    return resp


# ── Helpers ───────────────────────────────────────────────────────────


def _list_archived_files(limit: int = 200) -> list[Path]:
    """Return up to ``limit`` files under ARCHIVE_DIR, newest first.

    Entries that disappear or cannot be stat'ed while listing (the archiver
    may be writing concurrently) are skipped rather than failing the request.
    """
    dated: list[tuple[float, Path]] = []
    for entry in ARCHIVE_DIR.rglob("*"):
        try:
            if entry.is_file():
                dated.append((entry.stat().st_mtime, entry))
        except OSError:
            continue
    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in dated[:limit]]


async def _run_archive(job: dict) -> None:
    """Run auto-archiver as a subprocess for the given URLs.

    Updates ``job["output"]`` (last 5000 characters of combined
    stdout/stderr) and ``job["status"]`` (``"done"`` or ``"failed"``) in place.
    """
    cmd = [
        "python3",
        "-m",
        "auto_archiver",
        "--config",
        str(CONFIG_PATH),
    ] + job["urls"]
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd="/app",
        )
        stdout, _ = await proc.communicate()
        job["output"] = stdout.decode(errors="replace")[-5000:]  # keep last 5k chars
        job["status"] = "done" if proc.returncode == 0 else "failed"
    except Exception as e:
        # Boundary of a fire-and-forget task: record the failure on the job
        # instead of letting it vanish as an unretrieved task exception.
        job["output"] = str(e)
        job["status"] = "failed"


def _render_jobs() -> str:
    """Render the 50 most recent jobs as an HTML table (or an empty hint)."""
    if not _jobs:
        return (
            '<p class="empty">'
            "No archiving jobs yet. Submit URLs above to get started."
            "</p>"
        )
    rows = []
    for j in _jobs[:50]:
        # Show at most three URLs per row; summarise the rest.
        urls_str = html.escape(", ".join(j["urls"][:3]))
        if len(j["urls"]) > 3:
            urls_str += f" (+{len(j['urls']) - 3} more)"
        status_cls = j["status"]
        rows.append(
            "<tr>"
            f"<td>{j['id']}</td>"
            f"<td>{urls_str}</td>"
            f'<td class="status-{status_cls}">{j["status"]}</td>'
            f"<td>{j['started']}</td>"
            "</tr>"
        )
    return (
        "<h3>Recent Jobs</h3>"
        "<table>"
        "<tr><th>#</th><th>URLs</th><th>Status</th><th>Started</th></tr>"
        + "\n".join(rows)
        + "</table>"
    )