From 9085d90b933e82b2d6a496c51c42ccec11dbec5b Mon Sep 17 00:00:00 2001 From: Jason Staack Date: Sun, 15 Mar 2026 06:27:34 -0500 Subject: [PATCH] fix(ci): use TRUNCATE CASCADE for test cleanup, remove superpowers docs - TRUNCATE CASCADE reliably cleans all test data regardless of FK order - Remove docs/superpowers/ from git tracking (already in .gitignore) Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/tests/integration/conftest.py | 16 +- .../plans/2026-03-14-setup-script.md | 1196 ----------------- .../specs/2026-03-14-saas-tiers-design.md | 346 ----- .../specs/2026-03-14-setup-script-design.md | 249 ---- .../specs/2026-03-14-vpn-isolation-design.md | 274 ---- 5 files changed, 4 insertions(+), 2077 deletions(-) delete mode 100644 docs/superpowers/plans/2026-03-14-setup-script.md delete mode 100644 docs/superpowers/specs/2026-03-14-saas-tiers-design.md delete mode 100644 docs/superpowers/specs/2026-03-14-setup-script-design.md delete mode 100644 docs/superpowers/specs/2026-03-14-vpn-isolation-design.md diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index c2cad1a..eedefe5 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -165,22 +165,14 @@ async def admin_session(admin_engine) -> AsyncGenerator[AsyncSession, None]: Cleanup deletes all rows from test tables after the test. 
""" session = AsyncSession(admin_engine, expire_on_commit=False) - # Clean up any leftover data from previous tests/runs BEFORE yielding - for table in _CLEANUP_TABLES: - try: - await session.execute(text(f"DELETE FROM {table}")) - except Exception: - pass + # TRUNCATE CASCADE reliably removes all data regardless of FK order + tables_csv = ", ".join(_CLEANUP_TABLES) + await session.execute(text(f"TRUNCATE {tables_csv} CASCADE")) await session.commit() try: yield session finally: - # Clean up all test data in reverse FK order - for table in _CLEANUP_TABLES: - try: - await session.execute(text(f"DELETE FROM {table}")) - except Exception: - pass # Table might not exist in some migration states + await session.execute(text(f"TRUNCATE {tables_csv} CASCADE")) await session.commit() await session.close() diff --git a/docs/superpowers/plans/2026-03-14-setup-script.md b/docs/superpowers/plans/2026-03-14-setup-script.md deleted file mode 100644 index 4b61595..0000000 --- a/docs/superpowers/plans/2026-03-14-setup-script.md +++ /dev/null @@ -1,1196 +0,0 @@ -# Setup Script Implementation Plan - -> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Create `setup.py` — an interactive production setup wizard that configures `.env.prod`, bootstraps OpenBao, builds Docker images, starts the stack, and verifies health. - -**Architecture:** Single-file Python script using only stdlib. Linear wizard flow with grouped sections. Auto-generates all secrets and numeric config; only prompts for ~6 human decisions. Integrates OpenBao bootstrap by starting its container and parsing credentials from logs. 
- -**Tech Stack:** Python 3.10+ stdlib (`secrets`, `subprocess`, `getpass`, `socket`, `pathlib`, `re`, `datetime`, `shutil`, `sys`, `signal`, `textwrap`) - -**Spec:** `docs/superpowers/specs/2026-03-14-setup-script-design.md` - ---- - -## File Map - -| Action | File | Responsibility | -|--------|------|----------------| -| Create | `setup.py` | Interactive setup wizard (root of project) | -| Modify | `docker-compose.yml:21,30` | Change `mikrotik` → `${POSTGRES_DB:-tod}` in default + healthcheck | -| Modify | `docker-compose.prod.yml:68` | Change hardcoded poller DATABASE_URL to `${POLLER_DATABASE_URL}` | -| Modify | `scripts/init-postgres.sql:6,26` | Change `mikrotik` → `tod` in GRANT statements | -| Modify | `.env.example:17,20-22,25,51` | Change `mikrotik` → `tod` in all references + CORS comment | -| Modify | `.env.staging.example:9,13-15,18` | Change `mikrotik` → `tod` in all references | -| Modify | `frontend/src/routes/login.tsx:235-241` | Wrap dev hint in `import.meta.env.DEV` guard | - ---- - -## Chunk 1: Database Rename & Login Fix - -### Task 1: Rename database from `mikrotik` to `tod` - -**Files:** -- Modify: `docker-compose.yml:21,30` -- Modify: `docker-compose.prod.yml:68` -- Modify: `scripts/init-postgres.sql:6,26` -- Modify: `.env.example:17,20-22,25,51` -- Modify: `.env.staging.example:9,13-15,18` - -- [ ] **Step 1: Update docker-compose.yml default and healthcheck** - -In `docker-compose.yml`, change line 21: -```yaml - POSTGRES_DB: ${POSTGRES_DB:-tod} -``` - -Change line 30: -```yaml - test: ["CMD-SHELL", "pg_isready -U postgres -d ${POSTGRES_DB:-tod}"] -``` - -- [ ] **Step 2: Update docker-compose.prod.yml poller DATABASE_URL** - -In `docker-compose.prod.yml`, change line 68 from: -```yaml - DATABASE_URL: postgres://poller_user:poller_password@postgres:5432/mikrotik -``` -to: -```yaml - DATABASE_URL: ${POLLER_DATABASE_URL:-postgres://poller_user:poller_password@postgres:5432/tod} -``` - -- [ ] **Step 3: Update init-postgres.sql** - -In 
`scripts/init-postgres.sql`, change line 6: -```sql -GRANT CONNECT ON DATABASE tod TO app_user; -``` - -Change line 26: -```sql -GRANT CONNECT ON DATABASE tod TO poller_user; -``` - -- [ ] **Step 4: Update .env.example** - -Replace all `mikrotik` references with `tod`, including the CORS comment on line 51: -``` -POSTGRES_DB=tod -DATABASE_URL=postgresql+asyncpg://postgres:postgres@postgres:5432/tod -SYNC_DATABASE_URL=postgresql+psycopg2://postgres:postgres@postgres:5432/tod -APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:app_password@postgres:5432/tod -POLLER_DATABASE_URL=postgres://poller_user:poller_password@postgres:5432/tod -``` - -Line 51 change: -``` -# Prod: set to your actual domain, e.g., https://tod.yourdomain.com -``` - -- [ ] **Step 4b: Update .env.staging.example** - -Replace all `mikrotik` references with `tod` in `.env.staging.example`: -``` -POSTGRES_DB=tod -DATABASE_URL=postgresql+asyncpg://postgres:CHANGE_ME_STAGING@postgres:5432/tod -SYNC_DATABASE_URL=postgresql+psycopg2://postgres:CHANGE_ME_STAGING@postgres:5432/tod -APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:CHANGE_ME_STAGING@postgres:5432/tod -POLLER_DATABASE_URL=postgres://poller_user:poller_password@postgres:5432/tod -``` - -- [ ] **Step 5: Commit** - -```bash -git add docker-compose.yml docker-compose.prod.yml scripts/init-postgres.sql .env.example .env.staging.example -git commit -m "refactor: rename database from mikrotik to tod" -``` - -### Task 2: Hide login page dev hint in production - -**Files:** -- Modify: `frontend/src/routes/login.tsx:235-241` - -- [ ] **Step 1: Wrap dev hint in DEV guard** - -In `frontend/src/routes/login.tsx`, replace lines 235-241: -```tsx - {/* First-run hint */} -
-

- First time? Use the credentials from your .env file - (FIRST_ADMIN_EMAIL / FIRST_ADMIN_PASSWORD). -

-
-``` - -With: -```tsx - {/* First-run hint (dev only) */} - {import.meta.env.DEV && ( -
-

- First time? Use the credentials from your .env file - (FIRST_ADMIN_EMAIL / FIRST_ADMIN_PASSWORD). -

-
- )} -``` - -- [ ] **Step 2: Commit** - -```bash -git add frontend/src/routes/login.tsx -git commit -m "fix: hide first-run credential hint in production builds" -``` - ---- - -## Chunk 2: Setup Script — Helpers & Pre-flight - -### Task 3: Create setup.py with helpers and pre-flight checks - -**Files:** -- Create: `setup.py` - -- [ ] **Step 1: Write the script header, color helpers, and pre-flight checks** - -Create `setup.py` with: - -```python -#!/usr/bin/env python3 -"""TOD Production Setup Wizard. - -Interactive setup script that configures .env.prod, bootstraps OpenBao, -builds Docker images, starts the stack, and verifies service health. - -Usage: - python3 setup.py -""" - -import base64 -import datetime -import getpass -import os -import pathlib -import re -import secrets -import shutil -import signal -import socket -import subprocess -import sys -import textwrap -import time - -# ── Constants ──────────────────────────────────────────────────────────────── - -PROJECT_ROOT = pathlib.Path(__file__).resolve().parent -ENV_PROD = PROJECT_ROOT / ".env.prod" -INIT_SQL_TEMPLATE = PROJECT_ROOT / "scripts" / "init-postgres.sql" -INIT_SQL_PROD = PROJECT_ROOT / "scripts" / "init-postgres-prod.sql" -COMPOSE_BASE = "docker-compose.yml" -COMPOSE_PROD = "docker-compose.prod.yml" -COMPOSE_CMD = [ - "docker", "compose", - "-f", COMPOSE_BASE, - "-f", COMPOSE_PROD, -] - -REQUIRED_PORTS = { - 5432: "PostgreSQL", - 6379: "Redis", - 4222: "NATS", - 8001: "API", - 3000: "Frontend", - 51820: "WireGuard (UDP)", -} - - -# ── Color helpers ──────────────────────────────────────────────────────────── - -def _supports_color() -> bool: - return hasattr(sys.stdout, "isatty") and sys.stdout.isatty() - -_COLOR = _supports_color() - -def _c(code: str, text: str) -> str: - return f"\033[{code}m{text}\033[0m" if _COLOR else text - -def green(t: str) -> str: return _c("32", t) -def yellow(t: str) -> str: return _c("33", t) -def red(t: str) -> str: return _c("31", t) -def cyan(t: str) -> str: 
return _c("36", t) -def bold(t: str) -> str: return _c("1", t) -def dim(t: str) -> str: return _c("2", t) - - -def banner(text: str) -> None: - width = 62 - print() - print(cyan("=" * width)) - print(cyan(f" {text}")) - print(cyan("=" * width)) - print() - - -def section(text: str) -> None: - print() - print(bold(f"--- {text} ---")) - print() - - -def ok(text: str) -> None: - print(f" {green('✓')} {text}") - - -def warn(text: str) -> None: - print(f" {yellow('!')} {text}") - - -def fail(text: str) -> None: - print(f" {red('✗')} {text}") - - -def info(text: str) -> None: - print(f" {dim('·')} {text}") - - -# ── Input helpers ──────────────────────────────────────────────────────────── - -def ask(prompt: str, default: str = "", required: bool = False, - secret: bool = False, validate=None) -> str: - """Prompt the user for input with optional default, validation, and secret mode.""" - suffix = f" [{default}]" if default else "" - full_prompt = f" {prompt}{suffix}: " - - while True: - if secret: - value = getpass.getpass(full_prompt) - else: - value = input(full_prompt) - - value = value.strip() - if not value and default: - value = default - - if required and not value: - warn("This field is required.") - continue - - if validate: - error = validate(value) - if error: - warn(error) - continue - - return value - - -def ask_yes_no(prompt: str, default: bool = False) -> bool: - """Ask a yes/no question.""" - hint = "Y/n" if default else "y/N" - while True: - answer = input(f" {prompt} [{hint}]: ").strip().lower() - if not answer: - return default - if answer in ("y", "yes"): - return True - if answer in ("n", "no"): - return False - warn("Please enter y or n.") - - -def mask_secret(value: str) -> str: - """Show first 8 chars of a secret, mask the rest.""" - if len(value) <= 8: - return value - return value[:8] + "..." 
- - -# ── Validators ─────────────────────────────────────────────────────────────── - -def validate_password_strength(value: str) -> str | None: - if len(value) < 12: - return "Password must be at least 12 characters." - return None - - -def validate_email(value: str) -> str | None: - if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", value): - return "Please enter a valid email address." - return None - - -def validate_domain(value: str) -> str | None: - # Strip protocol if provided - cleaned = re.sub(r"^https?://", "", value).rstrip("/") - if not re.match(r"^[a-zA-Z0-9]([a-zA-Z0-9\-]*\.)+[a-zA-Z]{2,}$", cleaned): - return "Please enter a valid domain (e.g. tod.example.com)." - return None - - -# ── System checks ──────────────────────────────────────────────────────────── - -def check_python_version() -> bool: - if sys.version_info < (3, 10): - fail(f"Python 3.10+ required, found {sys.version}") - return False - ok(f"Python {sys.version_info.major}.{sys.version_info.minor}") - return True - - -def check_docker() -> bool: - try: - result = subprocess.run( - ["docker", "info"], - capture_output=True, text=True, timeout=10, - ) - if result.returncode != 0: - fail("Docker is not running. 
Start Docker and try again.") - return False - ok("Docker Engine") - except FileNotFoundError: - fail("Docker is not installed.") - return False - except subprocess.TimeoutExpired: - fail("Docker is not responding.") - return False - - try: - result = subprocess.run( - ["docker", "compose", "version"], - capture_output=True, text=True, timeout=10, - ) - if result.returncode != 0: - fail("Docker Compose v2 is not available.") - return False - version_match = re.search(r"v?(\d+\.\d+)", result.stdout) - version_str = version_match.group(1) if version_match else "unknown" - ok(f"Docker Compose v{version_str}") - except FileNotFoundError: - fail("Docker Compose is not installed.") - return False - - return True - - -def check_ram() -> None: - try: - if sys.platform == "darwin": - result = subprocess.run( - ["sysctl", "-n", "hw.memsize"], - capture_output=True, text=True, timeout=5, - ) - ram_bytes = int(result.stdout.strip()) - else: - with open("/proc/meminfo") as f: - for line in f: - if line.startswith("MemTotal:"): - ram_bytes = int(line.split()[1]) * 1024 - break - else: - return - - ram_gb = ram_bytes / (1024 ** 3) - if ram_gb < 4: - warn(f"Only {ram_gb:.1f} GB RAM detected. 4 GB+ recommended for builds.") - else: - ok(f"{ram_gb:.1f} GB RAM") - except Exception: - info("Could not detect RAM — skipping check") - - -def check_ports() -> None: - for port, service in REQUIRED_PORTS.items(): - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(1) - result = s.connect_ex(("127.0.0.1", port)) - if result == 0: - warn(f"Port {port} ({service}) is already in use") - else: - ok(f"Port {port} ({service}) is free") - except Exception: - info(f"Could not check port {port} ({service})") - - -def check_existing_env() -> str: - """Check for existing .env.prod. 
Returns 'overwrite', 'backup', or 'abort'.""" - if not ENV_PROD.exists(): - return "overwrite" - - print() - warn(f"Existing .env.prod found at {ENV_PROD}") - print() - print(" What would you like to do?") - print(f" {bold('1)')} Overwrite it") - print(f" {bold('2)')} Back it up and create a new one") - print(f" {bold('3)')} Abort") - print() - - while True: - choice = input(" Choice [1/2/3]: ").strip() - if choice == "1": - return "overwrite" - elif choice == "2": - ts = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") - backup = ENV_PROD.with_name(f".env.prod.backup.{ts}") - shutil.copy2(ENV_PROD, backup) - ok(f"Backed up to {backup.name}") - return "overwrite" - elif choice == "3": - return "abort" - else: - warn("Please enter 1, 2, or 3.") - - -def preflight() -> bool: - """Run all pre-flight checks. Returns True if OK to proceed.""" - banner("TOD Production Setup") - print(" This wizard will configure your production environment,") - print(" generate secrets, bootstrap OpenBao, build images, and") - print(" start the stack.") - print() - - section("Pre-flight Checks") - - if not check_python_version(): - return False - if not check_docker(): - return False - check_ram() - check_ports() - - action = check_existing_env() - if action == "abort": - print() - info("Setup aborted.") - return False - - return True -``` - -- [ ] **Step 2: Make executable** - -```bash -chmod +x setup.py -``` - -- [ ] **Step 3: Verify script loads without errors** - -Run: `cd /Volumes/ssd01/v9/the-other-dude && python3 -c "import setup; print('OK')"` -Expected: `OK` - -- [ ] **Step 4: Commit** - -```bash -git add setup.py -git commit -m "feat(setup): add helpers, validators, and pre-flight checks" -``` - ---- - -## Chunk 3: Setup Script — Wizard Sections & Env Generation - -### Task 4: Add wizard sections and .env.prod generation - -**Files:** -- Modify: `setup.py` - -- [ ] **Step 1: Add the wizard configuration functions** - -Append to `setup.py` before the end: - -```python -# ── 
Secret generation ──────────────────────────────────────────────────────── - -def generate_jwt_secret() -> str: - return secrets.token_urlsafe(64) - - -def generate_encryption_key() -> str: - return base64.b64encode(secrets.token_bytes(32)).decode() - - -def generate_db_password() -> str: - return secrets.token_urlsafe(24) - - -def generate_admin_password() -> str: - return secrets.token_urlsafe(18) - - -# ── Wizard sections ───────────────────────────────────────────────────────── - -def wizard_database(config: dict) -> None: - section("Database") - info("PostgreSQL superuser password — used for migrations and admin operations.") - info("The app and poller service passwords will be auto-generated.") - print() - - config["postgres_password"] = ask( - "PostgreSQL superuser password", - required=True, - secret=True, - validate=validate_password_strength, - ) - - config["app_user_password"] = generate_db_password() - config["poller_user_password"] = generate_db_password() - config["postgres_db"] = "tod" - - ok("Database passwords configured") - info(f"app_user password: {mask_secret(config['app_user_password'])}") - info(f"poller_user password: {mask_secret(config['poller_user_password'])}") - - -def wizard_security(config: dict) -> None: - section("Security") - info("Auto-generating cryptographic keys...") - print() - - config["jwt_secret"] = generate_jwt_secret() - config["encryption_key"] = generate_encryption_key() - - ok("JWT signing key generated") - ok("Credential encryption key generated") - print() - warn("Save these somewhere safe — they cannot be recovered if lost:") - info(f"JWT_SECRET_KEY={mask_secret(config['jwt_secret'])}") - info(f"CREDENTIAL_ENCRYPTION_KEY={mask_secret(config['encryption_key'])}") - - -def wizard_admin(config: dict) -> None: - section("Admin Account") - info("The first admin account is created on initial startup.") - print() - - config["admin_email"] = ask( - "Admin email", - default="admin@the-other-dude.dev", - required=True, - 
validate=validate_email, - ) - - print() - info("Enter a password or press Enter to auto-generate one.") - password = ask("Admin password", secret=True) - - if password: - error = validate_password_strength(password) - while error: - warn(error) - password = ask("Admin password", secret=True, required=True, - validate=validate_password_strength) - error = None # ask() already validated - config["admin_password"] = password - config["admin_password_generated"] = False - else: - config["admin_password"] = generate_admin_password() - config["admin_password_generated"] = True - ok(f"Generated password: {bold(config['admin_password'])}") - warn("Save this now — it will not be shown again after setup.") - - -def wizard_email(config: dict) -> None: - section("Email (SMTP)") - info("Email is used for password reset links.") - print() - - if not ask_yes_no("Configure SMTP now?", default=False): - config["smtp_configured"] = False - info("Skipped — you can re-run setup.py later to configure email.") - return - - config["smtp_configured"] = True - config["smtp_host"] = ask("SMTP host", required=True) - config["smtp_port"] = ask("SMTP port", default="587") - config["smtp_user"] = ask("SMTP username (optional)") - config["smtp_password"] = ask("SMTP password (optional)", secret=True) if config["smtp_user"] else "" - config["smtp_from"] = ask("From address", required=True, validate=validate_email) - config["smtp_tls"] = ask_yes_no("Use TLS?", default=True) - - -def wizard_domain(config: dict) -> None: - section("Web / Domain") - info("Your production domain, used for CORS and email links.") - print() - - raw = ask("Production domain (e.g. 
tod.example.com)", required=True, validate=validate_domain) - domain = re.sub(r"^https?://", "", raw).rstrip("/") - config["domain"] = domain - config["app_base_url"] = f"https://{domain}" - config["cors_origins"] = f"https://{domain}" - - ok(f"APP_BASE_URL=https://{domain}") - ok(f"CORS_ORIGINS=https://{domain}") - - -# ── Summary ────────────────────────────────────────────────────────────────── - -def show_summary(config: dict) -> bool: - banner("Configuration Summary") - - print(f" {bold('Database')}") - print(f" POSTGRES_DB = {config['postgres_db']}") - print(f" POSTGRES_PASSWORD = {mask_secret(config['postgres_password'])}") - print(f" app_user password = {mask_secret(config['app_user_password'])}") - print(f" poller_user password = {mask_secret(config['poller_user_password'])}") - print() - - print(f" {bold('Security')}") - print(f" JWT_SECRET_KEY = {mask_secret(config['jwt_secret'])}") - print(f" ENCRYPTION_KEY = {mask_secret(config['encryption_key'])}") - print() - - print(f" {bold('Admin Account')}") - print(f" Email = {config['admin_email']}") - print(f" Password = {'(auto-generated)' if config.get('admin_password_generated') else mask_secret(config['admin_password'])}") - print() - - print(f" {bold('Email')}") - if config.get("smtp_configured"): - print(f" SMTP_HOST = {config['smtp_host']}") - print(f" SMTP_PORT = {config['smtp_port']}") - print(f" SMTP_FROM = {config['smtp_from']}") - print(f" SMTP_TLS = {config['smtp_tls']}") - else: - print(f" {dim('(not configured)')}") - print() - - print(f" {bold('Web')}") - print(f" Domain = {config['domain']}") - print(f" APP_BASE_URL = {config['app_base_url']}") - print() - - print(f" {bold('OpenBao')}") - print(f" {dim('(will be captured automatically during bootstrap)')}") - print() - - return ask_yes_no("Write .env.prod with these settings?", default=True) -``` - -- [ ] **Step 2: Add the .env.prod writer and init SQL generator** - -Append to `setup.py`: - -```python -# ── File writers 
───────────────────────────────────────────────────────────── - -def write_env_prod(config: dict) -> None: - """Write the .env.prod file.""" - db = config["postgres_db"] - pg_pw = config["postgres_password"] - app_pw = config["app_user_password"] - poll_pw = config["poller_user_password"] - ts = datetime.datetime.now().isoformat(timespec="seconds") - - smtp_block = "" - if config.get("smtp_configured"): - smtp_block = f"""\ -SMTP_HOST={config['smtp_host']} -SMTP_PORT={config['smtp_port']} -SMTP_USER={config.get('smtp_user', '')} -SMTP_PASSWORD={config.get('smtp_password', '')} -SMTP_USE_TLS={'true' if config.get('smtp_tls') else 'false'} -SMTP_FROM_ADDRESS={config['smtp_from']}""" - else: - smtp_block = """\ -# Email not configured — re-run setup.py to add SMTP -SMTP_HOST= -SMTP_PORT=587 -SMTP_USER= -SMTP_PASSWORD= -SMTP_USE_TLS=true -SMTP_FROM_ADDRESS=noreply@example.com""" - - content = f"""\ -# ============================================================ -# TOD Production Environment — generated by setup.py -# Generated: {ts} -# ============================================================ - -# --- Database --- -POSTGRES_DB={db} -POSTGRES_USER=postgres -POSTGRES_PASSWORD={pg_pw} -DATABASE_URL=postgresql+asyncpg://postgres:{pg_pw}@postgres:5432/{db} -SYNC_DATABASE_URL=postgresql+psycopg2://postgres:{pg_pw}@postgres:5432/{db} -APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:{app_pw}@postgres:5432/{db} -POLLER_DATABASE_URL=postgres://poller_user:{poll_pw}@postgres:5432/{db} - -# --- Security --- -JWT_SECRET_KEY={config['jwt_secret']} -CREDENTIAL_ENCRYPTION_KEY={config['encryption_key']} - -# --- OpenBao (KMS) --- -OPENBAO_ADDR=http://openbao:8200 -OPENBAO_TOKEN=PLACEHOLDER_RUN_SETUP -BAO_UNSEAL_KEY=PLACEHOLDER_RUN_SETUP - -# --- Admin Bootstrap --- -FIRST_ADMIN_EMAIL={config['admin_email']} -FIRST_ADMIN_PASSWORD={config['admin_password']} - -# --- Email --- -{smtp_block} - -# --- Web --- -APP_BASE_URL={config['app_base_url']} 
-CORS_ORIGINS={config['cors_origins']} - -# --- Application --- -ENVIRONMENT=production -LOG_LEVEL=info -DEBUG=false -APP_NAME=TOD - The Other Dude - -# --- Storage --- -GIT_STORE_PATH=/data/git-store -FIRMWARE_CACHE_DIR=/data/firmware-cache -WIREGUARD_CONFIG_PATH=/data/wireguard -WIREGUARD_GATEWAY=wireguard -CONFIG_RETENTION_DAYS=90 - -# --- Redis & NATS --- -REDIS_URL=redis://redis:6379/0 -NATS_URL=nats://nats:4222 - -# --- Poller --- -POLL_INTERVAL_SECONDS=60 -CONNECTION_TIMEOUT_SECONDS=10 -COMMAND_TIMEOUT_SECONDS=30 - -# --- Remote Access --- -TUNNEL_PORT_MIN=49000 -TUNNEL_PORT_MAX=49100 -TUNNEL_IDLE_TIMEOUT=300 -SSH_RELAY_PORT=8080 -SSH_IDLE_TIMEOUT=900 - -# --- Config Backup --- -CONFIG_BACKUP_INTERVAL=21600 -CONFIG_BACKUP_MAX_CONCURRENT=10 -""" - - ENV_PROD.write_text(content) - ok(f"Wrote {ENV_PROD.name}") - - -def write_init_sql_prod(config: dict) -> None: - """Generate init-postgres-prod.sql with production passwords.""" - app_pw = config["app_user_password"] - poll_pw = config["poller_user_password"] - db = config["postgres_db"] - - content = f"""\ --- Production database init — generated by setup.py --- Passwords match those in .env.prod - -DO $$ -BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'app_user') THEN - CREATE ROLE app_user WITH LOGIN PASSWORD '{app_pw}' NOSUPERUSER NOCREATEDB NOCREATEROLE; - END IF; -END -$$; - -GRANT CONNECT ON DATABASE {db} TO app_user; -GRANT USAGE ON SCHEMA public TO app_user; - -DO $$ -BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'poller_user') THEN - CREATE ROLE poller_user WITH LOGIN PASSWORD '{poll_pw}' NOSUPERUSER NOCREATEDB NOCREATEROLE BYPASSRLS; - END IF; -END -$$; - -GRANT CONNECT ON DATABASE {db} TO poller_user; -GRANT USAGE ON SCHEMA public TO poller_user; -""" - - INIT_SQL_PROD.write_text(content) - ok(f"Wrote {INIT_SQL_PROD.name}") -``` - -- [ ] **Step 3: Verify script still loads** - -Run: `cd /Volumes/ssd01/v9/the-other-dude && python3 -c "import setup; 
print('OK')"` -Expected: `OK` - -- [ ] **Step 4: Commit** - -```bash -git add setup.py -git commit -m "feat(setup): add wizard sections and env file generation" -``` - ---- - -## Chunk 4: Setup Script — OpenBao Bootstrap, Build, Start, Health Check - -### Task 5: Add OpenBao bootstrap, image builds, stack start, and health checks - -**Files:** -- Modify: `setup.py` - -- [ ] **Step 1: Add OpenBao bootstrap function** - -Append to `setup.py`: - -```python -# ── Docker operations ──────────────────────────────────────────────────────── - -def run_compose(*args, check: bool = True, capture: bool = False, - timeout: int = 600) -> subprocess.CompletedProcess: - """Run a docker compose command with the prod overlay.""" - cmd = COMPOSE_CMD + ["--env-file", str(ENV_PROD)] + list(args) - return subprocess.run( - cmd, - capture_output=capture, - text=True, - timeout=timeout, - check=check, - cwd=PROJECT_ROOT, - ) - - -def bootstrap_openbao(config: dict) -> bool: - """Start OpenBao, capture credentials, update .env.prod.""" - section("OpenBao Bootstrap") - info("Starting PostgreSQL and OpenBao containers...") - - try: - run_compose("up", "-d", "postgres", "openbao") - except subprocess.CalledProcessError as e: - fail("Failed to start OpenBao containers.") - info(str(e)) - return False - - info("Waiting for OpenBao to initialize (up to 60s)...") - - # Wait for the container to be healthy - deadline = time.time() + 60 - healthy = False - while time.time() < deadline: - result = subprocess.run( - ["docker", "inspect", "--format", "{{.State.Health.Status}}", "tod_openbao"], - capture_output=True, text=True, timeout=10, - ) - status = result.stdout.strip() - if status == "healthy": - healthy = True - break - time.sleep(2) - - if not healthy: - fail("OpenBao did not become healthy within 60 seconds.") - warn("Your .env.prod has placeholder tokens. 
To fix manually:") - info(" docker compose logs openbao") - info(" Look for BAO_UNSEAL_KEY and OPENBAO_TOKEN lines") - info(" Update .env.prod with those values") - return False - - ok("OpenBao is healthy") - - # Parse credentials from container logs - info("Capturing OpenBao credentials from logs...") - result = subprocess.run( - ["docker", "compose", "-f", COMPOSE_BASE, "-f", COMPOSE_PROD, "logs", "openbao"], - capture_output=True, text=True, timeout=30, cwd=PROJECT_ROOT, - ) - - logs = result.stdout + result.stderr - unseal_match = re.search(r"BAO_UNSEAL_KEY=(\S+)", logs) - token_match = re.search(r"OPENBAO_TOKEN=(\S+)", logs) - - if unseal_match and token_match: - unseal_key = unseal_match.group(1) - root_token = token_match.group(1) - - # Update .env.prod - env_content = ENV_PROD.read_text() - env_content = env_content.replace("OPENBAO_TOKEN=PLACEHOLDER_RUN_SETUP", - f"OPENBAO_TOKEN={root_token}") - env_content = env_content.replace("BAO_UNSEAL_KEY=PLACEHOLDER_RUN_SETUP", - f"BAO_UNSEAL_KEY={unseal_key}") - ENV_PROD.write_text(env_content) - - ok("OpenBao credentials captured and saved to .env.prod") - info(f"OPENBAO_TOKEN={mask_secret(root_token)}") - info(f"BAO_UNSEAL_KEY={mask_secret(unseal_key)}") - return True - else: - # OpenBao was already initialized — check if .env.prod has real values - env_content = ENV_PROD.read_text() - if "PLACEHOLDER_RUN_SETUP" in env_content: - warn("Could not find credentials in logs (OpenBao may already be initialized).") - warn("Check 'docker compose logs openbao' and update .env.prod manually.") - return False - else: - ok("OpenBao already initialized — existing credentials in .env.prod") - return True -``` - -- [ ] **Step 2: Add image build function** - -Append to `setup.py`: - -```python -def build_images() -> bool: - """Build Docker images one at a time to avoid OOM.""" - section("Building Images") - info("Building images sequentially to avoid memory issues...") - print() - - services = ["api", "poller", "frontend", 
"winbox-worker"] - - for i, service in enumerate(services, 1): - info(f"[{i}/{len(services)}] Building {service}...") - try: - run_compose("build", service, timeout=900) - ok(f"{service} built successfully") - except subprocess.CalledProcessError: - fail(f"Failed to build {service}") - print() - warn("To retry this build:") - info(f" docker compose -f {COMPOSE_BASE} -f {COMPOSE_PROD} build {service}") - return False - except subprocess.TimeoutExpired: - fail(f"Build of {service} timed out (15 min)") - return False - - print() - ok("All images built successfully") - return True -``` - -- [ ] **Step 3: Add stack start and health check functions** - -Append to `setup.py`: - -```python -def start_stack() -> bool: - """Start the full stack.""" - section("Starting Stack") - info("Bringing up all services...") - - try: - run_compose("up", "-d") - ok("Stack started") - return True - except subprocess.CalledProcessError as e: - fail("Failed to start stack") - info(str(e)) - return False - - -def health_check(config: dict) -> None: - """Poll service health for up to 60 seconds.""" - section("Health Check") - info("Checking service health (up to 60s)...") - print() - - services = [ - ("tod_postgres", "PostgreSQL"), - ("tod_redis", "Redis"), - ("tod_nats", "NATS"), - ("tod_openbao", "OpenBao"), - ("tod_api", "API"), - ("tod_poller", "Poller"), - ("tod_frontend", "Frontend"), - ("tod_winbox_worker", "WinBox Worker"), - ] - - deadline = time.time() + 60 - pending = dict(services) - - while pending and time.time() < deadline: - for container, label in list(pending.items()): - try: - result = subprocess.run( - ["docker", "inspect", "--format", - "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}", - container], - capture_output=True, text=True, timeout=5, - ) - status = result.stdout.strip() - if status in ("healthy", "running"): - ok(f"{label}: {status}") - del pending[container] - except Exception: - pass - - if pending: - time.sleep(3) - - for 
container, label in pending.items(): - fail(f"{label}: not healthy") - info(f" Check logs: docker compose logs {container.replace('tod_', '')}") - - # Final summary - print() - if not pending: - banner("Setup Complete!") - print(f" {bold('Access your instance:')}") - print(f" URL: {green(config['app_base_url'])}") - print(f" Email: {config['admin_email']}") - if config.get("admin_password_generated"): - print(f" Password: {bold(config['admin_password'])}") - else: - print(f" Password: (the password you entered)") - print() - info("Change the admin password after your first login.") - else: - warn("Some services are not healthy. Check the logs above.") - info(f" docker compose -f {COMPOSE_BASE} -f {COMPOSE_PROD} logs") -``` - -- [ ] **Step 4: Add the main function and signal handler** - -Append to `setup.py`: - -```python -# ── Main ───────────────────────────────────────────────────────────────────── - -def main() -> int: - # Graceful Ctrl+C - env_written = False - - def handle_sigint(sig, frame): - nonlocal env_written - print() - if not env_written: - info("Aborted before writing .env.prod — no files changed.") - else: - warn(f".env.prod was already written to {ENV_PROD}") - info("OpenBao tokens may still be placeholders if bootstrap didn't complete.") - sys.exit(1) - - signal.signal(signal.SIGINT, handle_sigint) - - os.chdir(PROJECT_ROOT) - - # Phase 1: Pre-flight - if not preflight(): - return 1 - - # Phase 2: Wizard - config: dict = {} - wizard_database(config) - wizard_security(config) - wizard_admin(config) - wizard_email(config) - wizard_domain(config) - - # Summary - if not show_summary(config): - info("Setup cancelled.") - return 1 - - # Phase 3: Write files - section("Writing Configuration") - write_env_prod(config) - write_init_sql_prod(config) - env_written = True - - # Phase 4: OpenBao - bao_ok = bootstrap_openbao(config) - - # Phase 5: Build - if not build_images(): - warn("Fix the build error and re-run setup.py to continue.") - return 1 - - # Phase 
6: Start - if not start_stack(): - return 1 - - # Phase 7: Health - health_check(config) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) -``` - -- [ ] **Step 5: Verify complete script loads and has main** - -Run: `cd /Volumes/ssd01/v9/the-other-dude && python3 -c "from setup import main; print('OK')"` -Expected: `OK` - -- [ ] **Step 6: Commit** - -```bash -git add setup.py -git commit -m "feat(setup): add OpenBao bootstrap, builds, start, and health checks" -``` - ---- - -## Chunk 5: Docker Compose — Mount Production Init SQL - -### Task 6: Mount init-postgres-prod.sql in production compose - -**Files:** -- Modify: `docker-compose.prod.yml` - -- [ ] **Step 1: Add postgres volume override for prod init SQL** - -In `docker-compose.prod.yml`, add a `postgres` service override to mount the production init SQL instead of the dev one. Add before the `api` service (or anywhere in services): - -```yaml - postgres: - volumes: - - ./docker-data/postgres:/var/lib/postgresql/data - - ./scripts/init-postgres-prod.sql:/docker-entrypoint-initdb.d/init.sql:ro - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres -d ${POSTGRES_DB:-tod}"] - interval: 5s - timeout: 5s - retries: 5 -``` - -This overrides the base `docker-compose.yml` volumes for postgres, mounting the prod init SQL with generated passwords instead of the dev one with hardcoded passwords. 
- -- [ ] **Step 2: Commit** - -```bash -git add docker-compose.prod.yml -git commit -m "feat(setup): mount production init SQL and use env var for healthcheck" -``` - ---- - -## Chunk 6: Final Integration - -### Task 7: End-to-end verification - -- [ ] **Step 1: Verify all modified files are consistent** - -Run these checks: -```bash -# Ensure no remaining 'mikrotik' references in key files -grep -r "mikrotik" docker-compose.yml docker-compose.prod.yml scripts/init-postgres.sql .env.example -# Expected: no output - -# Verify setup.py syntax -python3 -m py_compile setup.py -# Expected: no output (success) - -# Verify login.tsx has the DEV guard -grep -A2 "import.meta.env.DEV" frontend/src/routes/login.tsx -# Expected: shows the DEV-gated hint block -``` - -- [ ] **Step 2: Verify no remaining mikrotik references anywhere** - -```bash -grep -r "mikrotik" docker-compose*.yml scripts/init-postgres.sql .env.example .env.staging.example 2>/dev/null || echo "All clear" -``` - -Expected: `All clear` (no output from grep) diff --git a/docs/superpowers/specs/2026-03-14-saas-tiers-design.md b/docs/superpowers/specs/2026-03-14-saas-tiers-design.md deleted file mode 100644 index 87e1585..0000000 --- a/docs/superpowers/specs/2026-03-14-saas-tiers-design.md +++ /dev/null @@ -1,346 +0,0 @@ -# SaaS Tiers, Invite System & Plan Limits — Design Spec - -## Overview - -Add a tier/quota system to TOD that limits tenants, devices, and users per account. Includes a one-time invite system for onboarding new users with configurable plan limits, and a disabled-by-default public signup page for future homelab tier. - -**Branch:** `saas-tiers` (diverges from open-source `main`) - -**Target audience:** Super_admin (you) managing a SaaS offering, and invited users who manage their own tenants. - -## Design Decisions - -- **No new "Account" model** — the existing user model is extended. The "account owner" is just the user who accepted an invite and creates tenants. 
-- **Per-user plan limits** — a `plan_limits` table keyed by user ID stores max_tenants, max_devices_per_tenant, max_users_per_tenant. -- **No limits row = no limits** — super_admin users never have a `plan_limits` row, so they're unlimited. -- **Invite-based onboarding** — super_admin generates one-time invite links (32 bytes / 256 bits entropy). No self-registration yet (homelab signup page exists but is disabled by default). -- **Existing RBAC preserved** — invited users become `tenant_admin` of tenants they create. No new roles. -- **`user_tenants` join table** — tracks which tenants a user belongs to and their role in each. The user's `tenant_id` column becomes the "currently active" tenant. Tenant switching updates this field and re-issues the JWT. -- **Enforcement at creation time** — limits are checked when creating devices, users, or tenants. Not on every request. -- **Invited users start as bcrypt** — with `must_upgrade_auth=True`. SRP upgrade happens on first login, consistent with existing user creation flow. - -## Data Model - -### New Tables - -#### `plan_limits` - -Per-user quota configuration. If no row exists for a user, no limits are enforced (super_admin behavior). - -| Column | Type | Default | Description | -|--------|------|---------|-------------| -| `id` | UUID PK | gen_random_uuid() | Primary key | -| `user_id` | UUID FK (users.id), unique | — | The account owner | -| `max_tenants` | integer | 2 | Max tenants this user can own | -| `max_devices_per_tenant` | integer | 10 | Max devices per tenant | -| `max_users_per_tenant` | integer | 10 | Max users per tenant (0 = owner only) | -| `plan_name` | varchar(50) | "invite" | Plan identifier: "invite", "homelab", "custom" | -| `created_at` | timestamptz | now() | | -| `updated_at` | timestamptz | now() | | - -RLS policy: super_admin can read/write all rows. Users can read their own row. - -#### `user_tenants` - -Join table tracking which tenants a user belongs to and their role in each. 
- -| Column | Type | Default | Description | -|--------|------|---------|-------------| -| `id` | UUID PK | gen_random_uuid() | Primary key | -| `user_id` | UUID FK (users.id) | — | The user | -| `tenant_id` | UUID FK (tenants.id) | — | The tenant | -| `role` | varchar(50) | "tenant_admin" | User's role in this tenant | -| `created_at` | timestamptz | now() | | - -Unique constraint on `(user_id, tenant_id)`. This table allows a single user (single email, single password) to be a member of multiple tenants without duplicating user rows. - -The existing `users.tenant_id` column is retained as the "currently active" tenant. The `switch-tenant` endpoint updates this field and re-issues the JWT. - -#### `invites` - -One-time invite tokens generated by super_admin. - -| Column | Type | Default | Description | -|--------|------|---------|-------------| -| `id` | UUID PK | gen_random_uuid() | Primary key | -| `token` | varchar(64), unique, indexed | — | URL-safe random token (32 bytes / 256 bits entropy) | -| `plan_name` | varchar(50) | "invite" | Plan to assign when claimed | -| `created_by` | UUID FK (users.id) | — | Super_admin who created it | -| `claimed_by` | UUID FK (users.id), nullable | — | User who claimed it | -| `claimed_at` | timestamptz, nullable | — | When it was claimed | -| `expires_at` | timestamptz | — | 7 days from creation | -| `created_at` | timestamptz | now() | | - -No RLS — only accessible via super_admin endpoints and the public claim endpoint (which validates the token directly). - -### Modified Tables - -#### `tenants` - -Add column: - -| Column | Type | Default | Description | -|--------|------|---------|-------------| -| `owner_id` | UUID FK (users.id), nullable | — | User who created/owns this tenant. Null for bootstrap/super_admin-created tenants (always unlimited). 
| - -#### `system_settings` - -New key-value entry: - -| Key | Default Value | Description | -|-----|---------------|-------------| -| `homelab_signup_enabled` | `"false"` | Controls public signup page visibility | - -### Default Plan Values - -| Plan | max_tenants | max_devices_per_tenant | max_users_per_tenant | -|------|------------|----------------------|---------------------| -| invite | 2 | 10 | 10 | -| homelab | 1 | 5 | 0 (owner only) | -| custom | (set by super_admin) | (set by super_admin) | (set by super_admin) | - -### Migration Notes - -- Existing tenants get `owner_id = NULL` (treated as unlimited / super_admin-owned). -- Existing users get a corresponding `user_tenants` row for their current `tenant_id` with their current `role`. -- No `plan_limits` rows are created for existing users (unlimited by default). - -## Enforcement Logic - -Limits are checked at creation time only — not on every request. - -### Device Creation - -**Endpoints:** `POST /api/tenants/{id}/devices`, VPN onboard endpoint - -1. Look up tenant's `owner_id` -2. If `owner_id` is NULL → no limit (super_admin-owned tenant) -3. Look up `plan_limits` for owner. If no row → no limit. -4. Count devices in tenant (within the same transaction for onboard) -5. If count >= `max_devices_per_tenant` → return 422: `"Device limit reached (5/5)"` - -### User Creation - -**Endpoint:** `POST /api/tenants/{id}/users` - -1. Look up tenant's `owner_id` -2. If `owner_id` is NULL → no limit. -3. Look up `plan_limits` for owner. If no row → no limit. -4. Count active users in tenant -5. If count >= `max_users_per_tenant` → return 422: `"User limit reached (10/10)"` -6. Homelab plan (`max_users_per_tenant = 0`) means only the owner exists — no additional users. - -### Tenant Creation - -**Endpoint:** `POST /api/tenants` - -Currently super_admin only. Change: allow users with a `plan_limits` row to create tenants within their limit. - -1. Look up `plan_limits` for current user. 
If no row → no limit (super_admin). -2. Count tenants where `owner_id = current_user.id` -3. If count >= `max_tenants` → return 422: `"Tenant limit reached (2/2)"` -4. Create tenant with `owner_id = current_user.id` -5. Add `user_tenants` row: `(current_user.id, new_tenant.id, "tenant_admin")` -6. Update `users.tenant_id = new_tenant.id` (switch to the new tenant) - -## Invite System - -### Creating Invites (Super_admin) - -**Endpoint:** `POST /api/invites` - -Rate limit: 20/minute - -Request body: -```json -{ - "plan_name": "invite" // optional, defaults to "invite" -} -``` - -Response: -```json -{ - "id": "uuid", - "token": "abc123...", - "url": "https://app.theotherdude.net/invite/abc123...", - "plan_name": "invite", - "expires_at": "2026-03-21T16:00:00Z", - "created_at": "2026-03-14T16:00:00Z" -} -``` - -- Generates 32-byte URL-safe random token (256 bits entropy — brute force infeasible) -- Sets `expires_at` to 7 days from now - -### Managing Invites (Super_admin) - -- `GET /api/invites` — list all invites with status (pending/claimed/expired) -- `DELETE /api/invites/{id}` — revoke an unclaimed invite - -### Validating an Invite (Public) - -**Endpoint:** `GET /api/invites/{token}/validate` - -Rate limit: 5/minute per IP - -No auth required. Returns: -```json -{ "valid": true } -``` - -Or `{ "valid": false }` — no reason disclosed to prevent information leakage about token states. - -### Claiming an Invite (Public) - -**Endpoint:** `POST /api/invites/{token}/claim` - -Rate limit: 5/minute per IP - -No auth required. Request body: -```json -{ - "name": "Jane Doe", - "email": "jane@example.com", - "password": "securepassword123" -} -``` - -Flow: -1. Validate token (exists, not claimed, not expired). Return generic 400 "Invalid or expired invite" for any failure (no distinction between expired/claimed/not-found). -2. Check email uniqueness globally -3. 
Create user with `role = "tenant_admin"`, `tenant_id = NULL`, `must_upgrade_auth = True` (bcrypt, upgrades to SRP on first login) -4. Create `plan_limits` row with plan defaults based on `invite.plan_name` -5. Mark invite as claimed (`claimed_by`, `claimed_at`) -6. Issue JWT with special `onboarding = true` claim (see Onboarding State below) -7. Frontend redirects to tenant creation page - -### Onboarding State - -After claiming an invite, the user has `tenant_id = NULL` and `role = "tenant_admin"`. The existing RLS middleware blocks non-super_admin users with no tenant. To handle this: - -- The JWT issued during claim includes an `onboarding: true` claim -- The tenant context middleware is modified: if `onboarding = true`, allow access to a whitelist of endpoints only: - - `POST /api/tenants` (create first tenant) - - `GET /api/plan/usage` (see their limits) - - `POST /api/auth/logout` -- All other endpoints return 403: "Please create a tenant first" -- After creating their first tenant, the user gets a normal JWT with `tenant_id` set - -## Tenant Switching - -Users who belong to multiple tenants can switch between them. - -**Endpoint:** `POST /api/auth/switch-tenant` - -Request body: -```json -{ - "tenant_id": "uuid" -} -``` - -Flow: -1. Look up `user_tenants` for `(current_user.id, target_tenant_id)`. If no row → 403 "You do not have access to this tenant". -2. Update `users.tenant_id = target_tenant_id` -3. Issue new JWT with the target `tenant_id` and the role from `user_tenants.role` -4. Return new access token + refresh token - -**Listing available tenants:** - -`GET /api/auth/tenants` — returns all tenants the current user belongs to (from `user_tenants`), including the currently active one. 
- -## API Summary - -### New Endpoints - -| Method | Path | Auth | Rate Limit | Description | -|--------|------|------|------------|-------------| -| `POST` | `/api/invites` | super_admin | 20/min | Create invite | -| `GET` | `/api/invites` | super_admin | — | List all invites | -| `DELETE` | `/api/invites/{id}` | super_admin | 5/min | Revoke invite | -| `GET` | `/api/invites/{token}/validate` | public | 5/min/IP | Check if invite is valid | -| `POST` | `/api/invites/{token}/claim` | public | 5/min/IP | Register via invite | -| `POST` | `/api/auth/switch-tenant` | authenticated | 20/min | Switch active tenant | -| `GET` | `/api/auth/tenants` | authenticated | — | List user's tenants | -| `GET` | `/api/settings/signup-status` | public | — | Check if homelab signup is enabled | -| `GET` | `/api/plan/usage` | authenticated | — | Get current plan limits and usage | -| `PUT` | `/api/admin/users/{user_id}/plan` | super_admin | 20/min | Update a user's plan limits | - -### Modified Endpoints - -| Method | Path | Change | -|--------|------|--------| -| `POST` | `/api/tenants` | Allow users with plan_limits to create; set `owner_id`; add `user_tenants` row | -| `POST` | `/api/tenants/{id}/devices` | Add device limit enforcement | -| `POST` | `/api/tenants/{id}/vpn/peers/onboard` | Add device limit enforcement (before device creation in transaction) | -| `POST` | `/api/tenants/{id}/users` | Add user limit enforcement | - -### Usage Response Schema - -`GET /api/plan/usage` returns: - -```json -{ - "plan_name": "invite", - "tenants": { "current": 1, "max": 2 }, - "active_tenant": { - "tenant_id": "uuid", - "devices": { "current": 3, "max": 10 }, - "users": { "current": 2, "max": 10 } - } -} -``` - -Returns device/user counts for the currently active tenant. - -## Frontend Changes - -### New Pages - -- **`/invite/{token}`** — public invite claim page. Standalone (not behind auth). Shows registration form or "Invalid or expired invite" error. 
-- **`/signup`** — public homelab signup page. Disabled by default. Shows "Not accepting signups" when `homelab_signup_enabled` is false. -- **`/settings/invites`** — super_admin invite management. Create, list, copy link, revoke. - -### Modified Components - -- **Top nav / sidebar** — tenant switcher dropdown for users who belong to multiple tenants. Shows current tenant name, lists available tenants from `GET /api/auth/tenants`, "Create Tenant" option if under limit. -- **Tenant list** — "Create Tenant" button visible to users with a plan_limits row (not just super_admin). Disabled with tooltip if at limit. -- **Tenant detail (super_admin view)** — shows plan limits and current usage. Editable by super_admin. -- **Device list** — subtle usage indicator: "3/10 devices" near the header. Only shown when limits exist. -- **User list** — subtle usage indicator: "2/10 users" near the header. Only shown when limits exist. -- **System settings (super_admin)** — "Enable homelab signups" toggle. 
- -## Audit Logging - -The following operations produce audit log entries: - -- Invite created (by super_admin) -- Invite claimed (by new user) -- Invite revoked (by super_admin) -- Tenant created by non-super_admin user -- Tenant switched -- Plan limits updated by super_admin - -## Error Handling - -| Scenario | HTTP Status | Message | -|----------|-------------|---------| -| Device limit reached | 422 | "Device limit reached ({count}/{max})" | -| User limit reached | 422 | "User limit reached ({count}/{max})" | -| Tenant limit reached | 422 | "Tenant limit reached ({count}/{max})" | -| Invalid/expired/claimed invite | 400 | "Invalid or expired invite" | -| Email already registered | 409 | "Email already in use" | -| Signup disabled | 403 | "Not accepting signups at this time" | -| Switch to unjoined tenant | 403 | "You do not have access to this tenant" | -| Onboarding user hits non-whitelisted endpoint | 403 | "Please create a tenant first" | - -## Out of Scope - -- Billing / Paddle integration -- Homelab self-registration activation (page exists but disabled) -- VPN per-tenant network isolation (separate spec) -- Email notifications for invites (super_admin copies the link) -- Usage metering / analytics dashboard -- Plan upgrade/downgrade flows -- Tenant deletion by non-super_admin users (remains super_admin only) diff --git a/docs/superpowers/specs/2026-03-14-setup-script-design.md b/docs/superpowers/specs/2026-03-14-setup-script-design.md deleted file mode 100644 index 68bc1cf..0000000 --- a/docs/superpowers/specs/2026-03-14-setup-script-design.md +++ /dev/null @@ -1,249 +0,0 @@ -# TOD Production Setup Script — Design Spec - -## Overview - -An interactive Python setup wizard (`setup.py`) that walks a sysadmin through configuring and deploying TOD (The Other Dude) for production. The script minimizes manual configuration by auto-generating secrets, capturing OpenBao credentials automatically, building images sequentially, and verifying service health. 
- -**Target audience:** Technical sysadmins unfamiliar with this specific project. - -## Design Decisions - -- **Python 3.10+** — already required by the stack, enables rich input handling and colored output. -- **Linear wizard with opinionated defaults** — grouped sections, auto-generate everything possible, only prompt for genuine human decisions. -- **Integrated OpenBao bootstrap** — script starts the OpenBao container, captures unseal key and root token, updates `.env.prod` automatically (no manual copy-paste). -- **Sequential image builds** — builds api, poller, frontend, winbox-worker one at a time to avoid OOM on low-RAM machines. -- **Re-runnable** — safe to run again; detects existing `.env.prod` and offers to overwrite, back up (`.env.prod.backup.`), or abort. - -## Prerequisite: Database Rename - -The codebase currently uses `mikrotik` as the database name. Before the setup script can use `tod`, these files must be updated: - -- `docker-compose.yml` — default `POSTGRES_DB` and healthcheck (`pg_isready -d`) -- `docker-compose.prod.yml` — hardcoded poller `DATABASE_URL` (change to `${POLLER_DATABASE_URL}`) -- `docker-compose.staging.yml` — if applicable -- `scripts/init-postgres.sql` — `GRANT CONNECT ON DATABASE` statements -- `.env.example` — all URL references - -The setup script will use `POSTGRES_DB=tod`. These file changes are part of the implementation, not runtime. - -Additionally, `docker-compose.prod.yml` hardcodes the poller's `DATABASE_URL`. This must be changed to `DATABASE_URL: ${POLLER_DATABASE_URL}` so the setup script's generated value is used. 
 - -## Script Flow - -### Phase 1: Pre-flight Checks - -- Verify Python 3.10+ -- Verify Docker Engine and Docker Compose v2 are installed and the daemon is running -- Check for existing `.env.prod` — if found, offer: overwrite / back up and create new / abort -- Warn if less than 4GB RAM available -- Check if key ports are in use (5432, 6379, 4222, 8001, 3000, 51820) and warn - -### Phase 2: Interactive Configuration (Linear Wizard) - -Six sections, presented in order: - -#### 2.1 Database - -| Prompt | Default | Notes | -|--------|---------|-------| -| PostgreSQL superuser password | (required, no default) | Validated non-empty, min 12 chars | - -Auto-generated: -- `POSTGRES_DB=tod` -- `app_user` password via `secrets.token_urlsafe(24)` (yields ~32 base64 chars) -- `poller_user` password via `secrets.token_urlsafe(24)` (yields ~32 base64 chars) -- `DATABASE_URL=postgresql+asyncpg://postgres:<postgres_password>@postgres:5432/tod` -- `SYNC_DATABASE_URL=postgresql+psycopg2://postgres:<postgres_password>@postgres:5432/tod` -- `APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:<app_user_password>@postgres:5432/tod` -- `POLLER_DATABASE_URL=postgres://poller_user:<poller_user_password>@postgres:5432/tod` - -#### 2.2 Security - -No prompts. Auto-generated: -- `JWT_SECRET_KEY` via `secrets.token_urlsafe(64)` (yields ~86 base64 chars) -- `CREDENTIAL_ENCRYPTION_KEY` via `base64(secrets.token_bytes(32))` (yields 44 base64 chars) -- Display both values to the user with a "save these somewhere safe" note - -#### 2.3 Admin Account - -| Prompt | Default | Notes | -|--------|---------|-------| -| Admin email | `admin@the-other-dude.dev` | Validated as email-like | -| Admin password | (enter or press Enter to generate) | Min 12 chars if manual; generated passwords are 24 chars | - -#### 2.4 Email (Optional) - -| Prompt | Default | Notes | -|--------|---------|-------| -| Configure SMTP now?
| No | If no, skip with reminder | -| SMTP host | (required if yes) | | -| SMTP port | 587 | | -| SMTP username | (optional) | | -| SMTP password | (optional) | | -| From address | (required if yes) | | -| Use TLS? | Yes | | - -#### 2.5 Web / Domain - -| Prompt | Default | Notes | -|--------|---------|-------| -| Production domain | (required) | e.g. `tod.staack.com` | - -Auto-derived: -- `APP_BASE_URL=https://<domain>` -- `CORS_ORIGINS=https://<domain>` - -#### 2.6 Summary & Confirmation - -Display all settings grouped by section. Secrets are partially masked (first 8 chars + `...`). Ask for confirmation before writing. - -### Phase 3: Write `.env.prod` - -Write the file with section comments and timestamp header. Also generate `scripts/init-postgres-prod.sql` with the generated `app_user` and `poller_user` passwords baked in (PostgreSQL init scripts don't support env var substitution). - -Format: - -```bash -# ============================================================ -# TOD Production Environment — generated by setup.py -# Generated: <timestamp> -# ============================================================ - -# --- Database --- -POSTGRES_DB=tod -POSTGRES_USER=postgres -POSTGRES_PASSWORD=<postgres_password> -DATABASE_URL=postgresql+asyncpg://postgres:<postgres_password>@postgres:5432/tod -SYNC_DATABASE_URL=postgresql+psycopg2://postgres:<postgres_password>@postgres:5432/tod -APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:<app_user_password>@postgres:5432/tod -POLLER_DATABASE_URL=postgres://poller_user:<poller_user_password>@postgres:5432/tod - -# --- Security --- -JWT_SECRET_KEY=<generated> -CREDENTIAL_ENCRYPTION_KEY=<generated> - -# --- OpenBao (KMS) --- -OPENBAO_ADDR=http://openbao:8200 -OPENBAO_TOKEN=PLACEHOLDER_RUN_SETUP -BAO_UNSEAL_KEY=PLACEHOLDER_RUN_SETUP - -# --- Admin Bootstrap --- -FIRST_ADMIN_EMAIL=<admin_email> -FIRST_ADMIN_PASSWORD=<admin_password> - -# --- Email --- -# (left empty if SMTP was skipped during setup) -SMTP_HOST= -SMTP_PORT=587 -SMTP_USER= -SMTP_PASSWORD= -SMTP_USE_TLS=true -SMTP_FROM_ADDRESS=noreply@example.com - -# --- Web --- -APP_BASE_URL=https://<domain> -CORS_ORIGINS=https://<domain> - -# --- Application --- -ENVIRONMENT=production -LOG_LEVEL=info
-DEBUG=false -APP_NAME=TOD - The Other Dude - -# --- Storage --- -GIT_STORE_PATH=/data/git-store -FIRMWARE_CACHE_DIR=/data/firmware-cache -WIREGUARD_CONFIG_PATH=/data/wireguard -WIREGUARD_GATEWAY=wireguard -CONFIG_RETENTION_DAYS=90 - -# --- Redis & NATS --- -REDIS_URL=redis://redis:6379/0 -NATS_URL=nats://nats:4222 - -# --- Poller --- -POLL_INTERVAL_SECONDS=60 -CONNECTION_TIMEOUT_SECONDS=10 -COMMAND_TIMEOUT_SECONDS=30 - -# --- Remote Access --- -TUNNEL_PORT_MIN=49000 -TUNNEL_PORT_MAX=49100 -TUNNEL_IDLE_TIMEOUT=300 -SSH_RELAY_PORT=8080 -SSH_IDLE_TIMEOUT=900 - -# --- Config Backup --- -CONFIG_BACKUP_INTERVAL=21600 -CONFIG_BACKUP_MAX_CONCURRENT=10 -``` - -### Phase 4: OpenBao Bootstrap - -1. Start postgres and openbao containers only: `docker compose -f docker-compose.yml -f docker-compose.prod.yml --env-file .env.prod up -d postgres openbao` -2. Wait for openbao container to be healthy (timeout 60s) -3. Run `docker compose logs openbao 2>&1` and parse the `OPENBAO_TOKEN=` and `BAO_UNSEAL_KEY=` lines using regex (init.sh prints these to stdout during container startup, which is captured in Docker logs) -4. Update `.env.prod` by replacing the `PLACEHOLDER_RUN_SETUP` values with the captured credentials -5. On failure: `.env.prod` retains placeholders, print instructions for manual capture via `docker compose logs openbao` - -### Phase 5: Build Images - -Build sequentially to avoid OOM: - -``` -docker compose -f docker-compose.yml -f docker-compose.prod.yml build api -docker compose -f docker-compose.yml -f docker-compose.prod.yml build poller -docker compose -f docker-compose.yml -f docker-compose.prod.yml build frontend -docker compose -f docker-compose.yml -f docker-compose.prod.yml build winbox-worker -``` - -Show progress for each. On failure: stop, report which image failed, suggest rerunning. 
 - -### Phase 6: Start - -``` -docker compose -f docker-compose.yml -f docker-compose.prod.yml --env-file .env.prod up -d -``` - -### Phase 7: Health Check - -- Poll service health for up to 60 seconds -- Report status of: postgres, redis, nats, openbao, api, poller, frontend, winbox-worker -- On success: print access URL (`https://<domain>`) and admin credentials -- On timeout: report which services are unhealthy, suggest `docker compose logs <service>` - -## Database Init Script - -`scripts/init-postgres.sql` hardcodes `app_password` and `poller_password`. Since PostgreSQL's `docker-entrypoint-initdb.d` scripts don't support environment variable substitution, the setup script generates `scripts/init-postgres-prod.sql` with the actual generated passwords baked in. The docker-compose.prod.yml volume mount will be updated to use this file instead. - -## Login Page Fix - -`frontend/src/routes/login.tsx` lines 235-241 contain a "First time?" hint showing `.env` credential names. This will be wrapped in `{import.meta.env.DEV && (...)}` so it only appears in development builds. Vite's production build strips DEV-gated code entirely.
- -## Error Handling - -| Scenario | Behavior | -|----------|----------| -| Docker not installed/running | Fail early with clear message | -| Existing `.env.prod` | Offer: overwrite / back up / abort | -| Port already in use | Warn (non-blocking) with which port and likely culprit | -| OpenBao init fails | `.env.prod` retains placeholders, print manual capture steps | -| Image build fails | Stop, show failed image, suggest retry command | -| Health check timeout (60s) | Report unhealthy services, suggest log commands | -| Ctrl+C before Phase 3 | Graceful exit, no files written | -| Ctrl+C during/after Phase 3 | `.env.prod` exists (possibly with placeholders), noted on exit | - -## Re-runnability - -- Detects existing `.env.prod` and offers choices -- Won't regenerate secrets if valid ones exist (offers to keep or regenerate) -- OpenBao re-init is idempotent (init.sh handles already-initialized state) -- Image rebuilds are safe (Docker layer caching) -- Backup naming: `.env.prod.backup.` - -## Dependencies - -- Python 3.10+ (stdlib only — no pip packages required) -- Docker Engine 24+ -- Docker Compose v2 -- Stdlib modules: `secrets`, `subprocess`, `shutil`, `json`, `re`, `datetime`, `pathlib`, `getpass`, `socket` (for port checks) diff --git a/docs/superpowers/specs/2026-03-14-vpn-isolation-design.md b/docs/superpowers/specs/2026-03-14-vpn-isolation-design.md deleted file mode 100644 index fc30022..0000000 --- a/docs/superpowers/specs/2026-03-14-vpn-isolation-design.md +++ /dev/null @@ -1,274 +0,0 @@ -# Per-Tenant VPN Network Isolation — Design Spec - -## Overview - -Isolate WireGuard VPN networks per tenant so that devices in one tenant's VPN cannot reach devices in another tenant's VPN. Each tenant gets a unique `/24` subnet auto-allocated from `10.10.0.0/16`, with iptables rules blocking cross-subnet traffic. 
- -**Branch:** `main` (this is a security fix, not SaaS-specific) - -## Design Decisions - -- **Single `wg0` interface** — WireGuard handles thousands of peers on one interface with negligible performance impact. No need for per-tenant interfaces. -- **Per-tenant `/24` subnets** — allocated from `10.10.0.0/16`, giving 255 tenants (index 1–255). Index 0 is reserved. Expandable to `10.0.0.0/8` if needed (note: `_next_available_ip()` materializes all hosts in the subnet, so subnets larger than `/24` require refactoring that function). -- **Auto-allocation only** — `setup_vpn()` picks the next available subnet. No manual override. -- **Global config sync** — one `wg0.conf` with all tenants' peers. Rebuilt on any VPN change. Protected by a PostgreSQL advisory lock to prevent concurrent writes. -- **Global server keypair** — a single WireGuard server keypair stored in `system_settings`, replacing per-tenant server keys. Generated on first `setup_vpn()` call or during migration. -- **iptables isolation** — cross-subnet traffic blocked at the WireGuard container's firewall. IPv6 blocked too. -- **Device-side config is untrusted** — isolation relies entirely on server-side enforcement (AllowedIPs `/32` + iptables DROP). A malicious device operator changing their `allowed-address` to `10.10.0.0/16` on their router gains nothing — the server only routes their assigned `/32`. - -## Data Model Changes - -### Modified: `vpn_config` - -| Column | Change | Description | -|--------|--------|-------------| -| `subnet_index` | **New column**, integer, unique, not null | Maps to third octet: index 1 = `10.10.1.0/24` | -| `subnet` | Default changes | No longer `10.10.0.0/24`; derived from `subnet_index` | -| `server_address` | Default changes | No longer `10.10.0.1/24`; derived as `10.10.{index}.1/24` | -| `server_private_key` | **Deprecated** | Kept in table for rollback safety but no longer used. Global key in `system_settings` is authoritative. 
| -| `server_public_key` | **Deprecated** | Same — kept but unused. All peers use the global public key. | - -### New: `system_settings` entries - -| Key | Description | -|-----|-------------| -| `vpn_server_private_key` | Global WireGuard server private key (encrypted with CREDENTIAL_ENCRYPTION_KEY) | -| `vpn_server_public_key` | Global WireGuard server public key (plaintext) | - -### Allocation Logic - -``` -subnet_index = first available integer in range [1, 255] not already in vpn_config -subnet = 10.10.{subnet_index}.0/24 -server_address = 10.10.{subnet_index}.1/24 -``` - -Allocation query (atomic, gap-filling): -```sql -SELECT MIN(x) FROM generate_series(1, 255) AS x -WHERE x NOT IN (SELECT subnet_index FROM vpn_config) -``` - -If no index available → 422 "VPN subnet pool exhausted". - -Unique constraint on `subnet_index` provides safety against race conditions. On conflict, retry once. - -## VPN Service Changes - -### `setup_vpn(db, tenant_id, endpoint)` - -Current behavior: creates VpnConfig with hardcoded `10.10.0.0/24` and generates a per-tenant server keypair. - -New behavior: -1. **Get or create global server keypair:** check `system_settings` for `vpn_server_private_key`. If not found, generate a new keypair and store both the private key (encrypted) and public key. This happens on the first `setup_vpn()` call on a fresh install. -2. Allocate next `subnet_index` using the gap-filling query -3. Set `subnet = 10.10.{index}.0/24` -4. Set `server_address = 10.10.{index}.1/24` -5. Store the global public key in `server_public_key` (for backward compat / display) -6. Call `sync_wireguard_config(db)` (global, not per-tenant) - -### `sync_wireguard_config(db)` - -Current signature: `sync_wireguard_config(db, tenant_id)` — builds config for one tenant. - -New signature: `sync_wireguard_config(db)` — builds config for ALL tenants. - -**Concurrency protection:** acquire a PostgreSQL advisory lock (`pg_advisory_xact_lock(hash)`) before writing. 
This prevents two simultaneous peer additions from producing a corrupt `wg0.conf`. - -**Atomic write:** write to a temp file, then `os.rename()` to `wg0.conf`. This prevents the WireGuard container from reading a partially-written file. - -New behavior: -1. Acquire advisory lock -2. Read global server private key from `system_settings` (decrypt it) -3. Query ALL enabled `VpnConfig` rows (across all tenants, using admin engine to bypass RLS) -4. For each, query enabled `VpnPeer` rows -5. Build single `wg0.conf`: - -```ini -[Interface] -Address = 10.10.0.1/16 -ListenPort = 51820 -PrivateKey = {global_server_private_key} - -# --- Tenant: {tenant_name} (10.10.1.0/24) --- -[Peer] -PublicKey = {peer_public_key} -PresharedKey = {preshared_key} -AllowedIPs = 10.10.1.2/32 - -# --- Tenant: {tenant_name_2} (10.10.2.0/24) --- -[Peer] -PublicKey = {peer_public_key} -PresharedKey = {preshared_key} -AllowedIPs = 10.10.2.2/32 -``` - -6. Write to temp file, `os.rename()` to `wg0.conf` -7. Touch `.reload` flag -8. Release advisory lock - -### `_next_available_ip(db, tenant_id, config)` - -No changes needed — already scoped to `tenant_id` and uses the config's subnet. With unique subnets per tenant, IPs are naturally isolated. Note: this function materializes all `/24` hosts into a list, which is fine for `/24` (253 entries) but must be refactored if subnets larger than `/24` are ever used. - -### `add_peer(db, tenant_id, device_id, ...)` - -Changes: -- Calls `sync_wireguard_config(db)` instead of `sync_wireguard_config(db, tenant_id)` -- **Validate `additional_allowed_ips`:** if provided, reject any subnet that overlaps with `10.10.0.0/16` (the VPN address space). Only non-VPN subnets are allowed (e.g., `192.168.1.0/24` for site-to-site routing). This prevents a tenant from claiming another tenant's VPN subnet in their AllowedIPs. - -### `remove_peer(db, tenant_id, peer_id)` - -Minor change: calls `sync_wireguard_config(db)` instead of `sync_wireguard_config(db, tenant_id)`. 
- -### Tenant deletion hook - -When a tenant is deleted (CASCADE deletes vpn_config and vpn_peers), call `sync_wireguard_config(db)` to regenerate `wg0.conf` without the deleted tenant's peers. Add this to the tenant deletion endpoint. - -### `read_wg_status()` - -No changes — status is keyed by peer public key, which is unique globally. The existing `get_peer_handshake()` lookup continues to work. - -## WireGuard Container Changes - -### iptables Isolation Rules - -Update `docker-data/wireguard/custom-cont-init.d/10-forwarding.sh`: - -```bash -#!/bin/sh -# Enable forwarding between Docker network and WireGuard tunnel -# Idempotent: check before adding to prevent duplicates on restart -iptables -C FORWARD -i eth0 -o wg0 -j ACCEPT 2>/dev/null || iptables -A FORWARD -i eth0 -o wg0 -j ACCEPT -iptables -C FORWARD -i wg0 -o eth0 -j ACCEPT 2>/dev/null || iptables -A FORWARD -i wg0 -o eth0 -j ACCEPT - -# Block cross-subnet traffic on wg0 (tenant isolation) -# Peers in 10.10.1.0/24 cannot reach peers in 10.10.2.0/24 -iptables -C FORWARD -i wg0 -o wg0 -j DROP 2>/dev/null || iptables -A FORWARD -i wg0 -o wg0 -j DROP - -# Block IPv6 forwarding on wg0 (prevent link-local bypass) -ip6tables -C FORWARD -i wg0 -j DROP 2>/dev/null || ip6tables -A FORWARD -i wg0 -j DROP - -# NAT for return traffic -iptables -C POSTROUTING -t nat -o wg0 -j MASQUERADE 2>/dev/null || iptables -t nat -A POSTROUTING -o wg0 -j MASQUERADE - -echo "WireGuard forwarding and tenant isolation rules applied" -``` - -Rules use `iptables -C` (check) before `-A` (append) to be idempotent across container restarts. - -The key isolation layers: - -1. **WireGuard AllowedIPs** — each peer can only send to its own `/32` IP (cryptographic enforcement) -2. **iptables `wg0 → wg0` DROP** — blocks any traffic that enters and exits the tunnel interface (peer-to-peer) -3. **iptables IPv6 DROP** — prevents link-local IPv6 bypass -4. **Separate subnets** — no IP collisions between tenants -5. 
**`additional_allowed_ips` validation** — blocks tenants from claiming VPN address space - -### Server Address - -The `[Interface] Address` changes from `10.10.0.1/24` to `10.10.0.1/16` so the server can route to all tenant subnets. - -## Routing Changes - -### Poller & API - -No changes needed. Both already route `10.10.0.0/16` via the WireGuard container. - -### setup.py - -Update `prepare_data_dirs()` to write the updated forwarding script with idempotent rules and IPv6 blocking. - -## RouterOS Command Generation - -### `onboard_device()` and `get_peer_config()` - -These generate RouterOS commands for device setup. Changes: - -- `allowed-address` changes from `10.10.0.0/24` to `10.10.{index}.0/24` (tenant's specific subnet) -- `endpoint-address` and `endpoint-port` unchanged -- Server public key changes to the global server public key (read from `system_settings`) - -## Migration - -### Database Migration - -1. Generate global server keypair: - - Create keypair using `generate_wireguard_keypair()` - - Store in `system_settings`: `vpn_server_private_key` (encrypted), `vpn_server_public_key` (plaintext) -2. Add `subnet_index` column to `vpn_config` (integer, unique, not null) -3. For existing VpnConfig rows (may be multiple if multiple tenants have VPN): - - Assign sequential `subnet_index` values starting from 1 - - Update `subnet` to `10.10.{index}.0/24` - - Update `server_address` to `10.10.{index}.1/24` -4. For existing VpnPeer rows: - - Remap IPs: `10.10.0.X` → `10.10.{tenant's index}.X` (preserve the host octet) - - Example: Tenant A (index 1) peer at `10.10.0.2` → `10.10.1.2`. Tenant B (index 2) peer at `10.10.0.2` → `10.10.2.2`. No collision. -5. Regenerate `wg0.conf` using the new global sync function - -### Device-Side Update Required - -This is a **breaking change** for existing VPN peers. 
After migration: -- Devices need updated RouterOS commands: - - New server public key (global key replaces per-tenant key) - - New VPN IP address (`10.10.0.X` → `10.10.{index}.X`) - - New allowed-address (`10.10.{index}.0/24`) -- The API should expose a "regenerate commands" endpoint or show a banner in the UI indicating that VPN reconfiguration is needed. - -### Migration Communication - -After the migration runs: -- Log a warning with the list of affected devices -- Show a banner in the VPN UI: "VPN network updated — devices need reconfiguration. Click here for updated commands." -- The existing "View Setup Commands" button in the UI will show the correct updated commands. - -## API Changes - -### Modified Endpoints - -| Method | Path | Change | -|--------|------|--------| -| `POST` | `/api/tenants/{id}/vpn` | `setup_vpn` allocates subnet_index, uses global server key | -| `GET` | `/api/tenants/{id}/vpn` | Returns tenant's specific subnet info | -| `GET` | `/api/tenants/{id}/vpn/peers/{id}/config` | Returns commands with tenant-specific subnet and global server key | -| `POST` | `/api/tenants/{id}/vpn/peers` | Validates `additional_allowed_ips` doesn't overlap `10.10.0.0/16` | -| `DELETE` | `/api/tenants/{id}` | Calls `sync_wireguard_config(db)` after cascade delete | - -### No New Endpoints - -The isolation is transparent — tenants don't need to know about it. 
- -## Error Handling - -| Scenario | HTTP Status | Message | -|----------|-------------|---------| -| No available subnet index (255 tenants with VPN) | 422 | "VPN subnet pool exhausted" | -| Subnet index conflict (race condition) | — | Retry allocation once | -| `additional_allowed_ips` overlaps VPN space | 422 | "Additional allowed IPs must not overlap the VPN address space (10.10.0.0/16)" | - -## Testing - -- Create two tenants with VPN enabled → verify they get different subnets (`10.10.1.0/24`, `10.10.2.0/24`) -- Add peers in both → verify IPs don't collide -- From tenant A's device, attempt to ping tenant B's device → verify it's blocked -- Verify `wg0.conf` contains peers from both tenants with correct subnets -- Verify iptables rules are in place after container restart (idempotent) -- Verify `additional_allowed_ips` with `10.10.x.x` subnet is rejected -- Delete a tenant → verify `wg0.conf` is regenerated without its peers -- Disable a tenant's VPN → verify peers excluded from `wg0.conf` -- Empty state (no enabled tenants) → verify `wg0.conf` has only `[Interface]` section -- Migration: multiple tenants sharing `10.10.0.0/24` → verify correct remapping to unique subnets - -## Audit Logging - -- Subnet allocated (tenant_id, subnet_index, subnet) -- Global server keypair generated (first-run event) -- VPN config regenerated (triggered by which operation) - -## Out of Scope - -- Multiple WireGuard interfaces (not needed at current scale) -- Manual subnet assignment -- IPv6 VPN support (IPv6 is blocked as a security measure) -- Per-tenant WireGuard listen ports -- VPN-level rate limiting or bandwidth quotas