Files
the-other-dude/setup.py
Jason Staack e22163c55f fix(ci): format setup.py, register CredentialProfile model
- Run ruff format on setup.py to fix pre-existing style violations
- Add CredentialProfile import to models/__init__.py so SQLAlchemy
  can resolve the Device.credential_profile relationship in tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 18:38:09 -05:00

1996 lines
64 KiB
Python
Executable File

#!/usr/bin/env python3
"""TOD Production Setup Wizard.
Interactive setup script that configures .env.prod, bootstraps OpenBao,
builds Docker images, starts the stack, and verifies service health.
Usage:
python3 setup.py # Interactive mode
python3 setup.py --non-interactive \\
--postgres-password 'MyP@ss!' \\
--domain tod.example.com \\
--admin-email admin@example.com \\
--no-telemetry --yes # Non-interactive mode
"""
import argparse
import base64
import datetime
import getpass
import json
import os
import pathlib
import platform
import re
import secrets
import shutil
import signal
import socket
import subprocess
import sys
import time
import urllib.error
import urllib.request
# ── Constants ────────────────────────────────────────────────────────────────
# Directory containing this script; every project path below is derived from it.
PROJECT_ROOT = pathlib.Path(__file__).resolve().parent
# Environment file written by write_env_prod().
ENV_PROD = PROJECT_ROOT / ".env.prod"
# SQL init scripts under scripts/: the checked-in template and the generated
# production variant written by write_init_sql_prod().
INIT_SQL_TEMPLATE = PROJECT_ROOT / "scripts" / "init-postgres.sql"
INIT_SQL_PROD = PROJECT_ROOT / "scripts" / "init-postgres-prod.sql"
# Compose file names (plain names, not resolved against PROJECT_ROOT).
COMPOSE_BASE = "docker-compose.yml"
COMPOSE_PROD = "docker-compose.prod.yml"
COMPOSE_BUILD_OVERRIDE = "docker-compose.build.yml"
# Base `docker compose` argv layering the production file over the base file.
# NOTE: wizard_build_mode() mutates this list in place, appending
# "-f COMPOSE_BUILD_OVERRIDE" when the user chooses to build from source.
COMPOSE_CMD = [
    "docker",
    "compose",
    "-f",
    COMPOSE_BASE,
    "-f",
    COMPOSE_PROD,
]
# Ports probed by check_ports(), mapped to the service label shown in its
# messages.
REQUIRED_PORTS = {
    5432: "PostgreSQL",
    6379: "Redis",
    4222: "NATS",
    8001: "API",
    3000: "Frontend",
    51820: "WireGuard (UDP)",
}
# ── Color helpers ────────────────────────────────────────────────────────────
def _supports_color() -> bool:
    """Return True when stdout exposes ``isatty`` and reports a terminal."""
    if not hasattr(sys.stdout, "isatty"):
        return False
    return sys.stdout.isatty()


# Evaluated once at import time; all color helpers below consult this flag.
_COLOR = _supports_color()


def _c(code: str, text: str) -> str:
    """Wrap *text* in the ANSI SGR sequence *code*, or return it untouched
    when color output is disabled."""
    if not _COLOR:
        return text
    return f"\033[{code}m{text}\033[0m"


def green(t: str) -> str:
    """Render *t* in green (SGR 32)."""
    return _c("32", t)


def yellow(t: str) -> str:
    """Render *t* in yellow (SGR 33)."""
    return _c("33", t)


def red(t: str) -> str:
    """Render *t* in red (SGR 31)."""
    return _c("31", t)


def cyan(t: str) -> str:
    """Render *t* in cyan (SGR 36)."""
    return _c("36", t)


def bold(t: str) -> str:
    """Render *t* in bold (SGR 1)."""
    return _c("1", t)


def dim(t: str) -> str:
    """Render *t* dimmed (SGR 2)."""
    return _c("2", t)
def banner(text: str) -> None:
    """Print *text* framed above and below by a 62-character cyan rule,
    with a blank line before and after."""
    rule = "=" * 62
    print()
    for line in (rule, f" {text}", rule):
        print(cyan(line))
    print()


def section(text: str) -> None:
    """Print a bold ``--- text ---`` heading surrounded by blank lines."""
    heading = bold(f"--- {text} ---")
    print()
    print(heading)
    print()


def ok(text: str) -> None:
    """Print a success line.

    NOTE(review): the marker handed to green() is an empty string in this
    copy of the file; it may have been a glyph lost in transit -- confirm.
    """
    marker = green("")
    print(f" {marker} {text}")


def warn(text: str) -> None:
    """Print a warning line prefixed with a yellow ``!``."""
    marker = yellow("!")
    print(f" {marker} {text}")


def fail(text: str) -> None:
    """Print a failure line.

    NOTE(review): the marker handed to red() is an empty string in this
    copy of the file; it may have been a glyph lost in transit -- confirm.
    """
    marker = red("")
    print(f" {marker} {text}")


def info(text: str) -> None:
    """Print an informational line prefixed with a dimmed middle dot."""
    marker = dim("·")
    print(f" {marker} {text}")
# ── Setup Telemetry ─────────────────────────────────────────────────────────
# Base URL of the telemetry collector. SetupTelemetry._send() POSTs to
# "<collector>/api/v1/ingest", and write_env_prod() records this value as
# TELEMETRY_COLLECTOR_URL in .env.prod.
_TELEMETRY_COLLECTOR = "https://telemetry.theotherdude.net"
# Bearer token sent in the Authorization header of every telemetry POST.
# The SetupTelemetry docstring describes it as a shared anonymous token.
_TELEMETRY_TOKEN = "75e320cbd48e20e3234ab4e734f86e124a903a7278e643cf6d383708a8a7fe4b"
def _collect_environment() -> dict:
"""Gather allowlisted environment info. No IPs, hostnames, or secrets."""
env = {
"os": platform.system(),
"os_version": platform.release(),
"arch": platform.machine(),
"python": platform.python_version(),
}
# Docker version
try:
r = subprocess.run(
["docker", "version", "--format", "{{.Server.Version}}"],
capture_output=True,
text=True,
timeout=5,
)
if r.returncode == 0:
env["docker"] = r.stdout.strip()
except Exception:
pass
# Compose version
try:
r = subprocess.run(
["docker", "compose", "version", "--short"],
capture_output=True,
text=True,
timeout=5,
)
if r.returncode == 0:
env["compose"] = r.stdout.strip()
except Exception:
pass
# RAM (rounded to nearest GB)
try:
if sys.platform == "darwin":
r = subprocess.run(
["sysctl", "-n", "hw.memsize"],
capture_output=True,
text=True,
timeout=5,
)
if r.returncode == 0:
env["ram_gb"] = round(int(r.stdout.strip()) / (1024**3))
else:
with open("/proc/meminfo") as f:
for line in f:
if line.startswith("MemTotal:"):
env["ram_gb"] = round(int(line.split()[1]) * 1024 / (1024**3))
break
except Exception:
pass
return env
def _get_app_version() -> tuple[str, str]:
    """Return (version, build_id) from git if available.

    ``version`` comes from ``git describe --tags --always`` and ``build_id``
    from ``git rev-parse --short HEAD``, both run in PROJECT_ROOT. Either
    value falls back to ``"unknown"`` when its command fails for any reason.
    """
    git_queries = (
        ["describe", "--tags", "--always"],
        ["rev-parse", "--short", "HEAD"],
    )
    answers = []
    for query in git_queries:
        answer = "unknown"
        try:
            proc = subprocess.run(
                ["git", *query],
                capture_output=True,
                text=True,
                timeout=5,
                cwd=PROJECT_ROOT,
            )
            if proc.returncode == 0:
                answer = proc.stdout.strip()
        except Exception:
            pass
        answers.append(answer)
    version, build_id = answers
    return version, build_id
class SetupTelemetry:
"""Lightweight fire-and-forget telemetry for setup diagnostics.
When enabled, sends one event per setup step to the TOD telemetry collector.
All events use a shared anonymous token — no registration, no PII.
"""
def __init__(self) -> None:
self.enabled = False
self._environment: dict = {}
self._app_version = "unknown"
self._build_id = "unknown"
def enable(self) -> None:
self.enabled = True
self._environment = _collect_environment()
self._app_version, self._build_id = _get_app_version()
def step(
self,
step_name: str,
result: str,
duration_ms: int | None = None,
error_message: str | None = None,
error_code: str | None = None,
metrics: dict | None = None,
) -> None:
"""Emit a single setup step event. No-op if disabled."""
if not self.enabled:
return
event: dict = {
"event_type": "setup",
"severity": "error" if result == "failure" else "info",
"phase": "setup",
"operation": step_name,
"result": result,
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
"app_version": self._app_version,
"build_id": self._build_id,
"environment": self._environment,
}
if duration_ms is not None:
event["duration_ms"] = duration_ms
if error_message:
event["error"] = {"message": error_message[:500], "code": error_code or ""}
if metrics:
event["metrics"] = metrics
self._send([event])
def _send(self, events: list[dict]) -> None:
"""POST events to the collector. Fire-and-forget."""
try:
body = json.dumps({"events": events}).encode()
req = urllib.request.Request(
f"{_TELEMETRY_COLLECTOR}/api/v1/ingest",
data=body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {_TELEMETRY_TOKEN}",
},
method="POST",
)
urllib.request.urlopen(req, timeout=10)
except Exception:
pass # Fire-and-forget — never affect setup
# ── Input helpers ────────────────────────────────────────────────────────────
def ask(
    prompt: str,
    default: str = "",
    required: bool = False,
    secret: bool = False,
    validate=None,
) -> str:
    """Prompt until an acceptable answer is given and return it stripped.

    Args:
        prompt: Label shown to the user.
        default: Value used for an empty answer (shown in brackets).
        required: Reject empty answers when no default applies.
        secret: Read with getpass so the input is not echoed.
        validate: Optional callable returning an error message for a bad
            value, or a falsy value when the input is fine.

    Raises:
        SystemExit: On end-of-input when the field is required and has no
            default. With a default, EOF returns the default; otherwise "".
    """
    hint = f" [{default}]" if default else ""
    shown = f" {prompt}{hint}: "
    read_line = getpass.getpass if secret else input
    while True:
        try:
            answer = read_line(shown)
        except EOFError:
            if default:
                return default
            if required:
                raise SystemExit(
                    f"EOF reached and no default for required field: {prompt}"
                )
            return ""
        answer = answer.strip()
        if default and not answer:
            answer = default
        if required and not answer:
            warn("This field is required.")
            continue
        problem = validate(answer) if validate else None
        if problem:
            warn(problem)
            continue
        return answer
def ask_yes_no(prompt: str, default: bool = False) -> bool:
    """Ask a yes/no question and return the answer as a bool.

    An empty answer or end-of-input yields *default*. Accepted answers are
    y/yes and n/no, case-insensitively; anything else re-prompts.
    """
    hint = "Y/n" if default else "y/N"
    verdicts = {"y": True, "yes": True, "n": False, "no": False}
    while True:
        try:
            reply = input(f" {prompt} [{hint}]: ").strip().lower()
        except EOFError:
            return default
        if not reply:
            return default
        if reply in verdicts:
            return verdicts[reply]
        warn("Please enter y or n.")
def mask_secret(value: str) -> str:
    """Return a display-safe form of a secret.

    Secrets longer than 12 characters show their first 8 characters followed
    by ``...``; shorter ones are replaced entirely by asterisks.
    """
    if len(value) > 12:
        return f"{value[:8]}..."
    return "*" * len(value)
# ── Validators ───────────────────────────────────────────────────────────────
def validate_password_strength(value: str) -> str | None:
if len(value) < 12:
return "Password must be at least 12 characters."
return None
def validate_email(value: str) -> str | None:
if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", value):
return "Please enter a valid email address."
return None
def validate_domain(value: str) -> str | None:
# Strip protocol if provided
cleaned = re.sub(r"^https?://", "", value).rstrip("/")
if not re.match(r"^[a-zA-Z0-9]([a-zA-Z0-9\-]*\.)+[a-zA-Z]{2,}$", cleaned):
return "Please enter a valid domain (e.g. tod.example.com)."
return None
# ── System checks ────────────────────────────────────────────────────────────
def check_python_version() -> bool:
    """Report whether the running interpreter is Python 3.10 or newer,
    printing a pass or fail line either way."""
    running = sys.version_info
    if running >= (3, 10):
        ok(f"Python {running.major}.{running.minor}")
        return True
    fail(f"Python 3.10+ required, found {sys.version}")
    return False
def check_docker() -> bool:
    """Verify that Docker Engine and Docker Compose v2 are usable.

    Runs ``docker info`` and then ``docker compose version``, each with a
    10 s timeout, printing one status line per check.

    Returns:
        True when both commands succeed; False on the first failure
        (missing binary, non-zero exit, or timeout).
    """
    try:
        result = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode != 0:
            fail("Docker is not running. Start Docker and try again.")
            return False
        ok("Docker Engine")
    except FileNotFoundError:
        fail("Docker is not installed.")
        return False
    except subprocess.TimeoutExpired:
        fail("Docker is not responding.")
        return False

    try:
        result = subprocess.run(
            ["docker", "compose", "version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode != 0:
            fail("Docker Compose v2 is not available.")
            return False
        # Only major.minor is displayed; fall back to "unknown" if unparsable.
        version_match = re.search(r"v?(\d+\.\d+)", result.stdout)
        version_str = version_match.group(1) if version_match else "unknown"
        ok(f"Docker Compose v{version_str}")
    except FileNotFoundError:
        fail("Docker Compose is not installed.")
        return False
    except subprocess.TimeoutExpired:
        # This call has a timeout too, so a hung `docker compose` must be
        # reported like a hung `docker info` rather than raising a traceback.
        fail("Docker Compose is not responding.")
        return False
    return True
def check_ram() -> None:
    """Print the detected amount of RAM, warning below 4 GB.

    Uses ``sysctl hw.memsize`` on macOS and ``/proc/meminfo`` elsewhere.
    Returns silently when the value cannot be located; any error while
    probing is reported as a skipped check instead of being raised.
    """
    try:
        if sys.platform == "darwin":
            proc = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if proc.returncode != 0:
                return
            total_bytes = int(proc.stdout.strip())
        else:
            with open("/proc/meminfo") as meminfo:
                mem_line = next(
                    (ln for ln in meminfo if ln.startswith("MemTotal:")), None
                )
            if mem_line is None:
                return
            # /proc/meminfo reports MemTotal in kB.
            total_bytes = int(mem_line.split()[1]) * 1024

        gib = total_bytes / (1024**3)
        if gib >= 4:
            ok(f"{gib:.1f} GB RAM")
        else:
            warn(f"Only {gib:.1f} GB RAM detected. 4 GB+ recommended for builds.")
    except Exception:
        info("Could not detect RAM — skipping check")
def _probe_port_in_use(port: int, udp: bool = False) -> bool:
    """Return True when something already holds *port* on this host.

    TCP ports are probed by connecting to 127.0.0.1. A UDP listener cannot
    be detected by connecting, so UDP ports are probed by trying to bind
    the port instead.

    Raises:
        OSError: When a UDP bind fails for a reason other than the address
            being in use (for example missing privileges).
    """
    import errno

    if udp:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            try:
                probe.bind(("0.0.0.0", port))
            except OSError as exc:
                if exc.errno == errno.EADDRINUSE:
                    return True
                raise
            return False
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(1)
        return probe.connect_ex(("127.0.0.1", port)) == 0


def check_ports() -> None:
    """Report, for every entry in REQUIRED_PORTS, whether the port is free.

    Entries whose label contains ``(UDP)`` are probed as UDP; all others as
    TCP. A TCP connect to a UDP-only port is always refused, so the
    TCP-only probe reported such ports as free even when they were taken.
    Probe errors are reported per port and never raised.
    """
    for port, service in REQUIRED_PORTS.items():
        try:
            busy = _probe_port_in_use(port, udp="(UDP)" in service)
        except Exception:
            info(f"Could not check port {port} ({service})")
            continue
        if busy:
            warn(f"Port {port} ({service}) is already in use")
        else:
            ok(f"Port {port} ({service}) is free")
def check_existing_env(args: argparse.Namespace) -> str:
    """Decide what to do about an existing .env.prod.

    Returns ``"overwrite"`` when the file is absent, when the user chooses to
    overwrite it, or after a timestamped backup copy has been made (always
    the case in non-interactive mode). Returns ``"abort"`` when the user
    picks that option. No code path returns ``"backup"``.
    """

    def _snapshot() -> pathlib.Path:
        # Copy .env.prod next to itself under a timestamped backup name.
        stamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
        target = ENV_PROD.with_name(f".env.prod.backup.{stamp}")
        shutil.copy2(ENV_PROD, target)
        return target

    if not ENV_PROD.exists():
        return "overwrite"

    if args.non_interactive:
        saved = _snapshot()
        ok(f"Backed up existing .env.prod to {saved.name}")
        return "overwrite"

    print()
    warn(f"Existing .env.prod found at {ENV_PROD}")
    print()
    print(" What would you like to do?")
    menu = (("1)", "Overwrite it"), ("2)", "Back it up and create a new one"), ("3)", "Abort"))
    for key, label in menu:
        print(f" {bold(key)} {label}")
    print()

    while True:
        picked = input(" Choice [1/2/3]: ").strip()
        if picked == "3":
            return "abort"
        if picked == "1":
            return "overwrite"
        if picked == "2":
            saved = _snapshot()
            ok(f"Backed up to {saved.name}")
            return "overwrite"
        warn("Please enter 1, 2, or 3.")
def preflight(args: argparse.Namespace) -> bool:
    """Print the intro and run every pre-flight check.

    The Python and Docker checks are blocking; the RAM and port checks only
    print advice. Returns False when a blocking check fails or the user
    aborts at the existing-.env.prod prompt, True otherwise.
    """
    banner("TOD Production Setup")
    intro = (
        " This wizard will configure your production environment,",
        " generate secrets, bootstrap OpenBao, pull or build images,",
        " and start the stack.",
    )
    for line in intro:
        print(line)
    print()

    section("Pre-flight Checks")
    # `and` short-circuits, so Docker is not probed after a failed Python check.
    if not (check_python_version() and check_docker()):
        return False

    check_ram()
    check_ports()

    if check_existing_env(args) == "abort":
        print()
        info("Setup aborted.")
        return False
    return True
# ── Secret generation ────────────────────────────────────────────────────────
def generate_jwt_secret() -> str:
    """Return a URL-safe random token built from 64 random bytes."""
    entropy_bytes = 64
    return secrets.token_urlsafe(entropy_bytes)


def generate_encryption_key() -> str:
    """Return 32 random bytes encoded as standard base64 text."""
    raw_key = secrets.token_bytes(32)
    return base64.b64encode(raw_key).decode()


def generate_db_password() -> str:
    """Return a URL-safe random token built from 24 random bytes."""
    entropy_bytes = 24
    return secrets.token_urlsafe(entropy_bytes)


def generate_admin_password() -> str:
    """Return a URL-safe random token built from 18 random bytes."""
    entropy_bytes = 18
    return secrets.token_urlsafe(entropy_bytes)
# ── Wizard sections ─────────────────────────────────────────────────────────
def wizard_database(config: dict, args: argparse.Namespace) -> None:
    """Collect the PostgreSQL superuser password and generate service passwords.

    Stores ``postgres_password``, ``app_user_password``,
    ``poller_user_password`` and ``postgres_db`` in *config*.

    Raises:
        SystemExit: In non-interactive mode when --postgres-password is missing.
    """
    section("Database")
    info("PostgreSQL superuser password — used for migrations and admin operations.")
    info("The app and poller service passwords will be auto-generated.")
    print()

    if not args.non_interactive:
        superuser_pw = ask(
            "PostgreSQL superuser password",
            required=True,
            secret=True,
            validate=validate_password_strength,
        )
    elif args.postgres_password:
        superuser_pw = args.postgres_password
    else:
        fail("--postgres-password is required in non-interactive mode.")
        raise SystemExit(1)
    config["postgres_password"] = superuser_pw

    for key in ("app_user_password", "poller_user_password"):
        config[key] = generate_db_password()
    config["postgres_db"] = "tod"

    ok("Database passwords configured")
    info(f"app_user password: {mask_secret(config['app_user_password'])}")
    info(f"poller_user password: {mask_secret(config['poller_user_password'])}")
def wizard_security(config: dict) -> None:
    """Generate the JWT signing key and the credential encryption key, store
    them in *config* as ``jwt_secret`` / ``encryption_key``, and print masked
    previews of both."""
    section("Security")
    info("Auto-generating cryptographic keys...")
    print()

    jwt_key = generate_jwt_secret()
    config["jwt_secret"] = jwt_key
    enc_key = generate_encryption_key()
    config["encryption_key"] = enc_key

    ok("JWT signing key generated")
    ok("Credential encryption key generated")
    print()
    warn("Save these somewhere safe — they cannot be recovered if lost:")
    info(f"JWT_SECRET_KEY={mask_secret(jwt_key)}")
    info(f"CREDENTIAL_ENCRYPTION_KEY={mask_secret(enc_key)}")
def wizard_admin(config: dict, args: argparse.Namespace) -> None:
    """Collect the first admin's email and password.

    Stores ``admin_email``, ``admin_password`` and
    ``admin_password_generated`` in *config*. When no password is supplied
    (flag absent, or empty answer at the prompt) one is generated and
    printed once. A non-empty interactive answer that fails the strength
    check is re-asked until it passes.
    """
    section("Admin Account")
    info("The first admin account is created on initial startup.")
    print()

    fallback_email = "admin@the-other-dude.dev"
    if args.non_interactive:
        config["admin_email"] = args.admin_email or fallback_email
        chosen = args.admin_password
    else:
        config["admin_email"] = ask(
            "Admin email",
            default=fallback_email,
            required=True,
            validate=validate_email,
        )
        print()
        info("Enter a password or press Enter to auto-generate one.")
        chosen = ask("Admin password", secret=True)
        if chosen:
            complaint = validate_password_strength(chosen)
            if complaint:
                warn(complaint)
                # From here on ask() enforces the strength rule itself.
                chosen = ask(
                    "Admin password",
                    secret=True,
                    required=True,
                    validate=validate_password_strength,
                )

    if chosen:
        config["admin_password"] = chosen
        config["admin_password_generated"] = False
        return

    config["admin_password"] = generate_admin_password()
    config["admin_password_generated"] = True
    ok(f"Generated password: {bold(config['admin_password'])}")
    warn("Save this now — it will not be shown again after setup.")
def wizard_email(config: dict, args: argparse.Namespace) -> None:
    """Collect optional SMTP settings.

    Always sets ``smtp_configured`` in *config*. When SMTP is configured it
    also sets ``smtp_host``, ``smtp_port``, ``smtp_user``,
    ``smtp_password``, ``smtp_from`` and ``smtp_tls``.

    Raises:
        SystemExit: In non-interactive mode when --smtp-host is given
            without --smtp-from.
    """
    section("Email (SMTP)")
    info("Email is used for password reset links.")
    print()

    if args.non_interactive:
        if not args.smtp_host:
            config["smtp_configured"] = False
            info("Skipped — no --smtp-host provided.")
            return
        config.update(
            {
                "smtp_configured": True,
                "smtp_host": args.smtp_host,
                "smtp_port": args.smtp_port or "587",
                "smtp_user": args.smtp_user or "",
                "smtp_password": args.smtp_password or "",
                "smtp_from": args.smtp_from or "",
            }
        )
        if not config["smtp_from"]:
            fail("--smtp-from is required when --smtp-host is provided.")
            raise SystemExit(1)
        # TLS stays on unless --no-smtp-tls was passed.
        config["smtp_tls"] = not args.no_smtp_tls
        return

    wants_smtp = ask_yes_no("Configure SMTP now?", default=False)
    config["smtp_configured"] = wants_smtp
    if not wants_smtp:
        info("Skipped — you can re-run setup.py later to configure email.")
        return

    config["smtp_host"] = ask("SMTP host", required=True)
    config["smtp_port"] = ask("SMTP port", default="587")
    smtp_user = ask("SMTP username (optional)")
    config["smtp_user"] = smtp_user
    # The password is only requested when a username was entered.
    if smtp_user:
        config["smtp_password"] = ask("SMTP password (optional)", secret=True)
    else:
        config["smtp_password"] = ""
    config["smtp_from"] = ask("From address", required=True, validate=validate_email)
    config["smtp_tls"] = ask_yes_no("Use TLS?", default=True)
def wizard_domain(config: dict, args: argparse.Namespace) -> None:
    """Collect the production domain and protocol.

    Stores ``domain`` (scheme and trailing slashes removed),
    ``app_base_url`` and ``cors_origins`` in *config*. Only the interactive
    prompt runs validate_domain; a --domain value is taken as given.

    Raises:
        SystemExit: In non-interactive mode when --domain is missing.
    """
    section("Web / Domain")
    info("Your production domain, used for CORS and email links.")
    print()

    interactive = not args.non_interactive
    if interactive:
        entered = ask(
            "Production domain (e.g. tod.example.com)",
            required=True,
            validate=validate_domain,
        )
    elif args.domain:
        entered = args.domain
    else:
        fail("--domain is required in non-interactive mode.")
        raise SystemExit(1)

    domain = re.sub(r"^https?://", "", entered).rstrip("/")
    config["domain"] = domain

    # HTTPS is the default; HTTP is allowed for LAN/dev setups.
    if interactive:
        use_https = ask_yes_no(
            "Use HTTPS? (disable for LAN/dev without TLS)", default=True
        )
    else:
        use_https = not getattr(args, "no_https", False)

    scheme = "https" if use_https else "http"
    origin = f"{scheme}://{domain}"
    config["app_base_url"] = origin
    config["cors_origins"] = origin
    ok(f"APP_BASE_URL={origin}")
    ok(f"CORS_ORIGINS={origin}")

    if not use_https:
        warn(
            "Running without HTTPS — cookies will not be Secure. Fine for LAN, not for public internet."
        )
# ── Reverse proxy ───────────────────────────────────────────────────────────
# Directory holding the example reverse-proxy configs shipped with the project.
PROXY_EXAMPLES = PROJECT_ROOT / "infrastructure" / "reverse-proxy-examples"
# Per-proxy metadata, keyed by the name accepted by --proxy. Fields:
#   label       - display name used in menus and messages
#   binary      - executable looked up on PATH by _detect_proxy()
#   alt_binary  - optional second executable name to try (Apache only)
#   example     - template file read by wizard_reverse_proxy()
#   targets     - candidate config directories, tried in order; the first one
#                 that exists is used
#   filename    - fixed output file name, or None to derive it from the domain
#   placeholders- template tokens to substitute.
#                 NOTE(review): wizard_reverse_proxy() replaces the two token
#                 strings directly and does not appear to read this mapping.
PROXY_CONFIGS = {
    "caddy": {
        "label": "Caddy",
        "binary": "caddy",
        "example": PROXY_EXAMPLES / "caddy" / "Caddyfile.example",
        "targets": [
            pathlib.Path("/etc/caddy/Caddyfile.d"),
            pathlib.Path("/etc/caddy"),
        ],
        "filename": None,  # derived from domain
        "placeholders": {
            "tod.example.com": None,  # replaced with domain
            "YOUR_TOD_HOST": None,  # replaced with host IP
        },
    },
    "nginx": {
        "label": "nginx",
        "binary": "nginx",
        "example": PROXY_EXAMPLES / "nginx" / "tod.conf.example",
        "targets": [
            pathlib.Path("/etc/nginx/sites-available"),
            pathlib.Path("/etc/nginx/conf.d"),
        ],
        "filename": None,
        "placeholders": {
            "tod.example.com": None,
            "YOUR_TOD_HOST": None,
        },
    },
    "apache": {
        "label": "Apache",
        "binary": "apache2",
        "alt_binary": "httpd",
        "example": PROXY_EXAMPLES / "apache" / "tod.conf.example",
        "targets": [
            pathlib.Path("/etc/apache2/sites-available"),
            pathlib.Path("/etc/httpd/conf.d"),
        ],
        "filename": None,
        "placeholders": {
            "tod.example.com": None,
            "YOUR_TOD_HOST": None,
        },
    },
    "haproxy": {
        "label": "HAProxy",
        "binary": "haproxy",
        "example": PROXY_EXAMPLES / "haproxy" / "haproxy.cfg.example",
        "targets": [
            pathlib.Path("/etc/haproxy"),
        ],
        # HAProxy is the only entry with a fixed output file name.
        "filename": "haproxy.cfg",
        "placeholders": {
            "tod.example.com": None,
            "YOUR_TOD_HOST": None,
        },
    },
    "traefik": {
        "label": "Traefik",
        "binary": "traefik",
        "example": PROXY_EXAMPLES / "traefik" / "traefik-dynamic.yaml.example",
        "targets": [
            pathlib.Path("/etc/traefik/dynamic"),
            pathlib.Path("/etc/traefik"),
        ],
        "filename": None,
        "placeholders": {
            "tod.example.com": None,
            "YOUR_TOD_HOST": None,
        },
    },
}
def _detect_proxy(name: str, cfg: dict) -> bool:
    """Return True when the proxy's executable is found on PATH.

    ``cfg["binary"]`` is tried first, then the optional ``cfg["alt_binary"]``.
    *name* is accepted for symmetry with the config mapping but is unused.
    """
    if shutil.which(cfg["binary"]) is not None:
        return True
    fallback = cfg.get("alt_binary")
    return bool(fallback) and shutil.which(fallback) is not None
def _get_host_ip() -> str:
    """Best-effort detection of the host's LAN IP.

    Connects a UDP socket towards 8.8.8.8 and reads back the local address
    the OS selected for that route.

    Returns:
        The local IPv4 address as a string, or ``"127.0.0.1"`` when the
        socket cannot be created or connected.
    """
    try:
        # The context manager closes the socket on the failure path as well.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(("8.8.8.8", 80))
            return probe.getsockname()[0]
    except OSError:
        return "127.0.0.1"
def _write_system_file(path: pathlib.Path, content: str) -> bool:
    """Write *content* to *path*, escalating to sudo on a permission error.

    A direct write (creating parent directories) is tried first. On
    PermissionError the user is asked whether to retry with
    ``sudo mkdir -p`` followed by ``sudo tee``.

    Returns:
        True when the file was written, False when the user declined sudo
        or the sudo commands failed.
    """
    folder = path.parent

    # Attempt 1: plain write as the current user.
    try:
        folder.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
    except PermissionError:
        pass
    else:
        return True

    # Attempt 2: the same two steps through sudo, with the user's consent.
    info(f"Need elevated permissions to write to {folder}")
    if not ask_yes_no("Use sudo?", default=True):
        warn("Skipped. You can copy the config manually later.")
        return False

    try:
        subprocess.run(
            ["sudo", "mkdir", "-p", str(folder)],
            check=True,
            timeout=30,
        )
        tee = subprocess.run(
            ["sudo", "tee", str(path)],
            input=content,
            text=True,
            capture_output=True,
            timeout=30,
        )
    except subprocess.CalledProcessError as exc:
        fail(f"sudo failed: {exc}")
        return False
    except Exception as exc:
        fail(f"Failed to write config: {exc}")
        return False

    if tee.returncode != 0:
        fail(f"sudo tee failed: {tee.stderr.strip()}")
        return False
    return True
def _proxy_output_filename(cfg: dict, domain: str) -> str:
    """Return the file name to write for one PROXY_CONFIGS entry.

    A fixed ``cfg["filename"]`` wins. Otherwise the name is the domain with
    dots replaced by hyphens plus an extension: ``.caddy`` for the Caddyfile
    template, else the extension the template has once its trailing
    ``.example`` is removed (``.conf`` when none is left).
    """
    if cfg["filename"]:
        return cfg["filename"]
    safe_domain = domain.replace(".", "-")
    example_name = cfg["example"].name
    if example_name == "Caddyfile.example":
        return f"{safe_domain}.caddy"
    # Path.suffix of "x.yaml.example" is ".example", so drop that part
    # before asking for the real extension. Otherwise the Traefik template
    # would be written as ".conf", which Traefik's file provider ignores.
    ext = pathlib.Path(example_name.removesuffix(".example")).suffix or ".conf"
    return f"{safe_domain}{ext}"


def _choose_proxy_interactively() -> str | None:
    """Show the proxy menu and return the chosen PROXY_CONFIGS key, or None
    when the user picks the "Skip" entry."""
    # Detect installed proxies
    detected = [
        name for name, cfg in PROXY_CONFIGS.items() if _detect_proxy(name, cfg)
    ]
    print()
    if detected:
        info(f"Detected: {', '.join(PROXY_CONFIGS[n]['label'] for n in detected)}")
    else:
        info("No reverse proxy detected on this system.")

    # Show menu
    print()
    print(" Which reverse proxy are you using?")
    choices = list(PROXY_CONFIGS)
    skip_number = len(choices) + 1
    for i, name in enumerate(choices, 1):
        label = PROXY_CONFIGS[name]["label"]
        tag = f" {green('(detected)')}" if name in detected else ""
        print(f" {bold(f'{i})')} {label}{tag}")
    print(f" {bold(f'{skip_number})')} Skip — I'll configure it myself")
    print()

    while True:
        choice = input(f" Choice [1-{skip_number}]: ").strip()
        # isdecimal() only admits characters int() can parse; isdigit() also
        # accepts e.g. superscript digits, which would crash int().
        if not choice.isdecimal():
            warn("Please enter a number.")
            continue
        number = int(choice)
        if number == skip_number:
            return None
        if 1 <= number <= len(choices):
            return choices[number - 1]
        warn(f"Please enter 1-{skip_number}.")


def _print_proxy_reload_hints(selected: str, out_path: pathlib.Path) -> None:
    """Print the follow-up commands for the proxy whose config was written."""
    if selected == "caddy":
        info("Reload Caddy: sudo systemctl reload caddy")
    elif selected == "nginx":
        if "/sites-available/" in str(out_path):
            sites_enabled = out_path.parent.parent / "sites-enabled" / out_path.name
            info(f"Enable site: sudo ln -s {out_path} {sites_enabled}")
        info("Test config: sudo nginx -t")
        info("Reload nginx: sudo systemctl reload nginx")
    elif selected == "apache":
        if "/sites-available/" in str(out_path):
            info(f"Enable site: sudo a2ensite {out_path.stem}")
        info("Test config: sudo apachectl configtest")
        info("Reload Apache: sudo systemctl reload apache2")
    elif selected == "haproxy":
        # Point at the file actually written, which may not be the default.
        info(f"Test config: sudo haproxy -c -f {out_path}")
        info("Reload: sudo systemctl reload haproxy")
    elif selected == "traefik":
        info("Traefik watches for file changes — no reload needed.")


def wizard_reverse_proxy(config: dict, args: argparse.Namespace) -> None:
    """Optionally generate a reverse-proxy config from the bundled examples.

    Always sets ``proxy_configured`` in *config*; on success it also sets
    ``proxy_type`` and ``proxy_path``. Reads ``config["domain"]``.

    In non-interactive mode the proxy comes from ``args.proxy`` ("skip" when
    unset). The destination path and overwrite questions are not asked: the
    default path is used and an existing file is left untouched.
    NOTE: _write_system_file() may still ask before using sudo.

    Raises:
        SystemExit: In non-interactive mode when --proxy is not a known
            proxy name or "skip".
    """
    section("Reverse Proxy")
    info("TOD needs a reverse proxy for HTTPS termination.")
    info("Example configs are included for Caddy, nginx, Apache, HAProxy, and Traefik.")
    print()

    if args.non_interactive:
        proxy_val = args.proxy or "skip"
        if proxy_val != "skip" and proxy_val not in PROXY_CONFIGS:
            fail(f"--proxy must be one of: {', '.join(PROXY_CONFIGS)}, skip")
            raise SystemExit(1)
        selected = None if proxy_val == "skip" else proxy_val
    elif ask_yes_no("Configure a reverse proxy now?", default=True):
        selected = _choose_proxy_interactively()
    else:
        selected = None

    if selected is None:
        config["proxy_configured"] = False
        info("Skipped. Example configs are in infrastructure/reverse-proxy-examples/")
        return

    cfg = PROXY_CONFIGS[selected]
    domain = config["domain"]
    host_ip = _get_host_ip()

    # Read and customize the example config
    if not cfg["example"].exists():
        fail(f"Example config not found: {cfg['example']}")
        config["proxy_configured"] = False
        return
    template = cfg["example"].read_text()
    output = template.replace("tod.example.com", domain)
    output = output.replace("YOUR_TOD_HOST", host_ip)

    out_name = _proxy_output_filename(cfg, domain)

    # Use the first candidate directory that exists on this machine.
    target_dir = next((c for c in cfg["targets"] if c.is_dir()), None)
    print()
    if target_dir:
        out_path = target_dir / out_name
        info(f"Will write: {out_path}")
    else:
        # Fall back to project directory
        out_path = PROJECT_ROOT / out_name
        info(f"No standard config directory found for {cfg['label']}.")
        info(f"Will write to: {out_path}")

    print()
    info("Preview (first 20 lines):")
    for line in output.splitlines()[:20]:
        print(f" {dim(line)}")
    print(f" {dim('...')}")
    print()

    # Never prompt in non-interactive mode; keep the computed default path.
    if not args.non_interactive:
        out_path = pathlib.Path(ask("Write config to", default=str(out_path)))

    if out_path.exists():
        if args.non_interactive:
            # Same result as the prompt's default answer ("no").
            warn(f"{out_path} already exists — leaving it untouched.")
            overwrite = False
        else:
            overwrite = ask_yes_no(
                f"{out_path} already exists. Overwrite?", default=False
            )
        if not overwrite:
            info("Skipped writing proxy config.")
            config["proxy_configured"] = False
            return

    if not _write_system_file(out_path, output):
        config["proxy_configured"] = False
        return

    ok(f"Wrote {cfg['label']} config to {out_path}")
    config["proxy_configured"] = True
    config["proxy_type"] = cfg["label"]
    config["proxy_path"] = str(out_path)

    # Post-install hints
    print()
    _print_proxy_reload_hints(selected, out_path)
def wizard_telemetry(
    config: dict, telem: SetupTelemetry, args: argparse.Namespace
) -> None:
    """Explain the anonymous diagnostics and record the opt-in decision.

    Sets ``config["telemetry_enabled"]`` and, on opt-in, calls
    ``telem.enable()``. Non-interactive mode takes the decision from
    ``args.telemetry``; interactive mode asks, defaulting to "no".
    """
    section("Anonymous Diagnostics")
    explanation = (
        (
            "TOD can send anonymous setup and runtime diagnostics to help",
            "identify common failures. No personal data, IPs, hostnames,",
            "or configuration values are ever sent.",
        ),
        (
            "What is collected: step pass/fail, duration, OS/arch/Python",
            "version, Docker version, RAM (rounded), and error types.",
            "You can disable this anytime by setting TELEMETRY_ENABLED=false",
            "in .env.prod.",
        ),
    )
    for paragraph in explanation:
        for line in paragraph:
            info(line)
        print()

    if args.non_interactive:
        opted_in = True if args.telemetry else False
    else:
        opted_in = ask_yes_no("Send anonymous diagnostics?", default=False)

    config["telemetry_enabled"] = opted_in
    if opted_in:
        telem.enable()
        ok("Diagnostics enabled — thank you!")
    else:
        info("No diagnostics will be sent.")
def _read_version() -> str:
    """Return the stripped contents of PROJECT_ROOT/VERSION, or ``"latest"``
    when that file does not exist."""
    source = PROJECT_ROOT / "VERSION"
    return source.read_text().strip() if source.exists() else "latest"
def wizard_build_mode(config: dict, args: argparse.Namespace) -> None:
    """Choose between pre-built images and building from source.

    Stores ``tod_version`` and ``build_mode`` in *config*. When the mode is
    ``"source"``, the build override compose file is appended to the
    module-level COMPOSE_CMD list.
    """
    section("Build Mode")
    version = _read_version()
    config["tod_version"] = version

    if args.non_interactive:
        mode = getattr(args, "build_mode", None) or "prebuilt"
        config["build_mode"] = mode
        if mode != "source":
            ok(f"Pre-built images from GHCR (v{version})")
            return
        COMPOSE_CMD.extend(["-f", COMPOSE_BUILD_OVERRIDE])
        ok(f"Build from source (v{version})")
        return

    menu_lines = (
        f" TOD v{bold(version)} can be installed two ways:",
        "",
        f" {bold('1.')} {green('Pre-built images')} {dim('(recommended)')}",
        " Pull ready-to-run images from GitHub Container Registry.",
        " Fast install, no compilation needed.",
        "",
        f" {bold('2.')} Build from source",
        " Compile Go, Python, and Node.js locally.",
        " Requires 4+ GB RAM and takes 5-15 minutes.",
        "",
    )
    for text in menu_lines:
        print(text)

    while True:
        picked = input(" Choice [1/2]: ").strip()
        if picked == "2":
            config["build_mode"] = "source"
            COMPOSE_CMD.extend(["-f", COMPOSE_BUILD_OVERRIDE])
            ok("Build from source")
            return
        # An empty answer selects the recommended option.
        if picked in ("", "1"):
            config["build_mode"] = "prebuilt"
            ok("Pre-built images from GHCR")
            return
        warn("Please enter 1 or 2.")
# ── Summary ──────────────────────────────────────────────────────────────────
def show_summary(config: dict, args: argparse.Namespace) -> bool:
    """Print the collected configuration with secrets masked, then confirm.

    Returns True immediately when ``args.yes`` is set; otherwise returns the
    user's answer to the final confirmation prompt (default yes).
    """

    def heading(title: str) -> None:
        print(f" {bold(title)}")

    def row(label: str, value) -> None:
        print(f" {label} = {value}")

    def not_configured() -> None:
        print(f" {dim('(not configured)')}")

    banner("Configuration Summary")

    heading("Database")
    row("POSTGRES_DB", config["postgres_db"])
    row("POSTGRES_PASSWORD", mask_secret(config["postgres_password"]))
    row("app_user password", mask_secret(config["app_user_password"]))
    row("poller_user password", mask_secret(config["poller_user_password"]))
    print()

    heading("Security")
    row("JWT_SECRET_KEY", mask_secret(config["jwt_secret"]))
    row("ENCRYPTION_KEY", mask_secret(config["encryption_key"]))
    print()

    heading("Admin Account")
    row("Email", config["admin_email"])
    # A generated password was already shown once; never repeat it here.
    if config.get("admin_password_generated"):
        shown_password = "(auto-generated)"
    else:
        shown_password = mask_secret(config["admin_password"])
    row("Password", shown_password)
    print()

    heading("Email")
    if config.get("smtp_configured"):
        row("SMTP_HOST", config["smtp_host"])
        row("SMTP_PORT", config["smtp_port"])
        row("SMTP_FROM", config["smtp_from"])
        row("SMTP_TLS", config["smtp_tls"])
    else:
        not_configured()
    print()

    heading("Web")
    row("Domain", config["domain"])
    row("APP_BASE_URL", config["app_base_url"])
    print()

    heading("Reverse Proxy")
    if config.get("proxy_configured"):
        row("Type", config["proxy_type"])
        row("Config", config["proxy_path"])
    else:
        not_configured()
    print()

    heading("Diagnostics")
    telemetry_text = green("true") if config.get("telemetry_enabled") else dim("false")
    row("TELEMETRY_ENABLED", telemetry_text)
    print()

    heading("Build Mode")
    if config.get("build_mode") == "source":
        print(" Mode = Build from source")
    else:
        row("Mode", green("Pre-built images"))
    row("Version", config.get("tod_version", "latest"))
    print()

    heading("OpenBao")
    print(f" {dim('(will be captured automatically during bootstrap)')}")
    print()

    if args.yes:
        ok("Auto-confirmed (--yes)")
        return True
    return ask_yes_no("Write .env.prod with these settings?", default=True)
# ── File writers ─────────────────────────────────────────────────────────────
def write_env_prod(config: dict) -> None:
    """Write the .env.prod file from the collected configuration.

    Passwords are percent-encoded where they are embedded in connection
    URLs, so characters such as ``@``, ``/``, ``:`` or ``#`` cannot break
    URL parsing. POSTGRES_PASSWORD itself is written verbatim.
    NOTE(review): consumers of the URLs are assumed to percent-decode the
    password component, as standard URL parsers do — confirm for the poller.

    The file is created with mode 0600 from the start, so its secrets are
    never briefly readable by other users.

    Args:
        config: Wizard results. Required keys are the database passwords,
            ``postgres_db``, ``jwt_secret``, ``encryption_key``, the admin
            credentials, ``app_base_url`` and ``cors_origins``. SMTP,
            telemetry and version keys are optional.
    """
    from urllib.parse import quote

    db = config["postgres_db"]
    pg_pw = config["postgres_password"]
    # safe="" also encodes "/", which quote() leaves untouched by default.
    pg_pw_url = quote(pg_pw, safe="")
    app_pw_url = quote(config["app_user_password"], safe="")
    poll_pw_url = quote(config["poller_user_password"], safe="")
    ts = datetime.datetime.now().isoformat(timespec="seconds")

    if config.get("smtp_configured"):
        smtp_block = f"""\
SMTP_HOST={config["smtp_host"]}
SMTP_PORT={config["smtp_port"]}
SMTP_USER={config.get("smtp_user", "")}
SMTP_PASSWORD={config.get("smtp_password", "")}
SMTP_USE_TLS={"true" if config.get("smtp_tls") else "false"}
SMTP_FROM_ADDRESS={config["smtp_from"]}"""
    else:
        smtp_block = """\
# Email not configured — re-run setup.py to add SMTP
SMTP_HOST=
SMTP_PORT=587
SMTP_USER=
SMTP_PASSWORD=
SMTP_USE_TLS=true
SMTP_FROM_ADDRESS=noreply@example.com"""

    content = f"""\
# ============================================================
# TOD Production Environment — generated by setup.py
# Generated: {ts}
# ============================================================
# --- Database ---
POSTGRES_DB={db}
POSTGRES_USER=postgres
POSTGRES_PASSWORD={pg_pw}
DATABASE_URL=postgresql+asyncpg://postgres:{pg_pw_url}@postgres:5432/{db}
SYNC_DATABASE_URL=postgresql+psycopg2://postgres:{pg_pw_url}@postgres:5432/{db}
APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:{app_pw_url}@postgres:5432/{db}
POLLER_DATABASE_URL=postgres://poller_user:{poll_pw_url}@postgres:5432/{db}?sslmode=disable
# --- Security ---
JWT_SECRET_KEY={config["jwt_secret"]}
CREDENTIAL_ENCRYPTION_KEY={config["encryption_key"]}
# --- OpenBao (KMS) ---
OPENBAO_ADDR=http://openbao:8200
OPENBAO_TOKEN=PLACEHOLDER_RUN_SETUP
BAO_UNSEAL_KEY=PLACEHOLDER_RUN_SETUP
# --- Admin Bootstrap ---
FIRST_ADMIN_EMAIL={config["admin_email"]}
FIRST_ADMIN_PASSWORD={config["admin_password"]}
# --- Email ---
{smtp_block}
# --- Web ---
APP_BASE_URL={config["app_base_url"]}
CORS_ORIGINS={config["cors_origins"]}
# --- Application ---
ENVIRONMENT=production
LOG_LEVEL=info
DEBUG=false
APP_NAME=TOD - The Other Dude
TOD_VERSION={config.get("tod_version", "latest")}
# --- Storage ---
GIT_STORE_PATH=/data/git-store
FIRMWARE_CACHE_DIR=/data/firmware-cache
WIREGUARD_CONFIG_PATH=/data/wireguard
WIREGUARD_GATEWAY=wireguard
CONFIG_RETENTION_DAYS=90
# --- Redis & NATS ---
REDIS_URL=redis://redis:6379/0
NATS_URL=nats://nats:4222
# --- Poller ---
POLL_INTERVAL_SECONDS=60
CONNECTION_TIMEOUT_SECONDS=10
COMMAND_TIMEOUT_SECONDS=30
# --- Remote Access ---
TUNNEL_PORT_MIN=49000
TUNNEL_PORT_MAX=49100
TUNNEL_IDLE_TIMEOUT=300
SSH_RELAY_PORT=8080
SSH_IDLE_TIMEOUT=900
# --- Config Backup ---
CONFIG_BACKUP_INTERVAL=21600
CONFIG_BACKUP_MAX_CONCURRENT=10
# --- Telemetry ---
# Opt-in anonymous diagnostics. Set to false to disable.
TELEMETRY_ENABLED={"true" if config.get("telemetry_enabled") else "false"}
TELEMETRY_COLLECTOR_URL={_TELEMETRY_COLLECTOR}
"""
    # Create the file owner-only instead of tightening it after the write.
    fd = os.open(ENV_PROD, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(content)
    # The mode passed to os.open() only applies to newly created files, so
    # tighten a pre-existing file as well.
    ENV_PROD.chmod(0o600)
    ok(f"Wrote {ENV_PROD.name}")
def _dollar_quote(text: str, preferred: str, stem: str) -> str:
    """Return *text* as a PostgreSQL dollar-quoted literal with a safe tag.

    Args:
        text: The raw string to quote.
        preferred: Tag to use when it cannot collide (may be empty for ``$$``).
        stem: Non-empty identifier prefix used to derive fallback tags
            (``stem1``, ``stem2``, ...) when the preferred tag collides.

    Returns:
        ``$tag$text$tag$`` where ``$tag$`` is guaranteed not to terminate the
        literal early.
    """
    tag = preferred
    attempt = 0
    # The closing delimiter itself starts with "$", so text ending in "$<tag>"
    # would merge with it into a premature terminator. Checking against the
    # text plus that leading "$" covers both the embedded and the trailing case.
    while f"${tag}$" in f"{text}$":
        attempt += 1
        tag = f"{stem}{attempt}"
    return f"${tag}${text}${tag}$"


def _create_role_sql(role: str, password: str, attributes: str) -> str:
    """Build an idempotent ``DO`` block that creates *role* when it is missing."""
    pw_literal = _dollar_quote(password, "pw", "pw")
    body = "\n".join(
        [
            "",
            "BEGIN",
            f"IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{role}') THEN",
            f"CREATE ROLE {role} WITH LOGIN PASSWORD {pw_literal} {attributes};",
            "END IF;",
            "END",
            "",
        ]
    )
    # The outer tag is chosen after the password literal has been embedded so
    # that nothing inside the body (including the password) can close the block.
    return f"DO {_dollar_quote(body, '', 'do')};"


def write_init_sql_prod(config: dict) -> None:
    """Generate init-postgres-prod.sql with production passwords.

    Reads ``app_user_password``, ``poller_user_password`` and ``postgres_db``
    from *config*, writes the SQL to ``INIT_SQL_PROD`` and sets its mode to
    0o644.

    Passwords are embedded as dollar-quoted literals. The quote tags are picked
    so that a password containing ``$pw$`` or ``$$`` cannot terminate either the
    literal or the surrounding ``DO`` block; for passwords without such
    sequences the generated SQL keeps the plain ``$pw$`` / ``$$`` delimiters.
    """
    app_pw = config["app_user_password"]
    poll_pw = config["poller_user_password"]
    db = config["postgres_db"]

    app_role_sql = _create_role_sql(
        "app_user", app_pw, "NOSUPERUSER NOCREATEDB NOCREATEROLE"
    )
    poller_role_sql = _create_role_sql(
        "poller_user", poll_pw, "NOSUPERUSER NOCREATEDB NOCREATEROLE BYPASSRLS"
    )

    content = f"""\
-- Production database init — generated by setup.py
-- Passwords match those in .env.prod
{app_role_sql}
GRANT CONNECT ON DATABASE {db} TO app_user;
GRANT USAGE ON SCHEMA public TO app_user;
{poller_role_sql}
GRANT CONNECT ON DATABASE {db} TO poller_user;
GRANT USAGE ON SCHEMA public TO poller_user;
"""
    INIT_SQL_PROD.write_text(content)
    INIT_SQL_PROD.chmod(0o644)  # postgres container needs to read this
    ok(f"Wrote {INIT_SQL_PROD.name}")
# ── Data directory setup ─────────────────────────────────────────────────────

# UID 1001 = appuser inside the API container.
# prepare_data_dirs() passes this value as both the owning user and the group.
APPUSER_UID = 1001

# Directories the API container writes to (as appuser).
# Paths are relative to PROJECT_ROOT; prepare_data_dirs() chowns each of them
# to APPUSER_UID.
API_WRITABLE_DIRS = [
    "docker-data/git-store",
    "docker-data/firmware-cache",
]

# Directories that need broad write access (shared between containers).
# prepare_data_dirs() sets these to mode 0o777.
SHARED_WRITABLE_DIRS = [
    "docker-data/wireguard/wg_confs",
]

# Directories that just need to exist (owned by their respective containers).
# prepare_data_dirs() only creates them; ownership and mode are left untouched.
DATA_DIRS = [
    "docker-data/postgres",
    "docker-data/redis",
    "docker-data/nats",
    "docker-data/wireguard",
    "docker-data/wireguard/custom-cont-init.d",
]
def prepare_data_dirs() -> None:
    """Create the on-disk data layout used by the containers.

    Steps, in order:
      1. Create every directory listed in DATA_DIRS, API_WRITABLE_DIRS and
         SHARED_WRITABLE_DIRS under PROJECT_ROOT.
      2. Give the API-writable directories to APPUSER_UID, retrying through
         ``sudo chown -R`` when the direct call is not permitted.
      3. Make the shared directories mode 0o777, retrying through
         ``sudo chmod -R`` when the direct call is not permitted.
      4. (Re)write the WireGuard forwarding init script and mark it executable.

    Ownership/permission failures are reported as warnings, not raised.
    """
    section("Preparing Data Directories")

    # Everything must exist before ownership or mode can be adjusted.
    for rel in DATA_DIRS + API_WRITABLE_DIRS + SHARED_WRITABLE_DIRS:
        (PROJECT_ROOT / rel).mkdir(parents=True, exist_ok=True)

    def run_with_sudo(*command: str) -> None:
        # Raises on non-zero exit or when sudo does not finish within 10s.
        subprocess.run(["sudo", *command], check=True, timeout=10)

    # API-writable dirs belong to appuser (uid 1001).
    owner_spec = f"{APPUSER_UID}:{APPUSER_UID}"
    for rel in API_WRITABLE_DIRS:
        target = PROJECT_ROOT / rel
        try:
            os.chown(target, APPUSER_UID, APPUSER_UID)
            ok(f"{rel} (owned by appuser)")
        except PermissionError:
            # Not allowed as the current user — escalate.
            try:
                run_with_sudo("chown", "-R", owner_spec, str(target))
                ok(f"{rel} (owned by appuser via sudo)")
            except Exception:
                warn(f"{rel} — could not set ownership, backups/firmware may fail")

    # Shared dirs are written by both the API and the WireGuard container.
    for rel in SHARED_WRITABLE_DIRS:
        target = PROJECT_ROOT / rel
        try:
            target.chmod(0o777)
            ok(f"{rel} (world-writable for container sharing)")
        except PermissionError:
            try:
                run_with_sudo("chmod", "-R", "777", str(target))
                ok(f"{rel} (world-writable via sudo)")
            except Exception:
                warn(f"{rel} — could not set permissions, VPN config sync may fail")

    # The forwarding script is always overwritten so the isolation rules stay current.
    forwarding_rules = """\
#!/bin/sh
# Enable forwarding between Docker network and WireGuard tunnel
# Idempotent: check before adding to prevent duplicates on restart
# Allow Docker→VPN (poller/API reaching devices)
iptables -C FORWARD -i eth0 -o wg0 -j ACCEPT 2>/dev/null || iptables -A FORWARD -i eth0 -o wg0 -j ACCEPT
# Allow VPN→Docker ONLY (devices reaching poller/API, NOT the public internet)
iptables -C FORWARD -i wg0 -o eth0 -d 172.16.0.0/12 -j ACCEPT 2>/dev/null || iptables -A FORWARD -i wg0 -o eth0 -d 172.16.0.0/12 -j ACCEPT
# Block VPN→anywhere else (prevents using server as exit node)
iptables -C FORWARD -i wg0 -o eth0 -j DROP 2>/dev/null || iptables -A FORWARD -i wg0 -o eth0 -j DROP
# Block cross-subnet traffic on wg0 (tenant isolation)
# Peers in 10.10.1.0/24 cannot reach peers in 10.10.2.0/24
iptables -C FORWARD -i wg0 -o wg0 -j DROP 2>/dev/null || iptables -A FORWARD -i wg0 -o wg0 -j DROP
# Block IPv6 forwarding on wg0 (prevent link-local bypass)
ip6tables -C FORWARD -i wg0 -j DROP 2>/dev/null || ip6tables -A FORWARD -i wg0 -j DROP
# NAT for return traffic — per-tenant SNAT rules applied by wg-reload watcher
echo "WireGuard forwarding and tenant isolation rules applied"
"""
    init_script = (
        PROJECT_ROOT / "docker-data/wireguard/custom-cont-init.d/10-forwarding.sh"
    )
    init_script.write_text(forwarding_rules)
    init_script.chmod(0o755)
    ok("WireGuard forwarding init script created")
    ok("Data directories ready")
# ── Docker operations ────────────────────────────────────────────────────────
def run_compose(
    *args, check: bool = True, capture: bool = False, timeout: int = 600
) -> subprocess.CompletedProcess:
    """Invoke ``docker compose`` with the production overlay and env file.

    Args:
        *args: Sub-command and its arguments, appended after the shared prefix.
        check: Raise ``CalledProcessError`` on a non-zero exit status.
        capture: Capture stdout/stderr (as text) instead of inheriting them.
        timeout: Seconds before ``TimeoutExpired`` is raised.

    Returns:
        The ``CompletedProcess`` of the command, run from PROJECT_ROOT.
    """
    command = [*COMPOSE_CMD, "--env-file", str(ENV_PROD), *args]
    return subprocess.run(
        command,
        cwd=PROJECT_ROOT,
        check=check,
        timeout=timeout,
        text=True,
        capture_output=capture,
    )
def bootstrap_openbao(config: dict) -> bool:
    """Start OpenBao, capture credentials, update .env.prod.

    Brings up the ``postgres`` and ``openbao`` services, polls the
    ``tod_openbao`` container until it reports ``healthy`` (up to 60s), then
    extracts ``BAO_UNSEAL_KEY`` and ``OPENBAO_TOKEN`` from the container logs
    and substitutes them for the placeholders in ``.env.prod``.

    Args:
        config: Wizard configuration (not read by this function).

    Returns:
        True when real credentials are in ``.env.prod`` afterwards (either
        freshly captured or already present), False otherwise. Failures of the
        docker CLI, including timeouts, are reported and yield False rather
        than raising.
    """
    section("OpenBao Bootstrap")
    info("Starting PostgreSQL and OpenBao containers...")
    try:
        run_compose("up", "-d", "postgres", "openbao")
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
        fail("Failed to start OpenBao containers.")
        info(str(e))
        return False

    info("Waiting for OpenBao to initialize (up to 60s)...")
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + 60
    healthy = False
    while time.monotonic() < deadline:
        try:
            result = subprocess.run(
                [
                    "docker",
                    "inspect",
                    "--format",
                    "{{.State.Health.Status}}",
                    "tod_openbao",
                ],
                capture_output=True,
                text=True,
                timeout=10,
            )
            status = result.stdout.strip()
        except (subprocess.TimeoutExpired, OSError):
            # A slow or momentarily unavailable docker CLI is not fatal;
            # keep polling until the deadline expires.
            status = ""
        if status == "healthy":
            healthy = True
            break
        time.sleep(2)

    if not healthy:
        fail("OpenBao did not become healthy within 60 seconds.")
        warn("Your .env.prod has placeholder tokens. To fix manually:")
        info(" docker compose logs openbao")
        info(" Look for BAO_UNSEAL_KEY and OPENBAO_TOKEN lines")
        info(" Update .env.prod with those values")
        return False
    ok("OpenBao is healthy")

    # Parse credentials from container logs
    info("Capturing OpenBao credentials from logs...")
    try:
        result = run_compose("logs", "openbao", check=False, capture=True, timeout=30)
        logs = result.stdout + result.stderr
    except subprocess.TimeoutExpired:
        # Treated like "nothing found" so the placeholder check below decides.
        logs = ""
    unseal_match = re.search(r"BAO_UNSEAL_KEY=(\S+)", logs)
    token_match = re.search(r"OPENBAO_TOKEN=(\S+)", logs)

    if unseal_match and token_match:
        unseal_key = unseal_match.group(1)
        root_token = token_match.group(1)
        # Update .env.prod
        env_content = ENV_PROD.read_text()
        env_content = env_content.replace(
            "OPENBAO_TOKEN=PLACEHOLDER_RUN_SETUP", f"OPENBAO_TOKEN={root_token}"
        )
        env_content = env_content.replace(
            "BAO_UNSEAL_KEY=PLACEHOLDER_RUN_SETUP", f"BAO_UNSEAL_KEY={unseal_key}"
        )
        ENV_PROD.write_text(env_content)
        ENV_PROD.chmod(0o600)
        ok("OpenBao credentials captured and saved to .env.prod")
        info(f"OPENBAO_TOKEN={mask_secret(root_token)}")
        info(f"BAO_UNSEAL_KEY={mask_secret(unseal_key)}")
        return True

    # OpenBao was already initialized — check if .env.prod has real values
    env_content = ENV_PROD.read_text()
    if "PLACEHOLDER_RUN_SETUP" in env_content:
        warn(
            "Could not find credentials in logs (OpenBao may already be initialized)."
        )
        warn("Check 'docker compose logs openbao' and update .env.prod manually.")
        return False
    ok("OpenBao already initialized — existing credentials in .env.prod")
    return True
def pull_images() -> bool:
    """Fetch the pre-built service images from GHCR, one service at a time.

    Returns:
        True when every image was pulled; False as soon as one pull fails or
        exceeds its 10-minute timeout (later services are not attempted).
    """
    section("Pulling Images")
    info("Downloading pre-built images from GitHub Container Registry...")
    print()

    targets = ("api", "poller", "frontend", "winbox-worker")
    total = len(targets)
    for position, name in enumerate(targets, start=1):
        info(f"[{position}/{total}] Pulling {name}...")
        try:
            run_compose("pull", name, timeout=600)
            ok(f"{name} pulled successfully")
        except subprocess.CalledProcessError:
            fail(f"Failed to pull {name}")
            print()
            warn("Check your internet connection and that the image exists.")
            warn("To retry:")
            retry_command = (
                f" docker compose -f {COMPOSE_BASE} -f {COMPOSE_PROD} "
                f"--env-file .env.prod pull {name}"
            )
            info(retry_command)
            return False
        except subprocess.TimeoutExpired:
            fail(f"Pull of {name} timed out (10 min)")
            return False

    print()
    ok("All images ready")
    return True
def build_images() -> bool:
    """Build Docker images one at a time to avoid OOM.

    Each service is built through ``run_compose`` with a 15-minute timeout.

    Returns:
        True when every image was built; False as soon as one build fails or
        times out (later services are not attempted).
    """
    section("Building Images")
    info("Building images sequentially to avoid memory issues...")
    print()
    services = ["api", "poller", "frontend", "winbox-worker"]
    for i, service in enumerate(services, 1):
        info(f"[{i}/{len(services)}] Building {service}...")
        try:
            run_compose("build", service, timeout=900)
            ok(f"{service} built successfully")
        except subprocess.CalledProcessError:
            fail(f"Failed to build {service}")
            print()
            warn("To retry this build:")
            # The hint includes --env-file so the manual retry resolves the
            # same variables as the command run_compose executes.
            info(
                f" docker compose -f {COMPOSE_BASE} -f {COMPOSE_PROD} "
                f"-f {COMPOSE_BUILD_OVERRIDE} --env-file .env.prod build {service}"
            )
            return False
        except subprocess.TimeoutExpired:
            fail(f"Build of {service} timed out (15 min)")
            return False
    print()
    ok("All images built successfully")
    return True
def start_stack() -> bool:
    """Start the full stack.

    Runs ``docker compose up -d`` through ``run_compose``.

    Returns:
        True when the command succeeds; False when it exits non-zero or does
        not finish within ``run_compose``'s timeout.
    """
    section("Starting Stack")
    info("Bringing up all services...")
    try:
        run_compose("up", "-d")
    except subprocess.CalledProcessError as e:
        fail("Failed to start stack")
        info(str(e))
        return False
    except subprocess.TimeoutExpired as e:
        # Report a hung "up" like any other start failure instead of letting
        # the exception escape as a traceback.
        fail("Failed to start stack")
        info(str(e))
        return False
    ok("Stack started")
    return True
def health_check(config: dict) -> bool:
    """Poll service health for up to 60 seconds and print a summary.

    Each container is inspected with ``docker inspect``; it counts as up when
    its health status is ``healthy`` or, for containers without a health
    check, its state is ``running``.

    Args:
        config: Wizard configuration; ``app_base_url``, ``admin_email``,
            ``admin_password`` and ``admin_password_generated`` are read for
            the success summary.

    Returns:
        True when every service came up within the window, False otherwise.
    """
    section("Health Check")
    info("Checking service health (up to 60s)...")
    print()
    services = [
        ("tod_postgres", "PostgreSQL"),
        ("tod_redis", "Redis"),
        ("tod_nats", "NATS"),
        ("tod_openbao", "OpenBao"),
        ("tod_api", "API"),
        ("tod_poller", "Poller"),
        ("tod_frontend", "Frontend"),
        ("tod_winbox_worker", "WinBox Worker"),
    ]
    # Monotonic clock: the window must not shift if the wall clock is adjusted.
    deadline = time.monotonic() + 60
    pending = dict(services)
    last_waiting_msg = None  # None => the first progress line prints immediately
    while pending and time.monotonic() < deadline:
        for container, label in list(pending.items()):
            try:
                result = subprocess.run(
                    [
                        "docker",
                        "inspect",
                        "--format",
                        "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}",
                        container,
                    ],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
                status = result.stdout.strip()
                if status in ("healthy", "running"):
                    ok(f"{label}: {status}")
                    del pending[container]
            except Exception:
                # Best effort: a failed probe just leaves the container pending
                # so it is retried on the next pass.
                continue
        if pending:
            now = time.monotonic()
            # A slow probe can overrun the deadline; never show a negative count.
            remaining = max(0, int(deadline - now))
            if last_waiting_msg is None or now - last_waiting_msg >= 10:
                waiting_names = ", ".join(pending.values())
                info(f"Waiting for: {waiting_names} ({remaining}s remaining)")
                last_waiting_msg = now
            time.sleep(3)

    for container, label in pending.items():
        fail(f"{label}: not healthy")
        # Container names are "tod_<service>" with underscores, while compose
        # service names use hyphens (e.g. tod_winbox_worker -> winbox-worker).
        service = container[len("tod_"):] if container.startswith("tod_") else container
        service = service.replace("_", "-")
        info(f" Check logs: docker compose logs {service}")

    # Final summary
    print()
    if not pending:
        banner("Setup Complete!")
        print(f" {bold('Access your instance:')}")
        print(f" URL: {green(config['app_base_url'])}")
        print(f" Email: {config['admin_email']}")
        if config.get("admin_password_generated"):
            print(f" Password: {bold(config['admin_password'])}")
        else:
            print(" Password: (the password you entered)")
        print()
        info("Change the admin password after your first login.")
    else:
        warn("Some services are not healthy. Check the logs above.")
        info(f" docker compose -f {COMPOSE_BASE} -f {COMPOSE_PROD} logs")
    return not pending
# ── Main ─────────────────────────────────────────────────────────────────────
def _timed(telem: SetupTelemetry, step_name: str, func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and report its duration to telemetry.

    A "success" step is emitted when the call returns; a "failure" step
    (carrying the exception text and class name) is emitted when it raises,
    after which the exception propagates unchanged.

    Returns:
        Whatever ``func`` returns.
    """
    started = time.monotonic()

    def elapsed_ms() -> int:
        return int((time.monotonic() - started) * 1000)

    try:
        outcome = func(*args, **kwargs)
        telem.step(step_name, "success", duration_ms=elapsed_ms())
        return outcome
    except Exception as exc:
        telem.step(
            step_name,
            "failure",
            duration_ms=elapsed_ms(),
            error_message=str(exc),
            error_code=type(exc).__name__,
        )
        raise
def _build_parser() -> argparse.ArgumentParser:
    """Construct the command-line interface for the setup wizard.

    Options fall into two shapes: boolean switches (``store_true``, default
    False) and string-valued options (default None). They are registered in
    the order they should appear in ``--help``.
    """
    cli = argparse.ArgumentParser(
        description="TOD Production Setup Wizard",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    def switch(*names: str, text: str) -> None:
        # Boolean flag: absent -> False, present -> True.
        cli.add_argument(*names, action="store_true", default=False, help=text)

    def option(name: str, text: str, **extra) -> None:
        # String-valued option: absent -> None.
        cli.add_argument(name, type=str, default=None, help=text, **extra)

    switch("--non-interactive", text="Skip all prompts, use defaults + provided flags")

    option("--postgres-password", "PostgreSQL superuser password")
    option("--admin-email", "Admin email (default: admin@the-other-dude.dev)")
    option("--admin-password", "Admin password (auto-generated if not provided)")
    option("--domain", "Production domain (e.g. tod.example.com)")

    option("--smtp-host", "SMTP host (skip email config if not provided)")
    option("--smtp-port", "SMTP port (default: 587)")
    option("--smtp-user", "SMTP username")
    option("--smtp-password", "SMTP password")
    option("--smtp-from", "SMTP from address")
    switch("--smtp-tls", text="Use TLS for SMTP (default: true in non-interactive)")
    switch("--no-smtp-tls", text="Disable TLS for SMTP")

    switch("--no-https", text="Use HTTP instead of HTTPS (for LAN/dev without TLS)")
    option(
        "--proxy",
        "Reverse proxy type: caddy, nginx, apache, haproxy, traefik, skip",
    )

    switch("--telemetry", text="Enable anonymous diagnostics")
    switch("--no-telemetry", text="Disable anonymous diagnostics")

    option(
        "--build-mode",
        "Image source: prebuilt (pull from GHCR) or source (compile locally)",
        choices=["prebuilt", "source"],
    )
    switch(
        "--yes",
        "-y",
        text="Auto-confirm summary (don't prompt for confirmation)",
    )
    return cli
def main() -> int:
    """Run the setup wizard end to end.

    Phases, in order: pre-flight checks, telemetry opt-in, configuration
    wizard, summary confirmation, writing config files and data directories,
    OpenBao bootstrap, image build or pull, stack start, and a final health
    check. Each phase reports its outcome through ``telem.step``.

    Returns:
        0 when setup completes, 1 when a phase fails or the user cancels.

    Raises:
        Exception: errors raised by the wizard or config-writing phases are
            recorded via ``telem.step`` and re-raised unchanged.
    """
    # Parse CLI arguments
    parser = _build_parser()
    args = parser.parse_args()

    # Graceful Ctrl+C
    env_written = False
    telem = SetupTelemetry()
    setup_start = time.monotonic()

    def elapsed_ms(since: float) -> int:
        """Milliseconds elapsed since the monotonic timestamp *since*."""
        return int((time.monotonic() - since) * 1000)

    def fail_setup(message: str) -> int:
        """Record the overall run as failed and return the failure exit code."""
        telem.step(
            "setup_total",
            "failure",
            duration_ms=elapsed_ms(setup_start),
            error_message=message,
        )
        return 1

    def handle_sigint(sig, frame):
        # Reads env_written from the enclosing scope at signal time, so the
        # message reflects how far setup had progressed.
        telem.step(
            "setup_total",
            "failure",
            duration_ms=elapsed_ms(setup_start),
            error_message="User cancelled (SIGINT)",
        )
        print()
        if not env_written:
            info("Aborted before writing .env.prod — no files changed.")
        else:
            warn(f".env.prod was already written to {ENV_PROD}")
            info(
                "OpenBao tokens may still be placeholders if bootstrap didn't complete."
            )
        sys.exit(1)

    signal.signal(signal.SIGINT, handle_sigint)
    os.chdir(PROJECT_ROOT)

    # Phase 1: Pre-flight
    if not preflight(args):
        telem.step("preflight", "failure")
        return 1
    telem.step("preflight", "success")

    # Telemetry opt-in (right after preflight, before wizard)
    config: dict = {}
    wizard_telemetry(config, telem, args)

    # Phase 2: Wizard
    try:
        wizard_build_mode(config, args)
        wizard_database(config, args)
        wizard_security(config)
        wizard_admin(config, args)
        wizard_email(config, args)
        wizard_domain(config, args)
        wizard_reverse_proxy(config, args)
        telem.step("wizard", "success")
    except Exception as e:
        telem.step(
            "wizard", "failure", error_message=str(e), error_code=type(e).__name__
        )
        raise

    # Summary
    if not show_summary(config, args):
        info("Setup cancelled.")
        return fail_setup("User cancelled at summary")

    # Phase 3: Write files and prepare directories
    section("Writing Configuration")
    try:
        write_env_prod(config)
        write_init_sql_prod(config)
        env_written = True
        prepare_data_dirs()
        telem.step("write_config", "success")
    except Exception as e:
        telem.step(
            "write_config", "failure", error_message=str(e), error_code=type(e).__name__
        )
        raise

    # Phase 4: OpenBao
    t0 = time.monotonic()
    bao_ok = bootstrap_openbao(config)
    duration_ms = elapsed_ms(t0)
    if bao_ok:
        telem.step("openbao_bootstrap", "success", duration_ms=duration_ms)
    else:
        telem.step(
            "openbao_bootstrap",
            "failure",
            duration_ms=duration_ms,
            error_message="OpenBao did not become healthy or credentials not found",
        )
        if not ask_yes_no(
            "Continue without OpenBao credentials? (stack will need manual fix)",
            default=False,
        ):
            warn("Fix OpenBao credentials in .env.prod and re-run setup.py.")
            return fail_setup("Aborted after OpenBao failure")

    # Phase 5: Build or Pull
    t0 = time.monotonic()
    if config.get("build_mode") == "source":
        images_ok = build_images()
        step_name = "build_images"
        fail_msg = "Docker build failed"
        retry_hint = "Fix the build error and re-run setup.py to continue."
    else:
        images_ok = pull_images()
        step_name = "pull_images"
        fail_msg = "Image pull failed"
        retry_hint = "Check your connection and re-run setup.py to continue."
    if not images_ok:
        telem.step(step_name, "failure", duration_ms=elapsed_ms(t0))
        warn(retry_hint)
        return fail_setup(fail_msg)
    telem.step(step_name, "success", duration_ms=elapsed_ms(t0))

    # Phase 6: Start
    t0 = time.monotonic()
    if not start_stack():
        telem.step("start_stack", "failure", duration_ms=elapsed_ms(t0))
        return fail_setup("Stack failed to start")
    telem.step("start_stack", "success", duration_ms=elapsed_ms(t0))

    # Phase 7: Health
    t0 = time.monotonic()
    health_result = health_check(config)
    # Only an explicit False is treated as failure; any other return value
    # (including None) counts as success.
    if health_result is False:
        telem.step("health_check", "failure", duration_ms=elapsed_ms(t0))
        return fail_setup("Services did not become healthy")
    telem.step("health_check", "success", duration_ms=elapsed_ms(t0))

    # Done
    telem.step("setup_total", "success", duration_ms=elapsed_ms(setup_start))
    return 0
# Script entry point: run the wizard and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())