From ecbefae9ab69e11a047c313782e3a618b910fe88 Mon Sep 17 00:00:00 2001 From: Jason Staack Date: Mon, 16 Mar 2026 20:04:26 -0500 Subject: [PATCH] feat: add opt-in anonymous setup telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instruments setup.py to send per-step diagnostic events to the TOD telemetry collector when the user opts in. Uses a shared static token with no registration flow — fully anonymous. Co-Authored-By: Claude Opus 4.6 (1M context) --- setup.py | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 265 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index 076275b..80b126c 100755 --- a/setup.py +++ b/setup.py @@ -11,8 +11,10 @@ Usage: import base64 import datetime import getpass +import json import os import pathlib +import platform import re import secrets import shutil @@ -21,6 +23,8 @@ import socket import subprocess import sys import time +import urllib.error +import urllib.request # ── Constants ──────────────────────────────────────────────────────────────── @@ -95,6 +99,148 @@ def info(text: str) -> None: print(f" {dim('·')} {text}") +# ── Setup Telemetry ───────────────────────────────────────────────────────── + +_TELEMETRY_COLLECTOR = "https://telemetry.theotherdude.net" +_TELEMETRY_TOKEN = "75e320cbd48e20e3234ab4e734f86e124a903a7278e643cf6d383708a8a7fe4b" + + +def _collect_environment() -> dict: + """Gather allowlisted environment info. No IPs, hostnames, or secrets.""" + env = { + "os": platform.system(), + "os_version": platform.release(), + "arch": platform.machine(), + "python": platform.python_version(), + } + # Docker version + try: + r = subprocess.run( + ["docker", "version", "--format", "{{.Server.Version}}"], + capture_output=True, text=True, timeout=5, + ) + if r.returncode == 0: + env["docker"] = r.stdout.strip() + except Exception: + pass + # Compose version + try: + r = subprocess.run( + ["docker", "compose", "version", "--short"], + capture_output=True, text=True, timeout=5, + ) + if r.returncode == 0: + env["compose"] = r.stdout.strip() + except Exception: + pass + # RAM (rounded to nearest GB) + try: + if sys.platform == "darwin": + r = subprocess.run( + ["sysctl", "-n", "hw.memsize"], + capture_output=True, text=True, timeout=5, + ) + if r.returncode == 0: + env["ram_gb"] = round(int(r.stdout.strip()) / (1024 ** 3)) + else: + with open("/proc/meminfo") as f: + for line in f: + if line.startswith("MemTotal:"): + env["ram_gb"] = round(int(line.split()[1]) * 1024 / (1024 ** 3)) + break + except Exception: + pass + return env + + +def _get_app_version() -> tuple[str, str]: + """Return (version, build_id) from git if available.""" + version = "unknown" + build_id = "unknown" + try: + r = subprocess.run( + ["git", "describe", "--tags", "--always"], + capture_output=True, text=True, timeout=5, cwd=PROJECT_ROOT, + ) + if r.returncode == 0: + version = r.stdout.strip() + except Exception: + pass + try: + r = subprocess.run( + ["git", "rev-parse", "--short", "HEAD"], + capture_output=True, text=True, timeout=5, cwd=PROJECT_ROOT, + ) + if r.returncode == 0: + build_id = r.stdout.strip() + except Exception: + pass + return version, build_id + + +class SetupTelemetry: + """Lightweight fire-and-forget telemetry for setup diagnostics. + + When enabled, sends one event per setup step to the TOD telemetry collector. + All events use a shared anonymous token — no registration, no PII. + """ + + def __init__(self) -> None: + self.enabled = False + self._environment: dict = {} + self._app_version = "unknown" + self._build_id = "unknown" + + def enable(self) -> None: + self.enabled = True + self._environment = _collect_environment() + self._app_version, self._build_id = _get_app_version() + + def step(self, step_name: str, result: str, duration_ms: int | None = None, + error_message: str | None = None, error_code: str | None = None, + metrics: dict | None = None) -> None: + """Emit a single setup step event. No-op if disabled.""" + if not self.enabled: + return + + event: dict = { + "event_type": "setup", + "severity": "error" if result == "failure" else "info", + "phase": "setup", + "operation": step_name, + "result": result, + "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), + "app_version": self._app_version, + "build_id": self._build_id, + "environment": self._environment, + } + if duration_ms is not None: + event["duration_ms"] = duration_ms + if error_message: + event["error"] = {"message": error_message[:500], "code": error_code or ""} + if metrics: + event["metrics"] = metrics + + self._send([event]) + + def _send(self, events: list[dict]) -> None: + """POST events to the collector. Fire-and-forget.""" + try: + body = json.dumps({"events": events}).encode() + req = urllib.request.Request( + f"{_TELEMETRY_COLLECTOR}/api/v1/ingest", + data=body, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {_TELEMETRY_TOKEN}", + }, + method="POST", + ) + urllib.request.urlopen(req, timeout=10) + except Exception: + pass # Fire-and-forget — never affect setup + + # ── Input helpers ──────────────────────────────────────────────────────────── def ask(prompt: str, default: str = "", required: bool = False, @@ -720,6 +866,27 @@ def wizard_reverse_proxy(config: dict) -> None: info("Traefik watches for file changes — no reload needed.") +def wizard_telemetry(config: dict, telem: SetupTelemetry) -> None: + section("Anonymous Diagnostics") + info("TOD can send anonymous setup and runtime diagnostics to help") + info("identify common failures. No personal data, IPs, hostnames,") + info("or configuration values are ever sent.") + print() + info("What is collected: step pass/fail, duration, OS/arch/Python") + info("version, Docker version, RAM (rounded), and error types.") + info("You can disable this anytime by setting TELEMETRY_ENABLED=false") + info("in .env.prod.") + print() + + if ask_yes_no("Send anonymous diagnostics?", default=False): + config["telemetry_enabled"] = True + telem.enable() + ok("Diagnostics enabled — thank you!") + else: + config["telemetry_enabled"] = False + info("No diagnostics will be sent.") + + # ── Summary ────────────────────────────────────────────────────────────────── def show_summary(config: dict) -> bool: @@ -765,6 +932,13 @@ def show_summary(config: dict) -> bool: print(f" {dim('(not configured)')}") print() + print(f" {bold('Diagnostics')}") + if config.get("telemetry_enabled"): + print(f" TELEMETRY_ENABLED = {green('true')}") + else: + print(f" TELEMETRY_ENABLED = {dim('false')}") + print() + print(f" {bold('OpenBao')}") print(f" {dim('(will be captured automatically during bootstrap)')}") print() @@ -868,6 +1042,11 @@ SSH_IDLE_TIMEOUT=900 # --- Config Backup --- CONFIG_BACKUP_INTERVAL=21600 CONFIG_BACKUP_MAX_CONCURRENT=10 + +# --- Telemetry --- +# Opt-in anonymous diagnostics. Set to false to disable. +TELEMETRY_ENABLED={'true' if config.get('telemetry_enabled') else 'false'} +TELEMETRY_COLLECTOR_URL={_TELEMETRY_COLLECTOR} """ ENV_PROD.write_text(content) @@ -1216,12 +1395,34 @@ def health_check(config: dict) -> None: # ── Main ───────────────────────────────────────────────────────────────────── +def _timed(telem: SetupTelemetry, step_name: str, func, *args, **kwargs): + """Run func, emit a telemetry event with timing. Returns func's result.""" + t0 = time.monotonic() + try: + result = func(*args, **kwargs) + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step(step_name, "success", duration_ms=duration_ms) + return result + except Exception as e: + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step( + step_name, "failure", duration_ms=duration_ms, + error_message=str(e), error_code=type(e).__name__, + ) + raise + + def main() -> int: # Graceful Ctrl+C env_written = False + telem = SetupTelemetry() + setup_start = time.monotonic() def handle_sigint(sig, frame): nonlocal env_written + telem.step("setup_total", "failure", + duration_ms=int((time.monotonic() - setup_start) * 1000), + error_message="User cancelled (SIGINT)") print() if not env_written: info("Aborted before writing .env.prod — no files changed.") @@ -1236,47 +1437,99 @@ def main() -> int: # Phase 1: Pre-flight if not preflight(): + telem.step("preflight", "failure") return 1 + telem.step("preflight", "success") + + # Telemetry opt-in (right after preflight, before wizard) + config: dict = {} + wizard_telemetry(config, telem) # Phase 2: Wizard - config: dict = {} - wizard_database(config) - wizard_security(config) - wizard_admin(config) - wizard_email(config) - wizard_domain(config) - wizard_reverse_proxy(config) + try: + wizard_database(config) + wizard_security(config) + wizard_admin(config) + wizard_email(config) + wizard_domain(config) + wizard_reverse_proxy(config) + telem.step("wizard", "success") + except Exception as e: + telem.step("wizard", "failure", + error_message=str(e), error_code=type(e).__name__) + raise # Summary if not show_summary(config): info("Setup cancelled.") + telem.step("setup_total", "failure", + duration_ms=int((time.monotonic() - setup_start) * 1000), + error_message="User cancelled at summary") return 1 # Phase 3: Write files and prepare directories section("Writing Configuration") - write_env_prod(config) - write_init_sql_prod(config) - env_written = True - prepare_data_dirs() + try: + write_env_prod(config) + write_init_sql_prod(config) + env_written = True + prepare_data_dirs() + telem.step("write_config", "success") + except Exception as e: + telem.step("write_config", "failure", + error_message=str(e), error_code=type(e).__name__) + raise # Phase 4: OpenBao + t0 = time.monotonic() bao_ok = bootstrap_openbao(config) - if not bao_ok: + duration_ms = int((time.monotonic() - t0) * 1000) + if bao_ok: + telem.step("openbao_bootstrap", "success", duration_ms=duration_ms) + else: + telem.step("openbao_bootstrap", "failure", duration_ms=duration_ms, + error_message="OpenBao did not become healthy or credentials not found") if not ask_yes_no("Continue without OpenBao credentials? (stack will need manual fix)", default=False): warn("Fix OpenBao credentials in .env.prod and re-run setup.py.") + telem.step("setup_total", "failure", + duration_ms=int((time.monotonic() - setup_start) * 1000), + error_message="Aborted after OpenBao failure") return 1 # Phase 5: Build + t0 = time.monotonic() if not build_images(): + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step("build_images", "failure", duration_ms=duration_ms) warn("Fix the build error and re-run setup.py to continue.") + telem.step("setup_total", "failure", + duration_ms=int((time.monotonic() - setup_start) * 1000), + error_message="Docker build failed") return 1 + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step("build_images", "success", duration_ms=duration_ms) # Phase 6: Start + t0 = time.monotonic() if not start_stack(): + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step("start_stack", "failure", duration_ms=duration_ms) + telem.step("setup_total", "failure", + duration_ms=int((time.monotonic() - setup_start) * 1000), + error_message="Stack failed to start") return 1 + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step("start_stack", "success", duration_ms=duration_ms) # Phase 7: Health + t0 = time.monotonic() health_check(config) + duration_ms = int((time.monotonic() - t0) * 1000) + telem.step("health_check", "success", duration_ms=duration_ms) + + # Done + total_ms = int((time.monotonic() - setup_start) * 1000) + telem.step("setup_total", "success", duration_ms=total_ms) return 0