feat: add opt-in anonymous setup telemetry
Instruments setup.py to send per-step diagnostic events to the TOD telemetry collector when the user opts in. Uses a shared static token with no registration flow — fully anonymous. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
257
setup.py
257
setup.py
@@ -11,8 +11,10 @@ Usage:
|
|||||||
import base64
|
import base64
|
||||||
import datetime
|
import datetime
|
||||||
import getpass
|
import getpass
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import platform
|
||||||
import re
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
import shutil
|
import shutil
|
||||||
@@ -21,6 +23,8 @@ import socket
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
# ── Constants ────────────────────────────────────────────────────────────────
|
# ── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -95,6 +99,148 @@ def info(text: str) -> None:
|
|||||||
print(f" {dim('·')} {text}")
|
print(f" {dim('·')} {text}")
|
||||||
|
|
||||||
|
|
||||||
|
# ── Setup Telemetry ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_TELEMETRY_COLLECTOR = "https://telemetry.theotherdude.net"
|
||||||
|
_TELEMETRY_TOKEN = "75e320cbd48e20e3234ab4e734f86e124a903a7278e643cf6d383708a8a7fe4b"
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_environment() -> dict:
|
||||||
|
"""Gather allowlisted environment info. No IPs, hostnames, or secrets."""
|
||||||
|
env = {
|
||||||
|
"os": platform.system(),
|
||||||
|
"os_version": platform.release(),
|
||||||
|
"arch": platform.machine(),
|
||||||
|
"python": platform.python_version(),
|
||||||
|
}
|
||||||
|
# Docker version
|
||||||
|
try:
|
||||||
|
r = subprocess.run(
|
||||||
|
["docker", "version", "--format", "{{.Server.Version}}"],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
if r.returncode == 0:
|
||||||
|
env["docker"] = r.stdout.strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# Compose version
|
||||||
|
try:
|
||||||
|
r = subprocess.run(
|
||||||
|
["docker", "compose", "version", "--short"],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
if r.returncode == 0:
|
||||||
|
env["compose"] = r.stdout.strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# RAM (rounded to nearest GB)
|
||||||
|
try:
|
||||||
|
if sys.platform == "darwin":
|
||||||
|
r = subprocess.run(
|
||||||
|
["sysctl", "-n", "hw.memsize"],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
if r.returncode == 0:
|
||||||
|
env["ram_gb"] = round(int(r.stdout.strip()) / (1024 ** 3))
|
||||||
|
else:
|
||||||
|
with open("/proc/meminfo") as f:
|
||||||
|
for line in f:
|
||||||
|
if line.startswith("MemTotal:"):
|
||||||
|
env["ram_gb"] = round(int(line.split()[1]) * 1024 / (1024 ** 3))
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return env
|
||||||
|
|
||||||
|
|
||||||
|
def _get_app_version() -> tuple[str, str]:
|
||||||
|
"""Return (version, build_id) from git if available."""
|
||||||
|
version = "unknown"
|
||||||
|
build_id = "unknown"
|
||||||
|
try:
|
||||||
|
r = subprocess.run(
|
||||||
|
["git", "describe", "--tags", "--always"],
|
||||||
|
capture_output=True, text=True, timeout=5, cwd=PROJECT_ROOT,
|
||||||
|
)
|
||||||
|
if r.returncode == 0:
|
||||||
|
version = r.stdout.strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
r = subprocess.run(
|
||||||
|
["git", "rev-parse", "--short", "HEAD"],
|
||||||
|
capture_output=True, text=True, timeout=5, cwd=PROJECT_ROOT,
|
||||||
|
)
|
||||||
|
if r.returncode == 0:
|
||||||
|
build_id = r.stdout.strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return version, build_id
|
||||||
|
|
||||||
|
|
||||||
|
class SetupTelemetry:
|
||||||
|
"""Lightweight fire-and-forget telemetry for setup diagnostics.
|
||||||
|
|
||||||
|
When enabled, sends one event per setup step to the TOD telemetry collector.
|
||||||
|
All events use a shared anonymous token — no registration, no PII.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.enabled = False
|
||||||
|
self._environment: dict = {}
|
||||||
|
self._app_version = "unknown"
|
||||||
|
self._build_id = "unknown"
|
||||||
|
|
||||||
|
def enable(self) -> None:
|
||||||
|
self.enabled = True
|
||||||
|
self._environment = _collect_environment()
|
||||||
|
self._app_version, self._build_id = _get_app_version()
|
||||||
|
|
||||||
|
def step(self, step_name: str, result: str, duration_ms: int | None = None,
|
||||||
|
error_message: str | None = None, error_code: str | None = None,
|
||||||
|
metrics: dict | None = None) -> None:
|
||||||
|
"""Emit a single setup step event. No-op if disabled."""
|
||||||
|
if not self.enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
event: dict = {
|
||||||
|
"event_type": "setup",
|
||||||
|
"severity": "error" if result == "failure" else "info",
|
||||||
|
"phase": "setup",
|
||||||
|
"operation": step_name,
|
||||||
|
"result": result,
|
||||||
|
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
||||||
|
"app_version": self._app_version,
|
||||||
|
"build_id": self._build_id,
|
||||||
|
"environment": self._environment,
|
||||||
|
}
|
||||||
|
if duration_ms is not None:
|
||||||
|
event["duration_ms"] = duration_ms
|
||||||
|
if error_message:
|
||||||
|
event["error"] = {"message": error_message[:500], "code": error_code or ""}
|
||||||
|
if metrics:
|
||||||
|
event["metrics"] = metrics
|
||||||
|
|
||||||
|
self._send([event])
|
||||||
|
|
||||||
|
def _send(self, events: list[dict]) -> None:
|
||||||
|
"""POST events to the collector. Fire-and-forget."""
|
||||||
|
try:
|
||||||
|
body = json.dumps({"events": events}).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{_TELEMETRY_COLLECTOR}/api/v1/ingest",
|
||||||
|
data=body,
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": f"Bearer {_TELEMETRY_TOKEN}",
|
||||||
|
},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
urllib.request.urlopen(req, timeout=10)
|
||||||
|
except Exception:
|
||||||
|
pass # Fire-and-forget — never affect setup
|
||||||
|
|
||||||
|
|
||||||
# ── Input helpers ────────────────────────────────────────────────────────────
|
# ── Input helpers ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def ask(prompt: str, default: str = "", required: bool = False,
|
def ask(prompt: str, default: str = "", required: bool = False,
|
||||||
@@ -720,6 +866,27 @@ def wizard_reverse_proxy(config: dict) -> None:
|
|||||||
info("Traefik watches for file changes — no reload needed.")
|
info("Traefik watches for file changes — no reload needed.")
|
||||||
|
|
||||||
|
|
||||||
|
def wizard_telemetry(config: dict, telem: SetupTelemetry) -> None:
|
||||||
|
section("Anonymous Diagnostics")
|
||||||
|
info("TOD can send anonymous setup and runtime diagnostics to help")
|
||||||
|
info("identify common failures. No personal data, IPs, hostnames,")
|
||||||
|
info("or configuration values are ever sent.")
|
||||||
|
print()
|
||||||
|
info("What is collected: step pass/fail, duration, OS/arch/Python")
|
||||||
|
info("version, Docker version, RAM (rounded), and error types.")
|
||||||
|
info("You can disable this anytime by setting TELEMETRY_ENABLED=false")
|
||||||
|
info("in .env.prod.")
|
||||||
|
print()
|
||||||
|
|
||||||
|
if ask_yes_no("Send anonymous diagnostics?", default=False):
|
||||||
|
config["telemetry_enabled"] = True
|
||||||
|
telem.enable()
|
||||||
|
ok("Diagnostics enabled — thank you!")
|
||||||
|
else:
|
||||||
|
config["telemetry_enabled"] = False
|
||||||
|
info("No diagnostics will be sent.")
|
||||||
|
|
||||||
|
|
||||||
# ── Summary ──────────────────────────────────────────────────────────────────
|
# ── Summary ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def show_summary(config: dict) -> bool:
|
def show_summary(config: dict) -> bool:
|
||||||
@@ -765,6 +932,13 @@ def show_summary(config: dict) -> bool:
|
|||||||
print(f" {dim('(not configured)')}")
|
print(f" {dim('(not configured)')}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
print(f" {bold('Diagnostics')}")
|
||||||
|
if config.get("telemetry_enabled"):
|
||||||
|
print(f" TELEMETRY_ENABLED = {green('true')}")
|
||||||
|
else:
|
||||||
|
print(f" TELEMETRY_ENABLED = {dim('false')}")
|
||||||
|
print()
|
||||||
|
|
||||||
print(f" {bold('OpenBao')}")
|
print(f" {bold('OpenBao')}")
|
||||||
print(f" {dim('(will be captured automatically during bootstrap)')}")
|
print(f" {dim('(will be captured automatically during bootstrap)')}")
|
||||||
print()
|
print()
|
||||||
@@ -868,6 +1042,11 @@ SSH_IDLE_TIMEOUT=900
|
|||||||
# --- Config Backup ---
|
# --- Config Backup ---
|
||||||
CONFIG_BACKUP_INTERVAL=21600
|
CONFIG_BACKUP_INTERVAL=21600
|
||||||
CONFIG_BACKUP_MAX_CONCURRENT=10
|
CONFIG_BACKUP_MAX_CONCURRENT=10
|
||||||
|
|
||||||
|
# --- Telemetry ---
|
||||||
|
# Opt-in anonymous diagnostics. Set to false to disable.
|
||||||
|
TELEMETRY_ENABLED={'true' if config.get('telemetry_enabled') else 'false'}
|
||||||
|
TELEMETRY_COLLECTOR_URL={_TELEMETRY_COLLECTOR}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ENV_PROD.write_text(content)
|
ENV_PROD.write_text(content)
|
||||||
@@ -1216,12 +1395,34 @@ def health_check(config: dict) -> None:
|
|||||||
|
|
||||||
# ── Main ─────────────────────────────────────────────────────────────────────
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _timed(telem: SetupTelemetry, step_name: str, func, *args, **kwargs):
|
||||||
|
"""Run func, emit a telemetry event with timing. Returns func's result."""
|
||||||
|
t0 = time.monotonic()
|
||||||
|
try:
|
||||||
|
result = func(*args, **kwargs)
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step(step_name, "success", duration_ms=duration_ms)
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step(
|
||||||
|
step_name, "failure", duration_ms=duration_ms,
|
||||||
|
error_message=str(e), error_code=type(e).__name__,
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
# Graceful Ctrl+C
|
# Graceful Ctrl+C
|
||||||
env_written = False
|
env_written = False
|
||||||
|
telem = SetupTelemetry()
|
||||||
|
setup_start = time.monotonic()
|
||||||
|
|
||||||
def handle_sigint(sig, frame):
|
def handle_sigint(sig, frame):
|
||||||
nonlocal env_written
|
nonlocal env_written
|
||||||
|
telem.step("setup_total", "failure",
|
||||||
|
duration_ms=int((time.monotonic() - setup_start) * 1000),
|
||||||
|
error_message="User cancelled (SIGINT)")
|
||||||
print()
|
print()
|
||||||
if not env_written:
|
if not env_written:
|
||||||
info("Aborted before writing .env.prod — no files changed.")
|
info("Aborted before writing .env.prod — no files changed.")
|
||||||
@@ -1236,47 +1437,99 @@ def main() -> int:
|
|||||||
|
|
||||||
# Phase 1: Pre-flight
|
# Phase 1: Pre-flight
|
||||||
if not preflight():
|
if not preflight():
|
||||||
|
telem.step("preflight", "failure")
|
||||||
return 1
|
return 1
|
||||||
|
telem.step("preflight", "success")
|
||||||
|
|
||||||
|
# Telemetry opt-in (right after preflight, before wizard)
|
||||||
|
config: dict = {}
|
||||||
|
wizard_telemetry(config, telem)
|
||||||
|
|
||||||
# Phase 2: Wizard
|
# Phase 2: Wizard
|
||||||
config: dict = {}
|
try:
|
||||||
wizard_database(config)
|
wizard_database(config)
|
||||||
wizard_security(config)
|
wizard_security(config)
|
||||||
wizard_admin(config)
|
wizard_admin(config)
|
||||||
wizard_email(config)
|
wizard_email(config)
|
||||||
wizard_domain(config)
|
wizard_domain(config)
|
||||||
wizard_reverse_proxy(config)
|
wizard_reverse_proxy(config)
|
||||||
|
telem.step("wizard", "success")
|
||||||
|
except Exception as e:
|
||||||
|
telem.step("wizard", "failure",
|
||||||
|
error_message=str(e), error_code=type(e).__name__)
|
||||||
|
raise
|
||||||
|
|
||||||
# Summary
|
# Summary
|
||||||
if not show_summary(config):
|
if not show_summary(config):
|
||||||
info("Setup cancelled.")
|
info("Setup cancelled.")
|
||||||
|
telem.step("setup_total", "failure",
|
||||||
|
duration_ms=int((time.monotonic() - setup_start) * 1000),
|
||||||
|
error_message="User cancelled at summary")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Phase 3: Write files and prepare directories
|
# Phase 3: Write files and prepare directories
|
||||||
section("Writing Configuration")
|
section("Writing Configuration")
|
||||||
|
try:
|
||||||
write_env_prod(config)
|
write_env_prod(config)
|
||||||
write_init_sql_prod(config)
|
write_init_sql_prod(config)
|
||||||
env_written = True
|
env_written = True
|
||||||
prepare_data_dirs()
|
prepare_data_dirs()
|
||||||
|
telem.step("write_config", "success")
|
||||||
|
except Exception as e:
|
||||||
|
telem.step("write_config", "failure",
|
||||||
|
error_message=str(e), error_code=type(e).__name__)
|
||||||
|
raise
|
||||||
|
|
||||||
# Phase 4: OpenBao
|
# Phase 4: OpenBao
|
||||||
|
t0 = time.monotonic()
|
||||||
bao_ok = bootstrap_openbao(config)
|
bao_ok = bootstrap_openbao(config)
|
||||||
if not bao_ok:
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
if bao_ok:
|
||||||
|
telem.step("openbao_bootstrap", "success", duration_ms=duration_ms)
|
||||||
|
else:
|
||||||
|
telem.step("openbao_bootstrap", "failure", duration_ms=duration_ms,
|
||||||
|
error_message="OpenBao did not become healthy or credentials not found")
|
||||||
if not ask_yes_no("Continue without OpenBao credentials? (stack will need manual fix)", default=False):
|
if not ask_yes_no("Continue without OpenBao credentials? (stack will need manual fix)", default=False):
|
||||||
warn("Fix OpenBao credentials in .env.prod and re-run setup.py.")
|
warn("Fix OpenBao credentials in .env.prod and re-run setup.py.")
|
||||||
|
telem.step("setup_total", "failure",
|
||||||
|
duration_ms=int((time.monotonic() - setup_start) * 1000),
|
||||||
|
error_message="Aborted after OpenBao failure")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Phase 5: Build
|
# Phase 5: Build
|
||||||
|
t0 = time.monotonic()
|
||||||
if not build_images():
|
if not build_images():
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step("build_images", "failure", duration_ms=duration_ms)
|
||||||
warn("Fix the build error and re-run setup.py to continue.")
|
warn("Fix the build error and re-run setup.py to continue.")
|
||||||
|
telem.step("setup_total", "failure",
|
||||||
|
duration_ms=int((time.monotonic() - setup_start) * 1000),
|
||||||
|
error_message="Docker build failed")
|
||||||
return 1
|
return 1
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step("build_images", "success", duration_ms=duration_ms)
|
||||||
|
|
||||||
# Phase 6: Start
|
# Phase 6: Start
|
||||||
|
t0 = time.monotonic()
|
||||||
if not start_stack():
|
if not start_stack():
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step("start_stack", "failure", duration_ms=duration_ms)
|
||||||
|
telem.step("setup_total", "failure",
|
||||||
|
duration_ms=int((time.monotonic() - setup_start) * 1000),
|
||||||
|
error_message="Stack failed to start")
|
||||||
return 1
|
return 1
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step("start_stack", "success", duration_ms=duration_ms)
|
||||||
|
|
||||||
# Phase 7: Health
|
# Phase 7: Health
|
||||||
|
t0 = time.monotonic()
|
||||||
health_check(config)
|
health_check(config)
|
||||||
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
telem.step("health_check", "success", duration_ms=duration_ms)
|
||||||
|
|
||||||
|
# Done
|
||||||
|
total_ms = int((time.monotonic() - setup_start) * 1000)
|
||||||
|
telem.step("setup_total", "success", duration_ms=total_ms)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user