feat: The Other Dude v9.0.1 — full-featured email system

ci: add GitHub Pages deployment workflow for docs site

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-08 17:46:37 -05:00
commit b840047e19
511 changed files with 106948 additions and 0 deletions

View File

@@ -0,0 +1,140 @@
"""Prometheus metrics and health check infrastructure.
Provides:
- setup_instrumentator(): Configures Prometheus auto-instrumentation for FastAPI
- check_health_ready(): Verifies PostgreSQL, Redis, and NATS connectivity for readiness probes
"""
import asyncio
import time
import structlog
from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator
# Module-level structlog logger used by the instrumentation setup and the health probes below.
logger = structlog.get_logger(__name__)
def setup_instrumentator(app: FastAPI) -> Instrumentator:
    """Attach Prometheus HTTP instrumentation to *app* and expose the metrics route.

    The instrumentator is built with status codes left ungrouped, requests
    that match no route template ignored, the health and metrics endpoints
    excluded from measurement, and the environment-variable switch disabled
    so instrumentation is always active.

    Handler labels are route templates (e.g. /api/tenants/{tenant_id}/...)
    rather than resolved paths, which keeps label cardinality bounded. Call
    this only after every router has been included, otherwise routes added
    later are not captured.

    NOTE(review): earlier documentation listed an in-progress requests gauge
    among the exported metrics, but no option enabling one is passed here;
    confirm against the library defaults before relying on it.

    Args:
        app: The FastAPI application to instrument.

    Returns:
        The configured Instrumentator, already instrumented and exposed on *app*.
    """
    # Probe and scrape endpoints would only add noise to request metrics.
    skipped_paths = ["/health", "/health/ready", "/metrics", "/api/health"]
    options = {
        "should_group_status_codes": False,
        "should_ignore_untemplated": True,
        "excluded_handlers": skipped_paths,
        "should_respect_env_var": False,
    }

    metrics = Instrumentator(**options)
    metrics.instrument(app)
    # Kept out of the OpenAPI schema; responses are gzip-compressed.
    metrics.expose(app, should_gzip=True, include_in_schema=False)

    logger.info("prometheus instrumentation enabled", endpoint="/metrics")
    return metrics
async def check_health_ready() -> dict:
    """Run the readiness probes and summarize the outcome.

    PostgreSQL, Redis, and NATS are probed one after another, in that order.
    Each probe reports its own status, latency, and error text; the overall
    status is "healthy" only when every probe reports "up".

    Returns:
        dict with "status" ("healthy"|"unhealthy"), "version" (taken from
        settings.APP_VERSION), and "checks" mapping each dependency name to
        its probe result.
    """
    from app.config import settings

    # Probes are awaited sequentially; settings attributes are read just
    # before the probe that needs them.
    postgres_result = await _check_postgres()
    redis_result = await _check_redis(settings.REDIS_URL)
    nats_result = await _check_nats(settings.NATS_URL)

    checks: dict[str, dict] = {
        "postgres": postgres_result,
        "redis": redis_result,
        "nats": nats_result,
    }
    every_dependency_up = all(
        result["status"] == "up" for result in checks.values()
    )

    return {
        "status": "healthy" if every_dependency_up else "unhealthy",
        "version": settings.APP_VERSION,
        "checks": checks,
    }
async def _check_postgres() -> dict:
    """Verify PostgreSQL connectivity using the engine from app.database.

    Opens a connection and runs ``SELECT 1``. Both acquiring the connection
    and executing the query share one 5-second budget, so an unreachable
    database cannot stall the readiness probe beyond that bound.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms" (elapsed wall time,
        rounded to whole milliseconds), and "error" (None on success,
        otherwise a non-empty description of the failure).
    """
    start = time.monotonic()
    try:
        from sqlalchemy import text
        from app.database import engine

        async def _probe() -> None:
            # Connection acquisition is inside the timed coroutine on purpose:
            # a hanging connect is the most common failure mode.
            async with engine.connect() as conn:
                await conn.execute(text("SELECT 1"))

        await asyncio.wait_for(_probe(), timeout=5.0)
        latency_ms = round((time.monotonic() - start) * 1000)
        return {"status": "up", "latency_ms": latency_ms, "error": None}
    except Exception as exc:
        latency_ms = round((time.monotonic() - start) * 1000)
        # A wait_for timeout carries no message, so fall back to the
        # exception class name instead of reporting an empty string.
        error = str(exc) or type(exc).__name__
        logger.warning("health check: postgres failed", error=error)
        return {"status": "down", "latency_ms": latency_ms, "error": error}
async def _check_redis(redis_url: str) -> dict:
    """Verify Redis connectivity by sending PING.

    A client is created for *redis_url* with a 5-second socket connect
    timeout, the PING itself is bounded by a further 5-second timeout, and
    the client is always closed afterwards.

    Args:
        redis_url: Connection URL passed to ``redis.asyncio.from_url``.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms" (elapsed wall time,
        rounded to whole milliseconds), and "error" (None on success,
        otherwise a non-empty description of the failure).
    """
    start = time.monotonic()
    try:
        import redis.asyncio as aioredis

        client = aioredis.from_url(redis_url, socket_connect_timeout=5)
        try:
            await asyncio.wait_for(client.ping(), timeout=5.0)
        finally:
            # Release the client whether or not the ping succeeded.
            await client.aclose()
        latency_ms = round((time.monotonic() - start) * 1000)
        return {"status": "up", "latency_ms": latency_ms, "error": None}
    except Exception as exc:
        latency_ms = round((time.monotonic() - start) * 1000)
        # A wait_for timeout carries no message, so fall back to the
        # exception class name instead of reporting an empty string.
        error = str(exc) or type(exc).__name__
        logger.warning("health check: redis failed", error=error)
        return {"status": "down", "latency_ms": latency_ms, "error": error}
async def _check_nats(nats_url: str) -> dict:
    """Verify NATS connectivity by opening and then draining a connection.

    Connecting is bounded by a 5-second timeout. Once connected the
    dependency is reported as "up"; draining the probe connection is
    best-effort cleanup, bounded by its own 5-second timeout, and a failure
    there is logged but does not change the result.

    Args:
        nats_url: Server URL passed to ``nats.connect``.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms" (elapsed wall time
        including cleanup, rounded to whole milliseconds), and "error"
        (None on success, otherwise a non-empty description of the failure).
    """
    start = time.monotonic()
    try:
        import nats

        nc = await asyncio.wait_for(
            nats.connect(nats_url),
            timeout=5.0,
        )
        try:
            # Bound the cleanup so a stuck drain cannot stall the probe.
            await asyncio.wait_for(nc.drain(), timeout=5.0)
        except Exception as cleanup_exc:
            # The connection was established, so the dependency is up;
            # record the cleanup problem instead of silently discarding it.
            logger.debug(
                "health check: nats drain failed",
                error=str(cleanup_exc) or type(cleanup_exc).__name__,
            )
        latency_ms = round((time.monotonic() - start) * 1000)
        return {"status": "up", "latency_ms": latency_ms, "error": None}
    except Exception as exc:
        latency_ms = round((time.monotonic() - start) * 1000)
        # A wait_for timeout carries no message, so fall back to the
        # exception class name instead of reporting an empty string.
        error = str(exc) or type(exc).__name__
        logger.warning("health check: nats failed", error=error)
        return {"status": "down", "latency_ms": latency_ms, "error": error}