feat: The Other Dude v9.0.1 — full-featured email system

ci: add GitHub Pages deployment workflow for docs site

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-08 17:46:37 -05:00
commit b840047e19
511 changed files with 106948 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""FastAPI routers for all API endpoints."""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,172 @@
"""API key management endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/api-keys:
- List all keys (active + revoked)
- Create new key (returns plaintext once)
- Revoke key (soft delete)
RBAC: tenant_admin or above for all operations.
RLS enforced via get_db() (app_user engine with tenant context).
"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.api_key_service import (
ALLOWED_SCOPES,
create_api_key,
list_api_keys,
revoke_api_key,
)
# Router for API-key management; mounted by the app under /api, so full paths
# are /api/tenants/{tenant_id}/api-keys[...].
router = APIRouter(tags=["api-keys"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure *current_user* may operate on *tenant_id*.

    Super admins may access any tenant; for them the session's RLS context
    is switched to the requested tenant. Everyone else must belong to the
    tenant, otherwise a 403 is raised.
    """
    if current_user.is_super_admin:
        # Point row-level security at the tenant being administered.
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
# ---------------------------------------------------------------------------
# Request/response schemas
# ---------------------------------------------------------------------------
class ApiKeyCreate(BaseModel):
    """Request body for POST /tenants/{tenant_id}/api-keys."""
    # Reject unknown fields so client typos fail loudly as 422s.
    model_config = ConfigDict(extra="forbid")
    name: str  # human-readable label for the key
    scopes: list[str]  # validated against ALLOWED_SCOPES in the endpoint
    expires_at: Optional[datetime] = None  # omitted -> key never expires
class ApiKeyResponse(BaseModel):
    """Public representation of an API key -- never includes the secret."""
    # from_attributes allows validating directly from ORM/row objects.
    model_config = ConfigDict(from_attributes=True)
    id: str
    name: str
    key_prefix: str  # short non-secret prefix used to identify the key
    scopes: list[str]
    expires_at: Optional[str] = None  # ISO-8601 string or None
    last_used_at: Optional[str] = None
    created_at: str
    revoked_at: Optional[str] = None  # set when the key has been revoked
class ApiKeyCreateResponse(ApiKeyResponse):
    """Extended response that includes the plaintext key (shown once)."""
    # The full secret; only ever returned from the create endpoint.
    key: str
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/tenants/{tenant_id}/api-keys", response_model=list[ApiKeyResponse])
async def list_keys(
    tenant_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> list[dict]:
    """Return every API key (active and revoked) for the tenant."""
    await _check_tenant_access(current_user, tenant_id, db)
    records = await list_api_keys(db, tenant_id)
    # UUID ids are not JSON-serializable; stringify them in place.
    for record in records:
        record["id"] = str(record["id"])
    return records
@router.post(
    "/tenants/{tenant_id}/api-keys",
    response_model=ApiKeyCreateResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_key(
    tenant_id: uuid.UUID,
    body: ApiKeyCreate,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> dict:
    """Create a new API key. The plaintext key is returned only once."""
    await _check_tenant_access(current_user, tenant_id, db)
    # An empty scope list is never valid.
    if not body.scopes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one scope is required.",
        )
    # Every requested scope must be on the service allow-list.
    unknown = set(body.scopes) - ALLOWED_SCOPES
    if unknown:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid scopes: {', '.join(sorted(unknown))}. "
            f"Allowed: {', '.join(sorted(ALLOWED_SCOPES))}",
        )
    created = await create_api_key(
        db=db,
        tenant_id=tenant_id,
        user_id=current_user.user_id,
        name=body.name,
        scopes=body.scopes,
        expires_at=body.expires_at,
    )
    expires = created["expires_at"]
    issued = created["created_at"]
    return {
        "id": str(created["id"]),
        "name": created["name"],
        "key_prefix": created["key_prefix"],
        # Plaintext secret -- shown exactly once, never returned again.
        "key": created["key"],
        "scopes": created["scopes"],
        "expires_at": expires.isoformat() if expires else None,
        "last_used_at": None,
        "created_at": issued.isoformat() if issued else None,
        "revoked_at": None,
    }
@router.delete("/tenants/{tenant_id}/api-keys/{key_id}", status_code=status.HTTP_200_OK)
async def revoke_key(
    tenant_id: uuid.UUID,
    key_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> dict:
    """Revoke an API key (soft delete -- sets revoked_at timestamp)."""
    await _check_tenant_access(current_user, tenant_id, db)
    revoked = await revoke_api_key(db, tenant_id, key_id)
    # The service reports False when the key is missing or already revoked.
    if not revoked:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="API key not found or already revoked.",
        )
    return {"status": "revoked", "key_id": str(key_id)}

View File

@@ -0,0 +1,294 @@
"""Audit log API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Paginated, filterable audit log listing
- CSV export of audit logs
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above can view audit logs.
Phase 30: Audit log details are encrypted at rest via Transit (Tier 2).
When encrypted_details is set, the router decrypts via Transit on-demand
and returns the plaintext details in the response. Structural fields
(action, resource_type, timestamp, ip_address) are always plaintext.
"""
import asyncio
import csv
import io
import json
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy import and_, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
# Module-level logger; used for decryption-failure warnings below.
logger = logging.getLogger(__name__)
# Router for audit-log endpoints; mounted under /api by the application.
router = APIRouter(tags=["audit-logs"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure the caller may act on *tenant_id*, else raise 403.

    A super admin gets access to any tenant (the RLS context is switched
    to that tenant); a regular user must belong to the tenant.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
def _require_operator(current_user: CurrentUser) -> None:
    """Raise 403 unless the user holds at least the operator role."""
    if current_user.role in {"super_admin", "admin", "operator"}:
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="At least operator role required to view audit logs.",
    )
async def _decrypt_audit_details(
    encrypted_details: str | None,
    plaintext_details: dict[str, Any] | None,
    tenant_id: str,
) -> dict[str, Any]:
    """Resolve the details payload for a single audit log row.

    Order of preference:
    1. Transit-decrypted ``encrypted_details``, parsed as JSON.
    2. ``plaintext_details`` when nothing is encrypted or decryption fails.
    3. An empty dict when neither source is available.
    """
    if encrypted_details:
        try:
            # Imported lazily, inside the try, so an import failure also
            # degrades gracefully to the plaintext fallback.
            from app.services.crypto import decrypt_data_transit

            raw = await decrypt_data_transit(encrypted_details, tenant_id)
            return json.loads(raw)
        except Exception:
            logger.warning(
                "Failed to decrypt audit details for tenant %s, using plaintext fallback",
                tenant_id,
                exc_info=True,
            )
            # Fall through to plaintext.
    return plaintext_details or {}
async def _decrypt_details_batch(
    rows: list[Any],
    tenant_id: str,
) -> list[dict[str, Any]]:
    """Decrypt details for a batch of audit rows with bounded concurrency.

    At most 10 Transit decryptions run simultaneously so OpenBao is not
    overwhelmed; rows without encrypted_details resolve from plaintext.
    Result order matches the input row order.
    """
    gate = asyncio.Semaphore(10)  # cap on concurrent Transit calls

    async def _bounded(row: Any) -> dict[str, Any]:
        async with gate:
            return await _decrypt_audit_details(
                row.get("encrypted_details"),
                row.get("details"),
                tenant_id,
            )

    tasks = [_bounded(row) for row in rows]
    return list(await asyncio.gather(*tasks))
# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------
class AuditLogItem(BaseModel):
    """One audit log entry as returned by the list endpoint."""
    id: str
    user_email: Optional[str] = None  # None for system/anonymous actions
    action: str
    resource_type: Optional[str] = None
    resource_id: Optional[str] = None
    device_name: Optional[str] = None  # hostname joined from devices table
    # Decrypted/plaintext details payload. Pydantic deep-copies the default,
    # so the mutable {} default is safe here.
    details: dict[str, Any] = {}
    ip_address: Optional[str] = None
    created_at: str  # ISO-8601 timestamp, empty string when missing
class AuditLogResponse(BaseModel):
    """Paginated page of audit log items."""
    items: list[AuditLogItem]
    total: int  # total matching rows, not just this page
    page: int
    per_page: int
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/audit-logs",
    response_model=AuditLogResponse,
    summary="List audit logs with pagination and filters",
)
async def list_audit_logs(
    tenant_id: uuid.UUID,
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=100),
    action: Optional[str] = Query(default=None),
    user_id: Optional[uuid.UUID] = Query(default=None),
    device_id: Optional[uuid.UUID] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    format: Optional[str] = Query(default=None, description="Set to 'csv' for CSV export"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """Return a tenant's audit logs, paginated or as a CSV download.

    Requires at least the operator role. All provided filters (action,
    user_id, device_id, date range) are combined with AND. When
    ``format=csv`` every matching row is streamed as a CSV attachment with
    no pagination limit; otherwise a page of ``per_page`` items is
    returned. Encrypted details are decrypted via Transit on the fly.

    Raises:
        HTTPException: 403 when the caller lacks operator role or does not
            belong to the tenant.
    """
    _require_operator(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions using parameterized text fragments; the
    # params dict is kept in lockstep with the appended fragments so every
    # bound name has a value.
    conditions = [text("a.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if action:
        conditions.append(text("a.action = :action"))
        params["action"] = action
    if user_id:
        conditions.append(text("a.user_id = :user_id"))
        params["user_id"] = str(user_id)
    if device_id:
        conditions.append(text("a.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if date_from:
        conditions.append(text("a.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("a.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    # Shared SELECT columns for data queries
    _data_columns = text(
        "a.id, u.email AS user_email, a.action, a.resource_type, "
        "a.resource_id, d.hostname AS device_name, a.details, "
        "a.encrypted_details, a.ip_address, a.created_at"
    )
    # LEFT JOINs keep rows whose user or device has since been deleted.
    _data_from = text(
        "audit_logs a "
        "LEFT JOIN users u ON a.user_id = u.id "
        "LEFT JOIN devices d ON a.device_id = d.id"
    )
    # Count total (no joins needed -- the filters only touch audit_logs).
    count_result = await db.execute(
        select(func.count()).select_from(text("audit_logs a")).where(where_clause),
        params,
    )
    total = count_result.scalar() or 0
    # CSV export -- no pagination limit
    if format == "csv":
        result = await db.execute(
            select(_data_columns)
            .select_from(_data_from)
            .where(where_clause)
            .order_by(text("a.created_at DESC")),
            params,
        )
        all_rows = result.mappings().all()
        # Decrypt encrypted details concurrently
        decrypted_details = await _decrypt_details_batch(
            all_rows, str(tenant_id)
        )
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow([
            "ID", "User Email", "Action", "Resource Type",
            "Resource ID", "Device", "Details", "IP Address", "Timestamp",
        ])
        # zip pairs each row with its decrypted details (same order).
        for row, details in zip(all_rows, decrypted_details):
            details_str = json.dumps(details) if details else "{}"
            writer.writerow([
                str(row["id"]),
                row["user_email"] or "",
                row["action"],
                row["resource_type"] or "",
                row["resource_id"] or "",
                row["device_name"] or "",
                details_str,
                row["ip_address"] or "",
                str(row["created_at"]),
            ])
        output.seek(0)
        return StreamingResponse(
            iter([output.getvalue()]),
            media_type="text/csv",
            headers={"Content-Disposition": "attachment; filename=audit-logs.csv"},
        )
    # Paginated query
    offset = (page - 1) * per_page
    params["limit"] = per_page
    params["offset"] = offset
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("a.created_at DESC"))
        .limit(per_page)
        .offset(offset),
        params,
    )
    rows = result.mappings().all()
    # Decrypt encrypted details concurrently (skips rows without encrypted_details)
    decrypted_details = await _decrypt_details_batch(rows, str(tenant_id))
    items = [
        AuditLogItem(
            id=str(row["id"]),
            user_email=row["user_email"],
            action=row["action"],
            resource_type=row["resource_type"],
            resource_id=row["resource_id"],
            device_name=row["device_name"],
            details=details,
            ip_address=row["ip_address"],
            created_at=row["created_at"].isoformat() if row["created_at"] else "",
        )
        for row, details in zip(rows, decrypted_details)
    ]
    return AuditLogResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
    )

1052
backend/app/routers/auth.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,763 @@
"""Certificate Authority management API endpoints.
Provides the full certificate lifecycle for tenant CAs:
- CA initialization and info retrieval
- Per-device certificate signing
- Certificate deployment via NATS to Go poller (SFTP + RouterOS import)
- Bulk deployment across multiple devices
- Certificate rotation and revocation
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET); tenant_admin and above = mutating actions.
"""
import json
import logging
import uuid
from datetime import datetime, timezone
import nats
import nats.aio.client
import nats.errors
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import PlainTextResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.certificate import CertificateAuthority, DeviceCertificate
from app.models.device import Device
from app.schemas.certificate import (
BulkCertDeployRequest,
CACreateRequest,
CAResponse,
CertDeployResponse,
CertSignRequest,
DeviceCertResponse,
)
from app.services.audit_service import log_action
from app.services.ca_service import (
generate_ca,
get_ca_for_tenant,
get_cert_for_deploy,
get_device_certs,
sign_device_cert,
update_cert_status,
)
# Structured logger used throughout the certificate lifecycle endpoints.
logger = structlog.get_logger(__name__)
# Router for CA/certificate endpoints; mounted by the application.
router = APIRouter(tags=["certificates"])
# Module-level NATS connection for cert deployment (lazy initialized)
_nc: nats.aio.client.Client | None = None
async def _get_nats() -> nats.aio.client.Client:
    """Return the shared NATS connection, (re)connecting when needed.

    The connection is cached in the module-level ``_nc`` and reused until
    it is closed.
    """
    global _nc
    if _nc is not None and not _nc.is_closed:
        return _nc
    _nc = await nats.connect(settings.NATS_URL)
    logger.info("Certificate NATS connection established")
    return _nc
async def _deploy_cert_via_nats(
    device_id: str,
    cert_pem: str,
    key_pem: str,
    cert_name: str,
    ssh_port: int = 22,
) -> dict:
    """Ask the Go poller, via NATS request/reply, to install a certificate.

    Args:
        device_id: Target device UUID string.
        cert_pem: PEM-encoded device certificate.
        key_pem: PEM-encoded device private key (decrypted).
        cert_name: Name for the cert on the device (e.g., "portal-device-cert").
        ssh_port: SSH port for SFTP upload (default 22).

    Returns:
        Dict with success, cert_name_on_device, and error fields. Transport
        failures are converted into a ``success: False`` result rather than
        raised.
    """
    message = {
        "device_id": device_id,
        "cert_pem": cert_pem,
        "key_pem": key_pem,
        "cert_name": cert_name,
        "ssh_port": ssh_port,
    }
    nc = await _get_nats()
    try:
        # 60s budget: the poller must SFTP the files and run the import.
        reply = await nc.request(
            f"cert.deploy.{device_id}",
            json.dumps(message).encode(),
            timeout=60.0,
        )
        return json.loads(reply.data)
    except nats.errors.TimeoutError:
        return {
            "success": False,
            "error": "Certificate deployment timed out -- device may be offline or unreachable",
        }
    except Exception as exc:
        logger.error("NATS cert deploy request failed", device_id=device_id, error=str(exc))
        return {"success": False, "error": str(exc)}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _get_device_for_tenant(
    db: AsyncSession, device_id: uuid.UUID, current_user: CurrentUser
) -> Device:
    """Fetch a device, raising 404 when it is not visible to this session.

    Tenant isolation is enforced by RLS on the session, not by an explicit
    check here; ``current_user`` is accepted for call-site symmetry but is
    not consulted directly.
    """
    lookup = await db.execute(select(Device).where(Device.id == device_id))
    device = lookup.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    return device
async def _get_tenant_id(
    current_user: CurrentUser,
    db: AsyncSession,
    tenant_id_override: uuid.UUID | None = None,
) -> uuid.UUID:
    """Resolve the effective tenant id for the request.

    Regular users always act on their own tenant (400 when they have
    none). Super admins must supply ``tenant_id_override`` (from the query
    parameter); the RLS context is then switched to that tenant.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No tenant context available.",
            )
        return current_user.tenant_id
    if tenant_id_override is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Super admin must provide tenant_id query parameter.",
        )
    # Set RLS context for the selected tenant.
    await set_tenant_context(db, str(tenant_id_override))
    return tenant_id_override
async def _get_cert_with_tenant_check(
    db: AsyncSession, cert_id: uuid.UUID, tenant_id: uuid.UUID
) -> DeviceCertificate:
    """Fetch a device certificate, 404ing unless it belongs to the tenant.

    RLS should already hide foreign rows; the explicit tenant comparison is
    defense in depth. A tenant mismatch is reported as 404 (not 403) so the
    certificate's existence is not leaked.
    """
    lookup = await db.execute(
        select(DeviceCertificate).where(DeviceCertificate.id == cert_id)
    )
    cert = lookup.scalar_one_or_none()
    if cert is None or cert.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Certificate {cert_id} not found",
        )
    return cert
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.post(
    "/ca",
    response_model=CAResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Initialize a Certificate Authority for the tenant",
)
@limiter.limit("5/minute")
async def create_ca(
    request: Request,
    body: CACreateRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CAResponse:
    """Generate a self-signed root CA for the tenant.

    Each tenant may have at most one CA. Returns 409 if a CA already exists.
    Requires tenant_admin; rate limited to 5 requests/minute.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # A tenant may hold only a single CA at a time.
    existing = await get_ca_for_tenant(db, tenant_id)
    if existing is not None:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Tenant already has a Certificate Authority. Delete it before creating a new one.",
        )
    ca = await generate_ca(
        db,
        tenant_id,
        body.common_name,
        body.validity_years,
        settings.get_encryption_key_bytes(),
    )
    # Audit logging is best-effort: a failure must not undo CA creation,
    # but it should not be swallowed silently either -- log it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "ca_create",
            resource_type="certificate_authority", resource_id=str(ca.id),
            details={"common_name": body.common_name, "validity_years": body.validity_years},
        )
    except Exception as exc:
        logger.warning("Audit log write failed", action="ca_create", error=str(exc))
    logger.info("CA created", tenant_id=str(tenant_id), ca_id=str(ca.id))
    return CAResponse.model_validate(ca)
@router.get(
    "/ca",
    response_model=CAResponse,
    summary="Get tenant CA information",
)
async def get_ca(
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> CAResponse:
    """Return the tenant's CA public information (no private key).

    404 when the tenant has not initialized a CA yet.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    authority = await get_ca_for_tenant(db, tenant_id)
    if authority is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured for this tenant.",
        )
    return CAResponse.model_validate(authority)
@router.get(
    "/ca/pem",
    response_class=PlainTextResponse,
    summary="Download the CA public certificate (PEM)",
)
async def get_ca_pem(
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> PlainTextResponse:
    """Return the CA's public certificate in PEM format.

    Users can import this into their trust store to validate device connections.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    authority = await get_ca_for_tenant(db, tenant_id)
    if authority is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured for this tenant.",
        )
    # Served as a downloadable .pem attachment rather than inline text.
    return PlainTextResponse(
        content=authority.cert_pem,
        media_type="application/x-pem-file",
        headers={"Content-Disposition": "attachment; filename=portal-ca.pem"},
    )
@router.post(
    "/sign",
    response_model=DeviceCertResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Sign a certificate for a device",
)
@limiter.limit("20/minute")
async def sign_cert(
    request: Request,
    body: CertSignRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> DeviceCertResponse:
    """Sign a per-device TLS certificate using the tenant's CA.

    The device must belong to the tenant. The cert uses CN=hostname, SAN=IP+DNS.
    Requires tenant_admin; rate limited to 20 requests/minute. Returns 404
    when the device is not visible or the tenant has no CA.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # Verify device belongs to tenant (RLS enforces, but also get device data)
    device = await _get_device_for_tenant(db, body.device_id, current_user)
    # Get tenant CA
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured. Initialize a CA first.",
        )
    cert = await sign_device_cert(
        db,
        ca,
        body.device_id,
        device.hostname,
        device.ip_address,
        body.validity_days,
        settings.get_encryption_key_bytes(),
    )
    # Audit logging is best-effort: a failure must not undo the signing,
    # but swallowing it silently hides operational problems -- log it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_sign",
            resource_type="device_certificate", resource_id=str(cert.id),
            device_id=body.device_id,
            details={"hostname": device.hostname, "validity_days": body.validity_days},
        )
    except Exception as exc:
        logger.warning("Audit log write failed", action="cert_sign", error=str(exc))
    logger.info("Device cert signed", device_id=str(body.device_id), cert_id=str(cert.id))
    return DeviceCertResponse.model_validate(cert)
@router.post(
    "/{cert_id}/deploy",
    response_model=CertDeployResponse,
    summary="Deploy a signed certificate to a device",
)
@limiter.limit("20/minute")
async def deploy_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CertDeployResponse:
    """Deploy a signed certificate to a device via NATS/SFTP.

    The Go poller receives the cert, uploads it via SFTP, imports it,
    and assigns it to the api-ssl service on the RouterOS device.

    State machine: the cert moves "issued" -> "deploying" -> "deployed";
    on any failure it is rolled back to "issued". A deployment failure is
    reported as a 200 with ``success: False`` (not an HTTP error).
    Rate limited to 20 requests/minute.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    # Update status to deploying; update_cert_status raises ValueError on
    # an invalid state transition, surfaced to the client as 409.
    try:
        await update_cert_status(db, cert_id, "deploying")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        )
    # Get decrypted cert data for deployment
    try:
        cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
            db, cert_id, settings.get_encryption_key_bytes()
        )
    except ValueError as e:
        # Rollback status so the cert can be retried later
        await update_cert_status(db, cert_id, "issued")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to prepare cert for deployment: {e}",
        )
    # Flush DB changes before NATS call so deploying status is persisted
    await db.flush()
    # Send deployment command via NATS (blocks up to the poller timeout)
    result = await _deploy_cert_via_nats(
        device_id=str(cert.device_id),
        cert_pem=cert_pem,
        key_pem=key_pem,
        cert_name="portal-device-cert",
    )
    if result.get("success"):
        # Update cert status to deployed
        await update_cert_status(db, cert_id, "deployed")
        # Update device tls_mode to portal_ca so pollers use TLS verification
        device_result = await db.execute(
            select(Device).where(Device.id == cert.device_id)
        )
        device = device_result.scalar_one_or_none()
        if device is not None:
            device.tls_mode = "portal_ca"
        # Best-effort audit entry; failure does not affect the deployment.
        try:
            await log_action(
                db, tenant_id, current_user.user_id, "cert_deploy",
                resource_type="device_certificate", resource_id=str(cert_id),
                device_id=cert.device_id,
                details={"cert_name_on_device": result.get("cert_name_on_device")},
            )
        except Exception:
            pass
        logger.info(
            "Certificate deployed successfully",
            cert_id=str(cert_id),
            device_id=str(cert.device_id),
            cert_name_on_device=result.get("cert_name_on_device"),
        )
        return CertDeployResponse(
            success=True,
            device_id=cert.device_id,
            cert_name_on_device=result.get("cert_name_on_device"),
        )
    else:
        # Rollback status to issued so the cert remains deployable
        await update_cert_status(db, cert_id, "issued")
        logger.warning(
            "Certificate deployment failed",
            cert_id=str(cert_id),
            device_id=str(cert.device_id),
            error=result.get("error"),
        )
        return CertDeployResponse(
            success=False,
            device_id=cert.device_id,
            error=result.get("error"),
        )
@router.post(
    "/deploy/bulk",
    response_model=list[CertDeployResponse],
    summary="Bulk deploy certificates to multiple devices",
)
@limiter.limit("5/minute")
async def bulk_deploy(
    request: Request,
    body: BulkCertDeployRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> list[CertDeployResponse]:
    """Deploy certificates to multiple devices sequentially.

    For each device: signs a cert if none exists (status=issued), then deploys.
    Sequential deployment per project patterns (no concurrent NATS calls).
    Per-device failures are captured as ``success: False`` entries in the
    returned list (same order as ``body.device_ids``) -- one bad device
    does not abort the batch. Rate limited to 5 requests/minute.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # Get tenant CA -- required before any signing can happen
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured. Initialize a CA first.",
        )
    results: list[CertDeployResponse] = []
    for device_id in body.device_ids:
        try:
            # Get device info
            device = await _get_device_for_tenant(db, device_id, current_user)
            # Check if device already has an issued cert (reuse it if so)
            existing_certs = await get_device_certs(db, tenant_id, device_id)
            issued_cert = None
            for c in existing_certs:
                if c.status == "issued":
                    issued_cert = c
                    break
            # Sign a new cert if none exists in issued state
            if issued_cert is None:
                issued_cert = await sign_device_cert(
                    db,
                    ca,
                    device_id,
                    device.hostname,
                    device.ip_address,
                    730,  # Default 2 years
                    settings.get_encryption_key_bytes(),
                )
                await db.flush()
            # Deploy the cert: issued -> deploying, then persist before NATS
            await update_cert_status(db, issued_cert.id, "deploying")
            cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
                db, issued_cert.id, settings.get_encryption_key_bytes()
            )
            await db.flush()
            result = await _deploy_cert_via_nats(
                device_id=str(device_id),
                cert_pem=cert_pem,
                key_pem=key_pem,
                cert_name="portal-device-cert",
            )
            if result.get("success"):
                await update_cert_status(db, issued_cert.id, "deployed")
                device.tls_mode = "portal_ca"
                results.append(CertDeployResponse(
                    success=True,
                    device_id=device_id,
                    cert_name_on_device=result.get("cert_name_on_device"),
                ))
            else:
                # Roll back so the cert stays deployable on retry
                await update_cert_status(db, issued_cert.id, "issued")
                results.append(CertDeployResponse(
                    success=False,
                    device_id=device_id,
                    error=result.get("error"),
                ))
        except HTTPException as e:
            # e.g. device not found -- record and continue with the batch
            results.append(CertDeployResponse(
                success=False,
                device_id=device_id,
                error=e.detail,
            ))
        except Exception as e:
            logger.error("Bulk deploy error", device_id=str(device_id), error=str(e))
            results.append(CertDeployResponse(
                success=False,
                device_id=device_id,
                error=str(e),
            ))
    # Best-effort batch-level audit entry; failure does not affect results.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_bulk_deploy",
            resource_type="device_certificate",
            details={
                "device_count": len(body.device_ids),
                "successful": sum(1 for r in results if r.success),
                "failed": sum(1 for r in results if not r.success),
            },
        )
    except Exception:
        pass
    return results
@router.get(
    "/devices",
    response_model=list[DeviceCertResponse],
    summary="List device certificates",
)
async def list_device_certs(
    device_id: uuid.UUID | None = Query(None, description="Filter by device ID"),
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceCertResponse]:
    """List the tenant's device certificates (superseded certs excluded).

    When ``device_id`` is given, only that device's certificates are
    returned.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    rows = await get_device_certs(db, tenant_id, device_id)
    return [DeviceCertResponse.model_validate(row) for row in rows]
@router.post(
    "/{cert_id}/revoke",
    response_model=DeviceCertResponse,
    summary="Revoke a device certificate",
)
@limiter.limit("5/minute")
async def revoke_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> DeviceCertResponse:
    """Revoke a device certificate and reset the device TLS mode to insecure.

    Raises 404 when the certificate is not owned by the tenant, 409 when
    the status transition is invalid (e.g. already revoked). Rate limited
    to 5 requests/minute.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    try:
        updated_cert = await update_cert_status(db, cert_id, "revoked")
    except ValueError as e:
        # Invalid state transition -> 409; chain the cause for debugging.
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        ) from e
    # Reset device tls_mode to insecure since its cert is no longer valid
    device_result = await db.execute(
        select(Device).where(Device.id == cert.device_id)
    )
    device = device_result.scalar_one_or_none()
    if device is not None:
        device.tls_mode = "insecure"
    # Audit logging is best-effort: failure must not undo the revocation,
    # but it should not be swallowed silently -- log it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_revoke",
            resource_type="device_certificate", resource_id=str(cert_id),
            device_id=cert.device_id,
        )
    except Exception as exc:
        logger.warning("Audit log write failed", action="cert_revoke", error=str(exc))
    logger.info("Certificate revoked", cert_id=str(cert_id), device_id=str(cert.device_id))
    return DeviceCertResponse.model_validate(updated_cert)
@router.post(
    "/{cert_id}/rotate",
    response_model=CertDeployResponse,
    summary="Rotate a device certificate",
)
@limiter.limit("5/minute")
async def rotate_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CertDeployResponse:
    """Rotate a device certificate: supersede the old cert, sign a new one, and deploy it.

    Flow (the ordering is deliberate -- the old cert is superseded *before*
    the new one is signed, so a failed deploy leaves one new cert in 'issued'
    state rather than two active certs):
      1. Resolve tenant, old cert, device, and tenant CA (404 on any miss).
      2. Mark the old cert 'superseded' (409 if that transition is invalid).
      3. Sign a new cert (730-day validity) and push it to the device via NATS.
      4. On deploy success: mark new cert 'deployed', set the device to
         tls_mode='portal_ca', write a best-effort audit entry, return success.
      5. On deploy failure: mark the new cert back to 'issued' and return
         success=False with the transport error (no HTTP error is raised).

    Raises:
        HTTPException 404: cert, device, or CA not found.
        HTTPException 409: old cert cannot be superseded.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    old_cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    # Get the device for hostname/IP (used as subject/SAN data when signing).
    device_result = await db.execute(
        select(Device).where(Device.id == old_cert.device_id)
    )
    device = device_result.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {old_cert.device_id} not found",
        )
    # Get tenant CA -- signing is impossible without one.
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured.",
        )
    # Mark old cert as superseded
    try:
        await update_cert_status(db, cert_id, "superseded")
    except ValueError as e:
        # Invalid state transition (e.g. cert already revoked) -> 409.
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        )
    # Sign new cert
    new_cert = await sign_device_cert(
        db,
        ca,
        old_cert.device_id,
        device.hostname,
        device.ip_address,
        730,  # Default 2 years validity
        settings.get_encryption_key_bytes(),
    )
    await db.flush()
    # Deploy new cert: mark 'deploying', decrypt key material, push over NATS.
    await update_cert_status(db, new_cert.id, "deploying")
    cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
        db, new_cert.id, settings.get_encryption_key_bytes()
    )
    await db.flush()
    result = await _deploy_cert_via_nats(
        device_id=str(old_cert.device_id),
        cert_pem=cert_pem,
        key_pem=key_pem,
        cert_name="portal-device-cert",
    )
    if result.get("success"):
        await update_cert_status(db, new_cert.id, "deployed")
        device.tls_mode = "portal_ca"
        # Audit logging is best-effort: never fail the rotation over it.
        try:
            await log_action(
                db, tenant_id, current_user.user_id, "cert_rotate",
                resource_type="device_certificate", resource_id=str(new_cert.id),
                device_id=old_cert.device_id,
                details={
                    "old_cert_id": str(cert_id),
                    "cert_name_on_device": result.get("cert_name_on_device"),
                },
            )
        except Exception:
            pass
        logger.info(
            "Certificate rotated successfully",
            old_cert_id=str(cert_id),
            new_cert_id=str(new_cert.id),
            device_id=str(old_cert.device_id),
        )
        return CertDeployResponse(
            success=True,
            device_id=old_cert.device_id,
            cert_name_on_device=result.get("cert_name_on_device"),
        )
    else:
        # Rollback: mark new cert as issued (deploy failed). Note the old
        # cert stays 'superseded'; a retry deploys the new 'issued' cert.
        await update_cert_status(db, new_cert.id, "issued")
        logger.warning(
            "Certificate rotation deploy failed",
            new_cert_id=str(new_cert.id),
            device_id=str(old_cert.device_id),
            error=result.get("error"),
        )
        return CertDeployResponse(
            success=False,
            device_id=old_cert.device_id,
            error=result.get("error"),
        )

View File

@@ -0,0 +1,297 @@
"""
Client device discovery API endpoint.
Fetches ARP, DHCP lease, and wireless registration data from a RouterOS device
via the NATS command proxy, merges by MAC address, and returns a unified client list.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/clients
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer and above (read-only operation).
"""
import asyncio
import uuid
from datetime import datetime, timezone
from typing import Any
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.services import routeros_proxy
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["clients"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure the caller may operate on *tenant_id*.

    Super admins may target any tenant; for them the DB tenant context is
    re-pointed at the target tenant so RLS queries resolve correctly.
    Everyone else must belong to the requested tenant, otherwise 403.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    from app.database import set_tenant_context

    await set_tenant_context(db, str(tenant_id))
async def _check_device_online(
    db: AsyncSession, device_id: uuid.UUID
) -> Device:
    """Fetch *device_id* and ensure it is currently online.

    Raises 404 when the device does not exist and 409 when it is offline,
    since client discovery needs a live connection to the device.
    """
    row = await db.execute(
        select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
    )
    found = row.scalar_one_or_none()
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    if found.status != "online":
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Device is offline -- client discovery requires a live connection.",
        )
    return found
# ---------------------------------------------------------------------------
# MAC-address merge logic
# ---------------------------------------------------------------------------
def _normalize_mac(mac: str) -> str:
"""Normalize a MAC address to uppercase colon-separated format."""
return mac.strip().upper().replace("-", ":")
def _merge_client_data(
arp_data: list[dict[str, Any]],
dhcp_data: list[dict[str, Any]],
wireless_data: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Merge ARP, DHCP lease, and wireless registration data by MAC address.
ARP entries are the base. DHCP enriches with hostname. Wireless enriches
with signal/tx/rx/uptime and marks the client as wireless.
"""
# Index DHCP leases by MAC
dhcp_by_mac: dict[str, dict[str, Any]] = {}
for lease in dhcp_data:
mac_raw = lease.get("mac-address") or lease.get("active-mac-address", "")
if mac_raw:
dhcp_by_mac[_normalize_mac(mac_raw)] = lease
# Index wireless registrations by MAC
wireless_by_mac: dict[str, dict[str, Any]] = {}
for reg in wireless_data:
mac_raw = reg.get("mac-address", "")
if mac_raw:
wireless_by_mac[_normalize_mac(mac_raw)] = reg
# Track which MACs we've already processed (from ARP)
seen_macs: set[str] = set()
clients: list[dict[str, Any]] = []
# Start with ARP entries as base
for entry in arp_data:
mac_raw = entry.get("mac-address", "")
if not mac_raw:
continue
mac = _normalize_mac(mac_raw)
if mac in seen_macs:
continue
seen_macs.add(mac)
# Determine status: ARP complete flag or dynamic flag
is_complete = entry.get("complete", "true").lower() == "true"
arp_status = "reachable" if is_complete else "stale"
client: dict[str, Any] = {
"mac": mac,
"ip": entry.get("address", ""),
"interface": entry.get("interface", ""),
"hostname": None,
"status": arp_status,
"signal_strength": None,
"tx_rate": None,
"rx_rate": None,
"uptime": None,
"is_wireless": False,
}
# Enrich with DHCP data
dhcp = dhcp_by_mac.get(mac)
if dhcp:
client["hostname"] = dhcp.get("host-name") or None
dhcp_status = dhcp.get("status", "")
if dhcp_status:
client["dhcp_status"] = dhcp_status
# Enrich with wireless data
wireless = wireless_by_mac.get(mac)
if wireless:
client["is_wireless"] = True
client["signal_strength"] = wireless.get("signal-strength") or None
client["tx_rate"] = wireless.get("tx-rate") or None
client["rx_rate"] = wireless.get("rx-rate") or None
client["uptime"] = wireless.get("uptime") or None
clients.append(client)
# Also include DHCP-only entries (no ARP match -- e.g. expired leases)
for mac, lease in dhcp_by_mac.items():
if mac in seen_macs:
continue
seen_macs.add(mac)
client = {
"mac": mac,
"ip": lease.get("active-address") or lease.get("address", ""),
"interface": lease.get("active-server") or "",
"hostname": lease.get("host-name") or None,
"status": "stale", # No ARP entry = not actively reachable
"signal_strength": None,
"tx_rate": None,
"rx_rate": None,
"uptime": None,
"is_wireless": mac in wireless_by_mac,
}
wireless = wireless_by_mac.get(mac)
if wireless:
client["signal_strength"] = wireless.get("signal-strength") or None
client["tx_rate"] = wireless.get("tx-rate") or None
client["rx_rate"] = wireless.get("rx-rate") or None
client["uptime"] = wireless.get("uptime") or None
clients.append(client)
return clients
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/clients",
    summary="List connected client devices (ARP + DHCP + wireless)",
)
async def list_clients(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Discover all client devices connected to a MikroTik device.

    Fetches ARP table, DHCP server leases, and wireless registration table
    in parallel, then merges by MAC address into a unified client list.
    Wireless fetch failure is non-fatal (device may not have wireless interfaces).
    DHCP fetch failure is non-fatal (device may not run a DHCP server).
    ARP fetch failure is fatal (core data source).

    Raises:
        HTTPException 403/404/409: tenant or device-online checks failed.
        HTTPException 502: the required ARP fetch failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    device_id_str = str(device_id)
    # Fetch all three sources in parallel. return_exceptions=True means a
    # failure in one command does not cancel the others; each result below
    # is either a response dict or the raised exception object.
    arp_result, dhcp_result, wireless_result = await asyncio.gather(
        routeros_proxy.execute_command(device_id_str, "/ip/arp/print"),
        routeros_proxy.execute_command(device_id_str, "/ip/dhcp-server/lease/print"),
        routeros_proxy.execute_command(
            device_id_str, "/interface/wireless/registration-table/print"
        ),
        return_exceptions=True,
    )
    # ARP is required -- if it failed, return 502
    if isinstance(arp_result, Exception):
        logger.error("ARP fetch exception", device_id=device_id_str, error=str(arp_result))
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch ARP table: {arp_result}",
        )
    if not arp_result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=arp_result.get("error", "Failed to fetch ARP table"),
        )
    arp_data: list[dict[str, Any]] = arp_result.get("data", [])
    # DHCP is optional -- log warning and continue with empty data
    dhcp_data: list[dict[str, Any]] = []
    if isinstance(dhcp_result, Exception):
        logger.warning(
            "DHCP fetch exception (continuing without DHCP data)",
            device_id=device_id_str,
            error=str(dhcp_result),
        )
    elif not dhcp_result.get("success"):
        logger.warning(
            "DHCP fetch failed (continuing without DHCP data)",
            device_id=device_id_str,
            error=dhcp_result.get("error"),
        )
    else:
        dhcp_data = dhcp_result.get("data", [])
    # Wireless is optional -- many devices have no wireless interfaces
    wireless_data: list[dict[str, Any]] = []
    if isinstance(wireless_result, Exception):
        logger.warning(
            "Wireless fetch exception (device may not have wireless interfaces)",
            device_id=device_id_str,
            error=str(wireless_result),
        )
    elif not wireless_result.get("success"):
        logger.warning(
            "Wireless fetch failed (device may not have wireless interfaces)",
            device_id=device_id_str,
            error=wireless_result.get("error"),
        )
    else:
        wireless_data = wireless_result.get("data", [])
    # Merge by MAC address (ARP base, DHCP/wireless enrichment)
    clients = _merge_client_data(arp_data, dhcp_data, wireless_data)
    logger.info(
        "client_discovery_complete",
        device_id=device_id_str,
        tenant_id=str(tenant_id),
        arp_count=len(arp_data),
        dhcp_count=len(dhcp_data),
        wireless_count=len(wireless_data),
        merged_count=len(clients),
    )
    return {
        "clients": clients,
        "device_id": device_id_str,
        # Timezone-aware UTC timestamp in ISO 8601 form.
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

View File

@@ -0,0 +1,745 @@
"""
Config backup API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/config/
Provides:
- GET /backups — list backup timeline
- POST /backups — trigger manual backup
- POST /checkpoint — create a checkpoint (restore point)
- GET /backups/{sha}/export — retrieve export.rsc text
- GET /backups/{sha}/binary — download backup.bin
- POST /preview-restore — preview impact analysis before restore
- POST /restore — restore a config version (two-phase panic-revert)
- POST /emergency-rollback — rollback to most recent pre-push backup
- GET /schedules — view effective backup schedule
- PUT /schedules — create/update device-specific schedule override
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET); operator and above = write (POST/PUT).
"""
import asyncio
import logging
import uuid
from datetime import timezone, datetime
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, Request, status
from fastapi.responses import Response
from pydantic import BaseModel, ConfigDict
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.config_backup import ConfigBackupRun, ConfigBackupSchedule
from app.config import settings
from app.models.device import Device
from app.services import backup_service, git_store
from app.services import restore_service
from app.services.crypto import decrypt_credentials_hybrid
from app.services.rsc_parser import parse_rsc, validate_rsc, compute_impact
logger = logging.getLogger(__name__)
router = APIRouter(tags=["config-backups"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Gate access to *tenant_id* for the calling user.

    super_admin users may target any tenant; the DB session's tenant
    context is switched to the target so RLS applies correctly. Any
    other user must already belong to the tenant, otherwise 403.
    """
    if current_user.is_super_admin:
        from app.database import set_tenant_context

        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
# ---------------------------------------------------------------------------
# Request/Response schemas
# ---------------------------------------------------------------------------
class RestoreRequest(BaseModel):
    """Request body naming the git commit SHA of the backup to restore/preview."""

    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    # Git commit SHA identifying the backup version in the tenant's git store.
    commit_sha: str
class ScheduleUpdate(BaseModel):
    """Request body for creating/updating a device backup schedule override."""

    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    # Cron expression for the backup schedule (e.g. "0 2 * * *").
    cron_expression: str
    # Whether scheduled backups are active for this device.
    enabled: bool
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups",
    summary="List backup timeline for a device",
    dependencies=[require_scope("config:read")],
)
async def list_backups(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return the backup timeline for a device, newest first.

    Each entry carries: id, commit_sha, trigger_type, lines_added,
    lines_removed, encryption_tier, and created_at (ISO 8601).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    query = (
        select(ConfigBackupRun)
        .where(
            ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
            ConfigBackupRun.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        .order_by(ConfigBackupRun.created_at.desc())
    )
    result = await db.execute(query)
    timeline: list[dict[str, Any]] = []
    for run in result.scalars().all():
        timeline.append(
            {
                "id": str(run.id),
                "commit_sha": run.commit_sha,
                "trigger_type": run.trigger_type,
                "lines_added": run.lines_added,
                "lines_removed": run.lines_removed,
                "encryption_tier": run.encryption_tier,
                "created_at": run.created_at.isoformat(),
            }
        )
    return timeline
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups",
    summary="Trigger a manual config backup",
    status_code=status.HTTP_201_CREATED,
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def trigger_backup(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Run an immediate manual backup for a device.

    Captures export.rsc and backup.bin over SSH, commits them to the
    tenant's git store, and records a ConfigBackupRun with
    trigger_type='manual'. Returns the backup metadata dict.

    Raises:
        HTTPException 404: device/tenant lookup failed inside the service.
        HTTPException 502: the backup capture itself failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        backup_meta = await backup_service.run_backup(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            trigger_type="manual",
            db_session=db,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(
            "Manual backup failed for device %s tenant %s: %s",
            device_id,
            tenant_id,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Backup failed: {exc}",
        ) from exc
    return backup_meta
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/checkpoint",
    summary="Create a checkpoint (restore point) of the current config",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def create_checkpoint(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a checkpoint (restore point) of the current device config.

    Same mechanics as a manual backup, but tagged trigger_type='checkpoint'.
    Operators create checkpoints before risky changes so they have a named
    restore point to roll back to.

    Raises:
        HTTPException 404: device/tenant lookup failed inside the service.
        HTTPException 502: the backup capture itself failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        checkpoint_meta = await backup_service.run_backup(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            trigger_type="checkpoint",
            db_session=db,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(
            "Checkpoint backup failed for device %s tenant %s: %s",
            device_id,
            tenant_id,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Checkpoint failed: {exc}",
        ) from exc
    return checkpoint_meta
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups/{commit_sha}/export",
    summary="Get export.rsc text for a specific backup",
    response_class=Response,
    dependencies=[require_scope("config:read")],
)
async def get_export(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    commit_sha: str,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Return the raw /export compact text for a specific backup version.

    For encrypted backups (encryption_tier != NULL), the Transit ciphertext
    stored in git is decrypted on-demand before returning plaintext.
    Legacy plaintext backups (encryption_tier = NULL) are returned as-is.
    Content-Type: text/plain

    Raises:
        HTTPException 404: commit/device/file not found in the git store.
        HTTPException 500: decryption of an encrypted backup failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context since Python 3.10.
    loop = asyncio.get_running_loop()
    try:
        # git_store is synchronous -- run the read off the event loop.
        content_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            commit_sha,
            str(device_id),
            "export.rsc",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup version not found: {exc}",
        ) from exc
    # Check if this backup is encrypted — decrypt via Transit if so
    result = await db.execute(
        select(ConfigBackupRun).where(
            ConfigBackupRun.commit_sha == commit_sha,
            ConfigBackupRun.device_id == device_id,
        )
    )
    backup_run = result.scalar_one_or_none()
    if backup_run and backup_run.encryption_tier:
        try:
            from app.services.crypto import decrypt_data_transit

            plaintext = await decrypt_data_transit(
                content_bytes.decode("utf-8"), str(tenant_id)
            )
            content_bytes = plaintext.encode("utf-8")
        except Exception as dec_err:
            logger.error(
                "Failed to decrypt export for device %s sha %s: %s",
                device_id, commit_sha, dec_err,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt backup content",
            ) from dec_err
    return Response(content=content_bytes, media_type="text/plain")
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups/{commit_sha}/binary",
    summary="Download backup.bin for a specific backup",
    response_class=Response,
    dependencies=[require_scope("config:read")],
)
async def get_binary(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    commit_sha: str,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Download the RouterOS binary backup file for a specific backup version.

    For encrypted backups, the Transit ciphertext is decrypted and the
    base64-encoded binary is decoded back to raw bytes before returning.
    Legacy plaintext backups are returned as-is.
    Content-Type: application/octet-stream (attachment download).

    Raises:
        HTTPException 404: commit/device/file not found in the git store.
        HTTPException 500: decryption of an encrypted backup failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context since Python 3.10.
    loop = asyncio.get_running_loop()
    try:
        # git_store is synchronous -- run the read off the event loop.
        content_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            commit_sha,
            str(device_id),
            "backup.bin",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup version not found: {exc}",
        ) from exc
    # Check if this backup is encrypted — decrypt via Transit if so
    result = await db.execute(
        select(ConfigBackupRun).where(
            ConfigBackupRun.commit_sha == commit_sha,
            ConfigBackupRun.device_id == device_id,
        )
    )
    backup_run = result.scalar_one_or_none()
    if backup_run and backup_run.encryption_tier:
        try:
            import base64 as b64
            from app.services.crypto import decrypt_data_transit

            # Transit ciphertext -> base64-encoded binary -> raw bytes
            b64_plaintext = await decrypt_data_transit(
                content_bytes.decode("utf-8"), str(tenant_id)
            )
            content_bytes = b64.b64decode(b64_plaintext)
        except Exception as dec_err:
            logger.error(
                "Failed to decrypt binary backup for device %s sha %s: %s",
                device_id, commit_sha, dec_err,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt backup content",
            ) from dec_err
    return Response(
        content=content_bytes,
        media_type="application/octet-stream",
        headers={
            "Content-Disposition": f'attachment; filename="backup-{commit_sha[:8]}.bin"'
        },
    )
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/preview-restore",
    summary="Preview the impact of restoring a config backup",
    dependencies=[require_scope("config:read")],
)
@limiter.limit("20/minute")
async def preview_restore(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RestoreRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Preview the impact of restoring a config backup before executing.

    Reads the target config from the git backup, fetches the current config
    from the live device (falling back to the latest backup if unreachable),
    and returns a diff with categories, risk levels, warnings, and validation.

    Raises:
        HTTPException 404: target export not found in the git store.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context since Python 3.10.
    loop = asyncio.get_running_loop()
    # 1. Read target export from git (sync git_store call -> executor)
    try:
        target_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            body.commit_sha,
            str(device_id),
            "export.rsc",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup export not found: {exc}",
        ) from exc
    target_text = target_bytes.decode("utf-8", errors="replace")
    # 2. Get current export from device (live) or fallback to latest backup
    current_text = ""
    try:
        result = await db.execute(
            select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
        )
        device = result.scalar_one_or_none()
        if device and (device.encrypted_credentials_transit or device.encrypted_credentials):
            key = settings.get_encryption_key_bytes()
            creds_json = await decrypt_credentials_hybrid(
                device.encrypted_credentials_transit,
                device.encrypted_credentials,
                str(tenant_id),
                key,
            )
            import json

            creds = json.loads(creds_json)
            current_text = await backup_service.capture_export(
                device.ip_address,
                username=creds.get("username", "admin"),
                password=creds.get("password", ""),
            )
    except Exception:
        # Deliberate best-effort: any live-export failure (offline device,
        # bad credentials, SSH timeout) falls back to the latest stored backup.
        logger.debug(
            "Live export failed for device %s, falling back to latest backup",
            device_id,
        )
        latest = await db.execute(
            select(ConfigBackupRun)
            .where(
                ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
            )
            .order_by(ConfigBackupRun.created_at.desc())
            .limit(1)
        )
        latest_run = latest.scalar_one_or_none()
        if latest_run:
            try:
                current_bytes = await loop.run_in_executor(
                    None,
                    git_store.read_file,
                    str(tenant_id),
                    latest_run.commit_sha,
                    str(device_id),
                    "export.rsc",
                )
                current_text = current_bytes.decode("utf-8", errors="replace")
            except Exception:
                # No readable fallback either -- diff against an empty config.
                current_text = ""
    # 3. Parse both configs and compute the restore impact
    current_parsed = parse_rsc(current_text)
    target_parsed = parse_rsc(target_text)
    validation = validate_rsc(target_text)
    impact = compute_impact(current_parsed, target_parsed)
    return {
        "diff": impact["diff"],
        "categories": impact["categories"],
        "warnings": impact["warnings"],
        "validation": validation,
    }
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/restore",
    summary="Restore a config version (two-phase push with panic-revert)",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def restore_config_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RestoreRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Restore a device config to a specific backup version.

    Delegates to restore_service.restore_config, which implements two-phase
    push with panic-revert:
      1. Pre-backup is taken on device (mandatory before any push)
      2. RouterOS scheduler is installed as safety net (auto-reverts if unreachable)
      3. Config is pushed via /import
      4. Wait 60s for config to settle
      5. Reachability check — remove scheduler if device is reachable
      6. Return committed/reverted/failed status

    Returns: {"status": str, "message": str, "pre_backup_sha": str}

    Raises:
        HTTPException 404: unknown device/commit (ValueError from service).
        HTTPException 502: the restore process itself failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        outcome = await restore_service.restore_config(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            commit_sha=body.commit_sha,
            db_session=db,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(
            "Restore failed for device %s tenant %s commit %s: %s",
            device_id,
            tenant_id,
            body.commit_sha,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Restore failed: {exc}",
        ) from exc
    return outcome
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/emergency-rollback",
    summary="Emergency rollback to most recent pre-push backup",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def emergency_rollback(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Emergency rollback: restore the most recent pre-push backup.

    Used when a device goes offline after a config push. Picks the newest
    'pre-restore', 'checkpoint', or 'pre-template-push' backup and replays
    it through the two-phase panic-revert restore.

    Raises:
        HTTPException 404: no pre-push backup exists (or service ValueError).
        HTTPException 502: the rollback restore failed.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    lookup = await db.execute(
        select(ConfigBackupRun)
        .where(
            ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
            ConfigBackupRun.tenant_id == tenant_id,  # type: ignore[arg-type]
            ConfigBackupRun.trigger_type.in_(
                ["pre-restore", "checkpoint", "pre-template-push"]
            ),
        )
        .order_by(ConfigBackupRun.created_at.desc())
        .limit(1)
    )
    rollback_point = lookup.scalar_one_or_none()
    if rollback_point is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No pre-push backup found for rollback",
        )
    try:
        restore_result = await restore_service.restore_config(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            commit_sha=rollback_point.commit_sha,
            db_session=db,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(
            "Emergency rollback failed for device %s tenant %s: %s",
            device_id,
            tenant_id,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Emergency rollback failed: {exc}",
        ) from exc
    return {
        **restore_result,
        "rolled_back_to": rollback_point.commit_sha,
        "rolled_back_to_date": rollback_point.created_at.isoformat(),
    }
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/schedules",
    summary="Get effective backup schedule for a device",
    dependencies=[require_scope("config:read")],
)
async def get_schedule(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Return the effective backup schedule for a device.

    Resolution order: device-specific override -> tenant-level default ->
    synthetic default (daily at 02:00 UTC, enabled) when nothing is stored.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Device-specific override wins over the tenant-wide default.
    override_q = await db.execute(
        select(ConfigBackupSchedule).where(
            ConfigBackupSchedule.tenant_id == tenant_id,  # type: ignore[arg-type]
            ConfigBackupSchedule.device_id == device_id,  # type: ignore[arg-type]
        )
    )
    schedule = override_q.scalar_one_or_none()
    if schedule is None:
        default_q = await db.execute(
            select(ConfigBackupSchedule).where(
                ConfigBackupSchedule.tenant_id == tenant_id,  # type: ignore[arg-type]
                ConfigBackupSchedule.device_id.is_(None),  # type: ignore[union-attr]
            )
        )
        schedule = default_q.scalar_one_or_none()
    if schedule is None:
        # Nothing configured anywhere -- report the synthetic default.
        return {
            "id": None,
            "tenant_id": str(tenant_id),
            "device_id": str(device_id),
            "cron_expression": "0 2 * * *",
            "enabled": True,
            "is_default": True,
        }
    return {
        "id": str(schedule.id),
        "tenant_id": str(schedule.tenant_id),
        "device_id": str(schedule.device_id) if schedule.device_id else None,
        "cron_expression": schedule.cron_expression,
        "enabled": schedule.enabled,
        # A row with no device_id is the tenant-level default.
        "is_default": schedule.device_id is None,
    }
@router.put(
    "/tenants/{tenant_id}/devices/{device_id}/config/schedules",
    summary="Create or update the device-specific backup schedule",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def update_schedule(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: ScheduleUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Upsert the device-specific backup schedule override.

    Creates the override row if absent, otherwise updates its
    cron_expression and enabled fields, then hot-reloads the scheduler
    so the change takes effect immediately. Returns the stored schedule.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    existing = await db.execute(
        select(ConfigBackupSchedule).where(
            ConfigBackupSchedule.tenant_id == tenant_id,  # type: ignore[arg-type]
            ConfigBackupSchedule.device_id == device_id,  # type: ignore[arg-type]
        )
    )
    schedule = existing.scalar_one_or_none()
    if schedule is not None:
        # Update the existing override in place.
        schedule.cron_expression = body.cron_expression
        schedule.enabled = body.enabled
    else:
        # First override for this device -- create it.
        schedule = ConfigBackupSchedule(
            tenant_id=tenant_id,
            device_id=device_id,
            cron_expression=body.cron_expression,
            enabled=body.enabled,
        )
        db.add(schedule)
    await db.flush()
    # Hot-reload the scheduler so changes take effect immediately
    from app.services.backup_scheduler import on_schedule_change

    await on_schedule_change(tenant_id, device_id)
    return {
        "id": str(schedule.id),
        "tenant_id": str(schedule.tenant_id),
        "device_id": str(schedule.device_id),
        "cron_expression": schedule.cron_expression,
        "enabled": schedule.enabled,
        "is_default": False,
    }

View File

@@ -0,0 +1,371 @@
"""
Dynamic RouterOS config editor API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/config-editor/
Proxies commands to the Go poller's CmdResponder via the RouterOS proxy service.
Provides:
- GET /browse -- browse a RouterOS menu path
- POST /add -- add a new entry
- POST /set -- edit an existing entry
- POST /remove -- delete an entry
- POST /execute -- execute an arbitrary CLI command
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET browse); operator and above = write (POST).
"""
import uuid
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.security.command_blocklist import check_command_safety, check_path_safety
from app.services import routeros_proxy
from app.services.audit_service import log_action
logger = structlog.get_logger(__name__)
audit_logger = structlog.get_logger("audit")
router = APIRouter(tags=["config-editor"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Non-super-admin users must belong to the target tenant; on success the
    RLS tenant context is (re-)pointed at the target tenant for everyone.
    """
    from app.database import set_tenant_context

    if not current_user.is_super_admin and current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
    # super_admin may target any tenant; regular users re-affirm their own.
    await set_tenant_context(db, str(tenant_id))
async def _check_device_online(
    db: AsyncSession, device_id: uuid.UUID
) -> Device:
    """Verify the device exists and is online. Returns the Device object.

    Raises 404 when the device row is absent and 409 when its status is
    anything other than "online".
    """
    device = (
        await db.execute(
            select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
        )
    ).scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    if device.status != "online":
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Device is offline \u2014 config editor requires a live connection.",
        )
    return device
# ---------------------------------------------------------------------------
# Request schemas
# ---------------------------------------------------------------------------
class AddEntryRequest(BaseModel):
    """Request body for POST .../config-editor/add."""
    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    path: str  # RouterOS menu path to add under (e.g. "/interface")
    properties: dict[str, str]  # property name -> value pairs for the new entry
class SetEntryRequest(BaseModel):
    """Request body for POST .../config-editor/set."""
    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    path: str  # RouterOS menu path containing the entry
    entry_id: str | None = None  # Optional for singleton paths (e.g. /ip/dns)
    properties: dict[str, str]  # property name -> new value pairs to apply
class RemoveEntryRequest(BaseModel):
    """Request body for POST .../config-editor/remove."""
    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    path: str  # RouterOS menu path containing the entry
    entry_id: str  # ID of the entry to delete (required — no singleton removal)
class ExecuteRequest(BaseModel):
    """Request body for POST .../config-editor/execute."""
    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    command: str  # raw CLI command; screened by check_command_safety before use
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/browse",
    summary="Browse a RouterOS menu path",
    dependencies=[require_scope("config:read")],
)
async def browse_menu(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    path: str = Query("/interface", description="RouterOS menu path to browse"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Browse a RouterOS menu path and return all entries at that path.

    Read-only: viewer role and above. Raises 502 when the proxy call fails.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(path)

    proxied = await routeros_proxy.browse_menu(str(device_id), path)
    if not proxied.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=proxied.get("error", "Failed to browse menu path"),
        )

    # Structured audit event for every successful browse.
    audit_logger.info(
        "routeros_config_browsed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        path=path,
    )
    return {
        "success": True,
        "entries": proxied.get("data", []),
        "error": None,
        "path": path,
    }
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/add",
    summary="Add a new entry to a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def add_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: AddEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Add a new entry to a RouterOS menu path with the given properties.

    Requires operator role or above. Tenant, device-online, and path-safety
    checks may each raise HTTPException; a failed proxy call raises 502.
    Returns the raw proxy result dict on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.add_entry(str(device_id), body.path, body.properties)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to add entry"),
        )
    audit_logger.info(
        "routeros_config_added",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_add",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "properties": body.properties},
        )
    except Exception as exc:
        # Best-effort audit persistence: never fail the request, but log the
        # failure instead of silently dropping it (was a bare `pass`).
        logger.warning("audit_log_failed", action="config_add", error=str(exc))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/set",
    summary="Edit an existing entry in a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def set_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: SetEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Update an existing entry's properties on the device.

    ``body.entry_id`` may be None for singleton paths (e.g. /ip/dns).
    Requires operator role or above; a failed proxy call raises 502.
    Returns the raw proxy result dict on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.update_entry(
        str(device_id), body.path, body.entry_id, body.properties
    )
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to update entry"),
        )
    audit_logger.info(
        "routeros_config_modified",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        entry_id=body.entry_id,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_set",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "entry_id": body.entry_id, "properties": body.properties},
        )
    except Exception as exc:
        # Best-effort audit persistence: never fail the request, but log the
        # failure instead of silently dropping it (was a bare `pass`).
        logger.warning("audit_log_failed", action="config_set", error=str(exc))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/remove",
    summary="Delete an entry from a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def remove_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RemoveEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Remove an entry from a RouterOS menu path.

    Destructive, hence the tighter 5/minute rate limit. Requires operator
    role or above; a failed proxy call raises 502. Returns the raw proxy
    result dict on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.remove_entry(
        str(device_id), body.path, body.entry_id
    )
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to remove entry"),
        )
    audit_logger.info(
        "routeros_config_removed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        entry_id=body.entry_id,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_remove",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "entry_id": body.entry_id},
        )
    except Exception as exc:
        # Best-effort audit persistence: never fail the request, but log the
        # failure instead of silently dropping it (was a bare `pass`).
        logger.warning("audit_log_failed", action="config_remove", error=str(exc))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/execute",
    summary="Execute an arbitrary RouterOS CLI command",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def execute_command(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: ExecuteRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Execute an arbitrary RouterOS CLI command on the device.

    The command is screened by check_command_safety before dispatch.
    Requires operator role or above; a failed proxy call raises 502.
    Returns the raw proxy result dict on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_command_safety(body.command)
    result = await routeros_proxy.execute_cli(str(device_id), body.command)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to execute command"),
        )
    audit_logger.info(
        "routeros_command_executed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        command=body.command,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_execute",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"command": body.command},
        )
    except Exception as exc:
        # Best-effort audit persistence: never fail the request, but log the
        # failure instead of silently dropping it (was a bare `pass`).
        logger.warning("audit_log_failed", action="config_execute", error=str(exc))
    return result

View File

@@ -0,0 +1,94 @@
"""
Device group management API endpoints.
Routes: /api/tenants/{tenant_id}/device-groups
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- tenant_admin/admin: DELETE
"""
import uuid
from fastapi import APIRouter, Depends, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_operator_or_above, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.routers.devices import _check_tenant_access
from app.schemas.device import DeviceGroupCreate, DeviceGroupResponse, DeviceGroupUpdate
from app.services import device as device_service
router = APIRouter(tags=["device-groups"])
@router.get(
    "/tenants/{tenant_id}/device-groups",
    response_model=list[DeviceGroupResponse],
    summary="List device groups",
)
async def list_groups(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceGroupResponse]:
    """Return every device group belonging to the tenant.

    Viewer role and above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    groups = await device_service.get_groups(db=db, tenant_id=tenant_id)
    return groups
@router.post(
    "/tenants/{tenant_id}/device-groups",
    response_model=DeviceGroupResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a device group",
    dependencies=[Depends(require_operator_or_above)],
)
async def create_group(
    tenant_id: uuid.UUID,
    data: DeviceGroupCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceGroupResponse:
    """Create a new device group for the tenant.

    Requires operator role or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    group = await device_service.create_group(db=db, tenant_id=tenant_id, data=data)
    return group
@router.put(
    "/tenants/{tenant_id}/device-groups/{group_id}",
    response_model=DeviceGroupResponse,
    summary="Update a device group",
    dependencies=[Depends(require_operator_or_above)],
)
async def update_group(
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    data: DeviceGroupUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceGroupResponse:
    """Update an existing device group.

    Requires operator role or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    group = await device_service.update_group(
        db=db, tenant_id=tenant_id, group_id=group_id, data=data
    )
    return group
@router.delete(
    "/tenants/{tenant_id}/device-groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device group",
    dependencies=[Depends(require_tenant_admin_or_above)],
)
async def delete_group(
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a device group (204 on success).

    Requires tenant_admin or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.delete_group(
        db=db, tenant_id=tenant_id, group_id=group_id
    )

View File

@@ -0,0 +1,150 @@
"""
Device syslog fetch endpoint via NATS RouterOS proxy.
Provides:
- GET /tenants/{tenant_id}/devices/{device_id}/logs -- fetch device log entries
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: viewer and above can read logs.
"""
import uuid
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, status
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services import routeros_proxy
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["device-logs"])
# ---------------------------------------------------------------------------
# Helpers (same pattern as config_editor.py)
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    super_admin may target any tenant (the RLS context is re-pointed at it);
    everyone else must belong to the target tenant.
    """
    if current_user.is_super_admin:
        # Re-point the RLS tenant context at the target tenant.
        from app.database import set_tenant_context
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
async def _check_device_exists(
    db: AsyncSession, device_id: uuid.UUID
) -> None:
    """Verify the device exists (does not require online status for logs).

    Raises 404 when no row matches; returns nothing otherwise.
    """
    from sqlalchemy import select
    from app.models.device import Device

    device = (
        await db.execute(select(Device).where(Device.id == device_id))
    ).scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
# ---------------------------------------------------------------------------
# Response model
# ---------------------------------------------------------------------------
class LogEntry(BaseModel):
    """A single RouterOS log record as returned by /log/print."""
    time: str  # timestamp string as reported by the device
    topics: str  # topic string as reported by the device
    message: str  # log message text
class LogsResponse(BaseModel):
    """Response envelope for the device logs endpoint."""
    logs: list[LogEntry]  # entries surviving the optional search filter
    device_id: str  # echoed device UUID as a string
    count: int  # len(logs) — may be below the requested limit after filtering
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/logs",
    response_model=LogsResponse,
    summary="Fetch device syslog entries via RouterOS API",
    dependencies=[Depends(require_min_role("viewer"))],
)
async def get_device_logs(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    limit: int = Query(default=100, ge=1, le=500),
    topic: str | None = Query(default=None, description="Filter by log topic"),
    search: str | None = Query(default=None, description="Search in message/topics"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> LogsResponse:
    """Fetch device log entries via the RouterOS /log/print command.

    The topic filter is applied on-device; the search filter is applied
    here after fetching, so `count` can be smaller than `limit`.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_exists(db, device_id)

    # Assemble the RouterOS command arguments.
    command_args = [f"=count={limit}"]
    if topic:
        command_args.append(f"?topics={topic}")

    result = await routeros_proxy.execute_command(
        str(device_id), "/log/print", args=command_args, timeout=15.0
    )
    if not result.get("success"):
        error_msg = result.get("error", "Unknown error fetching logs")
        logger.warning(
            "failed to fetch device logs",
            device_id=str(device_id),
            error=error_msg,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch device logs: {error_msg}",
        )

    # Lower-case the needle once; None means "no search filter".
    needle = search.lower() if search else None
    logs: list[LogEntry] = []
    for raw in result.get("data", []):
        entry = LogEntry(
            time=raw.get("time", ""),
            topics=raw.get("topics", ""),
            message=raw.get("message", ""),
        )
        if needle is not None:
            # Case-insensitive match against message or topics.
            if needle not in entry.message.lower() and needle not in entry.topics.lower():
                continue
        logs.append(entry)

    return LogsResponse(
        logs=logs,
        device_id=str(device_id),
        count=len(logs),
    )

View File

@@ -0,0 +1,94 @@
"""
Device tag management API endpoints.
Routes: /api/tenants/{tenant_id}/device-tags
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- tenant_admin/admin: DELETE
"""
import uuid
from fastapi import APIRouter, Depends, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_operator_or_above, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.routers.devices import _check_tenant_access
from app.schemas.device import DeviceTagCreate, DeviceTagResponse, DeviceTagUpdate
from app.services import device as device_service
router = APIRouter(tags=["device-tags"])
@router.get(
    "/tenants/{tenant_id}/device-tags",
    response_model=list[DeviceTagResponse],
    summary="List device tags",
)
async def list_tags(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceTagResponse]:
    """Return every device tag belonging to the tenant.

    Viewer role and above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    tags = await device_service.get_tags(db=db, tenant_id=tenant_id)
    return tags
@router.post(
    "/tenants/{tenant_id}/device-tags",
    response_model=DeviceTagResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a device tag",
    dependencies=[Depends(require_operator_or_above)],
)
async def create_tag(
    tenant_id: uuid.UUID,
    data: DeviceTagCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceTagResponse:
    """Create a new device tag for the tenant.

    Requires operator role or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    tag = await device_service.create_tag(db=db, tenant_id=tenant_id, data=data)
    return tag
@router.put(
    "/tenants/{tenant_id}/device-tags/{tag_id}",
    response_model=DeviceTagResponse,
    summary="Update a device tag",
    dependencies=[Depends(require_operator_or_above)],
)
async def update_tag(
    tenant_id: uuid.UUID,
    tag_id: uuid.UUID,
    data: DeviceTagUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceTagResponse:
    """Update an existing device tag.

    Requires operator role or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    tag = await device_service.update_tag(
        db=db, tenant_id=tenant_id, tag_id=tag_id, data=data
    )
    return tag
@router.delete(
    "/tenants/{tenant_id}/device-tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device tag",
    dependencies=[Depends(require_tenant_admin_or_above)],
)
async def delete_tag(
    tenant_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a device tag (204 on success).

    Requires tenant_admin or above (enforced by the route dependency).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.delete_tag(
        db=db, tenant_id=tenant_id, tag_id=tag_id
    )

View File

@@ -0,0 +1,452 @@
"""
Device management API endpoints.
All routes are tenant-scoped under /api/tenants/{tenant_id}/devices.
RLS is enforced via PostgreSQL — the app_user engine automatically filters
cross-tenant data based on the SET LOCAL app.current_tenant context set by
get_current_user dependency.
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- admin/tenant_admin: DELETE
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.services.audit_service import log_action
from app.middleware.rbac import (
require_min_role,
require_operator_or_above,
require_scope,
require_tenant_admin_or_above,
)
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.schemas.device import (
BulkAddRequest,
BulkAddResult,
DeviceCreate,
DeviceListResponse,
DeviceResponse,
DeviceUpdate,
SubnetScanRequest,
SubnetScanResponse,
)
from app.services import device as device_service
from app.services.scanner import scan_subnet
router = APIRouter(tags=["devices"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """
    Verify the current user is allowed to access the given tenant.

    - super_admin can access any tenant — re-sets DB tenant context to target tenant.
    - All other roles must match their own tenant_id.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    # Re-point the RLS tenant context at the target tenant so the
    # super_admin's queries are allowed through.
    from app.database import set_tenant_context
    await set_tenant_context(db, str(tenant_id))
# ---------------------------------------------------------------------------
# Device CRUD
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices",
    response_model=DeviceListResponse,
    summary="List devices with pagination and filtering",
    dependencies=[require_scope("devices:read")],
)
async def list_devices(
    tenant_id: uuid.UUID,
    page: int = Query(1, ge=1, description="Page number (1-based)"),
    page_size: int = Query(25, ge=1, le=100, description="Items per page (1-100)"),
    status_filter: Optional[str] = Query(None, alias="status"),
    search: Optional[str] = Query(None, description="Text search on hostname or IP"),
    tag_id: Optional[uuid.UUID] = Query(None),
    group_id: Optional[uuid.UUID] = Query(None),
    sort_by: str = Query("created_at", description="Field to sort by"),
    sort_order: str = Query("desc", description="asc or desc"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceListResponse:
    """List devices for a tenant with optional pagination, filtering, and sorting.

    All filters combine; the service layer applies them and returns the
    page of items plus the total match count.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    devices, total_count = await device_service.get_devices(
        db=db,
        tenant_id=tenant_id,
        page=page,
        page_size=page_size,
        status=status_filter,
        search=search,
        tag_id=tag_id,
        group_id=group_id,
        sort_by=sort_by,
        sort_order=sort_order,
    )
    return DeviceListResponse(
        items=devices,
        total=total_count,
        page=page,
        page_size=page_size,
    )
@router.post(
    "/tenants/{tenant_id}/devices",
    response_model=DeviceResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Add a device (validates TCP connectivity first)",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def create_device(
    request: Request,
    tenant_id: uuid.UUID,
    data: DeviceCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """
    Create a new device. Requires operator role or above.

    The device IP/port is TCP-probed before the record is saved.
    Credentials are encrypted with AES-256-GCM before storage and never returned.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    device = await device_service.create_device(
        db=db,
        tenant_id=tenant_id,
        data=data,
        encryption_key=settings.get_encryption_key_bytes(),
    )
    # Best-effort audit trail — a logging failure must not fail the request.
    client_ip = request.client.host if request.client else None
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_create",
            resource_type="device", resource_id=str(device.id),
            details={"hostname": data.hostname, "ip_address": data.ip_address},
            ip_address=client_ip,
        )
    except Exception:
        pass
    return device
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}",
    response_model=DeviceResponse,
    summary="Get a single device",
    dependencies=[require_scope("devices:read")],
)
async def get_device(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """Return the details of one device.

    Viewer role and above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    device = await device_service.get_device(
        db=db, tenant_id=tenant_id, device_id=device_id
    )
    return device
@router.put(
    "/tenants/{tenant_id}/devices/{device_id}",
    response_model=DeviceResponse,
    summary="Update a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def update_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    data: DeviceUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """Update device fields. Requires operator role or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    device = await device_service.update_device(
        db=db,
        tenant_id=tenant_id,
        device_id=device_id,
        data=data,
        encryption_key=settings.get_encryption_key_bytes(),
    )
    # Best-effort audit trail — a logging failure must not fail the request.
    client_ip = request.client.host if request.client else None
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_update",
            resource_type="device", resource_id=str(device_id),
            device_id=device_id,
            details={"changes": data.model_dump(exclude_unset=True)},
            ip_address=client_ip,
        )
    except Exception:
        pass
    return device
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device",
    dependencies=[Depends(require_tenant_admin_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def delete_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Hard-delete a device. Requires tenant_admin or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    # Audit the deletion (best-effort) before the row is removed; a logging
    # failure must not block the delete itself.
    client_ip = request.client.host if request.client else None
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_delete",
            resource_type="device", resource_id=str(device_id),
            device_id=device_id,
            ip_address=client_ip,
        )
    except Exception:
        pass
    await device_service.delete_device(
        db=db, tenant_id=tenant_id, device_id=device_id
    )
# ---------------------------------------------------------------------------
# Subnet scan and bulk add
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/scan",
    response_model=SubnetScanResponse,
    summary="Scan a subnet for MikroTik devices",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def scan_devices(
    request: Request,
    tenant_id: uuid.UUID,
    data: SubnetScanRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> SubnetScanResponse:
    """
    Scan a CIDR subnet for hosts with open RouterOS API ports (8728/8729).

    Returns a list of discovered IPs for the user to review and selectively
    import — does NOT automatically add devices.

    Requires operator role or above.
    """
    import ipaddress

    # Use the shared helper instead of an inline check so that super_admin
    # gets the RLS tenant context re-pointed at the target tenant — without
    # that, the audit insert below is silently rejected by row-level
    # security for cross-tenant scans.
    await _check_tenant_access(current_user, tenant_id, db)

    discovered = await scan_subnet(data.cidr)

    network = ipaddress.ip_network(data.cidr, strict=False)
    # Exclude the network and broadcast addresses when the subnet has them.
    total_scanned = (
        network.num_addresses - 2 if network.num_addresses > 2 else network.num_addresses
    )

    # Audit log the scan (fire-and-forget — never breaks the response)
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "subnet_scan",
            resource_type="network", resource_id=data.cidr,
            details={
                "cidr": data.cidr,
                "devices_found": len(discovered),
                "ip": request.client.host if request.client else None,
            },
            ip_address=request.client.host if request.client else None,
        )
    except Exception:
        pass
    return SubnetScanResponse(
        cidr=data.cidr,
        discovered=discovered,
        total_scanned=total_scanned,
        total_discovered=len(discovered),
    )
@router.post(
    "/tenants/{tenant_id}/devices/bulk-add",
    response_model=BulkAddResult,
    status_code=status.HTTP_201_CREATED,
    summary="Bulk-add devices from scan results",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def bulk_add_devices(
    request: Request,
    tenant_id: uuid.UUID,
    data: BulkAddRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> BulkAddResult:
    """
    Add multiple devices at once from scan results.

    Per-device credentials take precedence over shared credentials.
    Devices that fail connectivity checks or validation are reported in `failed`.

    Requires operator role or above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    added = []
    failed = []
    # Fetched once and reused for every device's credential encryption.
    encryption_key = settings.get_encryption_key_bytes()
    for dev_data in data.devices:
        # Resolve credentials: per-device first, then shared
        username = dev_data.username or data.shared_username
        password = dev_data.password or data.shared_password
        if not username or not password:
            # Missing credentials is a per-device failure, not a request error.
            failed.append({
                "ip_address": dev_data.ip_address,
                "error": "No credentials provided (set per-device or shared credentials)",
            })
            continue
        create_data = DeviceCreate(
            hostname=dev_data.hostname or dev_data.ip_address,  # fall back to IP when no hostname
            ip_address=dev_data.ip_address,
            api_port=dev_data.api_port,
            api_ssl_port=dev_data.api_ssl_port,
            username=username,
            password=password,
        )
        try:
            # create_device TCP-probes the device before saving (see the
            # single-device POST route above); failures surface as exceptions.
            device = await device_service.create_device(
                db=db,
                tenant_id=tenant_id,
                data=create_data,
                encryption_key=encryption_key,
            )
            added.append(device)
            # Best-effort audit of each adoption — never fails the request.
            try:
                await log_action(
                    db, tenant_id, current_user.user_id, "device_adopt",
                    resource_type="device", resource_id=str(device.id),
                    details={"hostname": create_data.hostname, "ip_address": create_data.ip_address},
                    ip_address=request.client.host if request.client else None,
                )
            except Exception:
                pass
        except HTTPException as exc:
            # HTTP errors carry a user-facing detail string.
            failed.append({"ip_address": dev_data.ip_address, "error": exc.detail})
        except Exception as exc:
            # Anything else is reported verbatim for this device only.
            failed.append({"ip_address": dev_data.ip_address, "error": str(exc)})
    return BulkAddResult(added=added, failed=failed)
# ---------------------------------------------------------------------------
# Group assignment
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Add device to a group",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def add_device_to_group(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Assign a device to a group (204 on success).

    Requires operator or above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.assign_device_to_group(
        db, tenant_id, device_id, group_id
    )
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}/groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Remove device from a group",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
# NOTE(review): removal is rate-limited tighter (5/min) than assignment
# (20/min) — presumably intentional; confirm with the rate-limit policy.
@limiter.limit("5/minute")
async def remove_device_from_group(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Remove a device from a group. Requires operator or above.

    Tenant isolation is checked here; group membership removal itself is
    delegated to the device service. Returns 204 with no body on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.remove_device_from_group(db, tenant_id, device_id, group_id)
# ---------------------------------------------------------------------------
# Tag assignment
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Add tag to a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def add_tag_to_device(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Assign a tag to a device. Requires operator or above.

    Mirrors the group-assignment endpoint above: RBAC via route
    dependencies, tenant isolation checked here, work delegated to the
    device service. Returns 204 with no body on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.assign_tag_to_device(db, tenant_id, device_id, tag_id)
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}/tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Remove tag from a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
# NOTE(review): 5/min here vs 20/min for tag assignment — presumably
# intentional asymmetry; confirm with the rate-limit policy.
@limiter.limit("5/minute")
async def remove_tag_from_device(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Remove a tag from a device. Requires operator or above.

    Tenant isolation checked here; removal delegated to the device
    service. Returns 204 with no body on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.remove_tag_from_device(db, tenant_id, device_id, tag_id)

View File

@@ -0,0 +1,164 @@
"""Unified events timeline API endpoint.
Provides a single GET endpoint that unions alert events, device status changes,
and config backup runs into a unified timeline for the dashboard.
RLS enforced via get_db() (app_user engine with tenant context).
"""
import logging
import uuid
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["events"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure *current_user* may operate on *tenant_id*.

    Super admins may access any tenant; for them the RLS tenant context is
    explicitly switched to the requested tenant. Regular users must belong
    to the tenant, otherwise a 403 is raised.
    """
    if current_user.is_super_admin:
        # Super admin: pin the RLS session context to the target tenant.
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id == tenant_id:
        # Regular user operating on their own tenant — RLS already scoped.
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied to this tenant",
    )
# ---------------------------------------------------------------------------
# Unified events endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/events",
    summary="List unified events (alerts, status changes, config backups)",
)
async def list_events(
    tenant_id: uuid.UUID,
    limit: int = Query(50, ge=1, le=200, description="Max events to return"),
    event_type: Optional[str] = Query(
        None,
        description="Filter by event type: alert, status_change, config_backup",
    ),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return a unified list of recent events across alerts, device status, and config backups.

    Events are ordered by timestamp descending, limited to `limit` (default 50).
    RLS automatically filters to the tenant's data via the app_user session.

    Each source query is capped at `limit` rows, then the merged list is
    re-sorted and truncated to `limit` overall, so a busy source can crowd
    out the others within a single page.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Reject unknown filter values early with a 422 rather than silently
    # returning an empty result.
    if event_type and event_type not in ("alert", "status_change", "config_backup"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="event_type must be one of: alert, status_change, config_backup",
        )
    events: list[dict[str, Any]] = []
    # 1. Alert events
    if not event_type or event_type == "alert":
        alert_result = await db.execute(
            text("""
                SELECT ae.id, ae.status, ae.severity, ae.metric, ae.message,
                       ae.fired_at, ae.device_id, d.hostname
                FROM alert_events ae
                LEFT JOIN devices d ON d.id = ae.device_id
                ORDER BY ae.fired_at DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        for row in alert_result.fetchall():
            # Fall back to sensible defaults when columns are NULL.
            alert_status = row[1] or "firing"
            metric = row[3] or "unknown"
            events.append({
                "id": str(row[0]),
                "event_type": "alert",
                "severity": row[2],
                "title": f"{alert_status}: {metric}",
                "description": row[4] or f"Alert {alert_status} for {metric}",
                "device_hostname": row[7],
                "device_id": str(row[6]) if row[6] else None,
                "timestamp": row[5].isoformat() if row[5] else None,
            })
    # 2. Device status changes (inferred from current status + last_seen)
    # NOTE: there is no status-history table here — each device contributes
    # at most one synthetic event reflecting its *current* status, and its
    # id ("status-<device_id>") is stable across status transitions.
    if not event_type or event_type == "status_change":
        status_result = await db.execute(
            text("""
                SELECT d.id, d.hostname, d.status, d.last_seen
                FROM devices d
                WHERE d.last_seen IS NOT NULL
                ORDER BY d.last_seen DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        for row in status_result.fetchall():
            device_status = row[2] or "unknown"
            hostname = row[1] or "Unknown device"
            # Anything not "online" (offline/unknown/...) surfaces as a warning.
            severity = "info" if device_status == "online" else "warning"
            events.append({
                "id": f"status-{row[0]}",
                "event_type": "status_change",
                "severity": severity,
                "title": f"Device {device_status}",
                "description": f"{hostname} is now {device_status}",
                "device_hostname": hostname,
                "device_id": str(row[0]),
                "timestamp": row[3].isoformat() if row[3] else None,
            })
    # 3. Config backup runs
    if not event_type or event_type == "config_backup":
        backup_result = await db.execute(
            text("""
                SELECT cbr.id, cbr.trigger_type, cbr.created_at,
                       cbr.device_id, d.hostname
                FROM config_backup_runs cbr
                LEFT JOIN devices d ON d.id = cbr.device_id
                ORDER BY cbr.created_at DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        for row in backup_result.fetchall():
            trigger_type = row[1] or "manual"
            hostname = row[4] or "Unknown device"
            events.append({
                "id": str(row[0]),
                "event_type": "config_backup",
                "severity": "info",
                "title": "Config backup",
                "description": f"{trigger_type} backup completed for {hostname}",
                "device_hostname": hostname,
                "device_id": str(row[3]) if row[3] else None,
                "timestamp": row[2].isoformat() if row[2] else None,
            })
    # Sort all events by timestamp descending, then apply final limit.
    # Comparison is lexicographic on the ISO strings; None sorts last.
    # NOTE(review): this assumes all timestamps share one UTC offset format
    # — mixed offsets would sort incorrectly; confirm DB column types.
    events.sort(
        key=lambda e: e["timestamp"] or "",
        reverse=True,
    )
    return events[:limit]

View File

@@ -0,0 +1,712 @@
"""Firmware API endpoints for version overview, cache management, preferred channel,
and firmware upgrade orchestration.
Tenant-scoped routes under /api/tenants/{tenant_id}/firmware/*.
Global routes under /api/firmware/* for version listing and admin actions.
"""
import asyncio
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.audit_service import log_action
router = APIRouter(tags=["firmware"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify that *current_user* is permitted to act on *tenant_id*.

    Super admins are always allowed and get the RLS session context pointed
    at the requested tenant; everyone else must belong to that tenant or a
    403 is raised.
    """
    if current_user.is_super_admin:
        # Cross-tenant access: switch the RLS context to the target tenant.
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
class PreferredChannelRequest(BaseModel):
    """Body for the preferred-channel PATCH endpoints.

    The value is validated against the allowed set ("stable", "long-term",
    "testing") in each endpoint, not here.
    """
    model_config = ConfigDict(extra="forbid")
    preferred_channel: str  # "stable", "long-term", "testing"
class FirmwareDownloadRequest(BaseModel):
    """Body for POST /firmware/download — identifies one NPK to cache locally."""
    model_config = ConfigDict(extra="forbid")
    # e.g. RouterOS architecture name such as "arm64" — TODO confirm format
    architecture: str
    channel: str
    version: str
# =========================================================================
# TENANT-SCOPED ENDPOINTS
# =========================================================================
@router.get(
    "/tenants/{tenant_id}/firmware/overview",
    summary="Get firmware status for all devices in tenant",
    # NOTE(review): a read-only GET gated on the *write* scope — confirm
    # whether a dedicated firmware:read scope was intended.
    dependencies=[require_scope("firmware:write")],
)
async def get_firmware_overview(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Return the firmware overview for every device in the tenant.

    Tenant isolation is checked here; the payload shape is defined by
    the firmware service (imported lazily to avoid import cycles).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    from app.services.firmware_service import get_firmware_overview as _get_overview
    return await _get_overview(str(tenant_id))
@router.patch(
    "/tenants/{tenant_id}/devices/{device_id}/preferred-channel",
    summary="Set preferred firmware channel for a device",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def set_device_preferred_channel(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: PreferredChannelRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Set a single device's preferred firmware channel.

    Raises 422 for unknown channel values and 404 when no row matches
    (RLS also hides devices outside the session tenant, which surfaces
    as a 404 here).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if body.preferred_channel not in ("stable", "long-term", "testing"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="preferred_channel must be one of: stable, long-term, testing",
        )
    result = await db.execute(
        text("""
            UPDATE devices SET preferred_channel = :channel, updated_at = NOW()
            WHERE id = :device_id
            RETURNING id
        """),
        {"channel": body.preferred_channel, "device_id": str(device_id)},
    )
    # RETURNING id yields no row when the device does not exist (or is
    # filtered out by RLS).
    if not result.fetchone():
        raise HTTPException(status_code=404, detail="Device not found")
    await db.commit()
    return {"status": "ok", "preferred_channel": body.preferred_channel}
@router.patch(
    "/tenants/{tenant_id}/device-groups/{group_id}/preferred-channel",
    summary="Set preferred firmware channel for a device group",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def set_group_preferred_channel(
    request: Request,
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    body: PreferredChannelRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Set the preferred firmware channel for a device group.

    Returns 422 for an unknown channel value and 404 when the group does
    not exist (or is hidden by RLS); commits on success.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    channel = body.preferred_channel
    if channel not in {"stable", "long-term", "testing"}:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="preferred_channel must be one of: stable, long-term, testing",
        )
    update_result = await db.execute(
        text("""
            UPDATE device_groups SET preferred_channel = :channel
            WHERE id = :group_id
            RETURNING id
        """),
        {"channel": channel, "group_id": str(group_id)},
    )
    updated_row = update_result.fetchone()
    if updated_row is None:
        raise HTTPException(status_code=404, detail="Device group not found")
    await db.commit()
    return {"status": "ok", "preferred_channel": channel}
# =========================================================================
# GLOBAL ENDPOINTS (firmware versions are not tenant-scoped)
# =========================================================================
@router.get(
    "/firmware/versions",
    summary="List all known firmware versions from cache",
)
async def list_firmware_versions(
    architecture: Optional[str] = Query(None),
    channel: Optional[str] = Query(None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """List cached firmware versions, optionally filtered by arch/channel.

    Firmware versions are global (not tenant-scoped). Filter values are
    bound as query parameters; only fixed literal fragments are ever
    interpolated into the SQL text.
    """
    params: dict[str, Any] = {}
    conditions: list[str] = []
    if architecture:
        params["arch"] = architecture
        conditions.append("architecture = :arch")
    if channel:
        params["channel"] = channel
        conditions.append("channel = :channel")
    where = "" if not conditions else f"WHERE {' AND '.join(conditions)}"
    result = await db.execute(
        text(f"""
            SELECT id, architecture, channel, version, npk_url,
                   npk_local_path, npk_size_bytes, checked_at
            FROM firmware_versions
            {where}
            ORDER BY architecture, channel, checked_at DESC
        """),
        params,
    )

    def _serialize(row: Any) -> dict[str, Any]:
        # Map the positional row into the JSON shape expected by clients.
        return {
            "id": str(row[0]),
            "architecture": row[1],
            "channel": row[2],
            "version": row[3],
            "npk_url": row[4],
            "npk_local_path": row[5],
            "npk_size_bytes": row[6],
            "checked_at": row[7].isoformat() if row[7] else None,
        }

    return [_serialize(row) for row in result.fetchall()]
@router.post(
    "/firmware/check",
    summary="Trigger immediate firmware version check (super admin only)",
)
async def trigger_firmware_check(
    current_user: CurrentUser = Depends(get_current_user),
) -> dict[str, Any]:
    """Synchronously run the firmware version discovery and return results.

    Global (not tenant-scoped); restricted to super admins. The service
    import is deferred so it only loads when the endpoint is hit.
    """
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    from app.services.firmware_service import check_latest_versions
    results = await check_latest_versions()
    return {"status": "ok", "versions_discovered": len(results), "versions": results}
@router.get(
    "/firmware/cache",
    summary="List locally cached NPK files (super admin only)",
)
async def list_firmware_cache(
    current_user: CurrentUser = Depends(get_current_user),
) -> list[dict[str, Any]]:
    """List the NPK files currently held in the local firmware cache.

    Super admin only; payload shape defined by the firmware service.
    """
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    from app.services.firmware_service import get_cached_firmware
    return await get_cached_firmware()
@router.post(
    "/firmware/download",
    summary="Download a specific NPK to local cache (super admin only)",
)
async def download_firmware(
    body: FirmwareDownloadRequest,
    current_user: CurrentUser = Depends(get_current_user),
) -> dict[str, str]:
    """Download one NPK (arch/channel/version) into the local cache.

    Super admin only. Runs the download inline and returns the resulting
    local path; no rate limit is applied here — presumably acceptable for
    an admin-only route, but worth confirming.
    """
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    from app.services.firmware_service import download_firmware as _download
    path = await _download(body.architecture, body.channel, body.version)
    return {"status": "ok", "path": path}
# =========================================================================
# UPGRADE ENDPOINTS
# =========================================================================
class UpgradeRequest(BaseModel):
    """Body for POST .../firmware/upgrade (single-device upgrade).

    ``architecture`` is optional: when omitted, the endpoint resolves the
    device's architecture from the database and returns 422 if it is
    unknown.
    """
    model_config = ConfigDict(extra="forbid")
    device_id: str
    target_version: str
    # Fix: previously a required ``str``, which made the endpoint's
    # "look up architecture if not provided" fallback unreachable unless
    # clients sent an empty string. Optional-with-None keeps existing
    # callers working while enabling the documented fallback.
    architecture: Optional[str] = None
    channel: str = "stable"
    # Must be explicitly set for major-version jumps (enforced downstream).
    confirmed_major_upgrade: bool = False
    scheduled_at: Optional[str] = None  # ISO datetime or None for immediate
class MassUpgradeRequest(BaseModel):
    """Body for POST .../firmware/mass-upgrade (multi-device rollout).

    No per-device architecture field: the endpoint resolves each device's
    architecture from the database.
    """
    model_config = ConfigDict(extra="forbid")
    device_ids: list[str]
    target_version: str
    channel: str = "stable"
    # Must be explicitly set for major-version jumps (enforced downstream).
    confirmed_major_upgrade: bool = False
    scheduled_at: Optional[str] = None  # ISO datetime or None for immediate
@router.post(
    "/tenants/{tenant_id}/firmware/upgrade",
    summary="Start or schedule a single device firmware upgrade",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def start_firmware_upgrade(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    body: UpgradeRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a firmware upgrade job and start (or schedule) it.

    Flow: resolve architecture (body value or DB lookup) → insert a
    firmware_upgrade_jobs row → commit → either register the scheduled
    run or fire-and-forget the upgrade task → best-effort audit log.
    Returns 202 with the job id; the upgrade itself runs asynchronously.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot initiate upgrades")
    # Look up device architecture if not provided
    architecture = body.architecture
    if not architecture:
        dev_result = await db.execute(
            text("SELECT architecture FROM devices WHERE id = CAST(:id AS uuid)"),
            {"id": body.device_id},
        )
        dev_row = dev_result.fetchone()
        if not dev_row or not dev_row[0]:
            raise HTTPException(422, "Device architecture unknown — cannot upgrade")
        architecture = dev_row[0]
    # Create upgrade job
    job_id = str(uuid.uuid4())
    await db.execute(
        text("""
            INSERT INTO firmware_upgrade_jobs
                (id, tenant_id, device_id, target_version, architecture, channel,
                 status, confirmed_major_upgrade, scheduled_at)
            VALUES
                (CAST(:id AS uuid), CAST(:tenant_id AS uuid), CAST(:device_id AS uuid),
                 :target_version, :architecture, :channel,
                 :status, :confirmed, :scheduled_at)
        """),
        {
            "id": job_id,
            "tenant_id": str(tenant_id),
            "device_id": body.device_id,
            "target_version": body.target_version,
            "architecture": architecture,
            "channel": body.channel,
            # Job starts life as "scheduled" (future run) or "pending" (now).
            "status": "scheduled" if body.scheduled_at else "pending",
            "confirmed": body.confirmed_major_upgrade,
            # NOTE(review): scheduled_at is bound as an ISO string — relies
            # on the driver/PG casting it to a timestamp; confirm.
            "scheduled_at": body.scheduled_at,
        },
    )
    # Commit before handing off so the background task sees the row.
    await db.commit()
    # Schedule or start immediately
    if body.scheduled_at:
        from app.services.upgrade_service import schedule_upgrade
        # NOTE(review): fromisoformat raises ValueError (→ 500) on a bad
        # string after the job row is already committed — confirm intended.
        schedule_upgrade(job_id, datetime.fromisoformat(body.scheduled_at))
    else:
        from app.services.upgrade_service import start_upgrade
        asyncio.create_task(start_upgrade(job_id))
    # Audit logging is best-effort: never fail the request over it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "firmware_upgrade",
            resource_type="firmware", resource_id=job_id,
            device_id=uuid.UUID(body.device_id),
            details={"target_version": body.target_version, "channel": body.channel},
        )
    except Exception:
        pass
    return {"status": "accepted", "job_id": job_id}
@router.post(
    "/tenants/{tenant_id}/firmware/mass-upgrade",
    summary="Start or schedule a mass firmware upgrade for multiple devices",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("5/minute")
async def start_mass_firmware_upgrade(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    body: MassUpgradeRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create one upgrade job per device under a shared rollout group.

    All job rows are inserted in a single transaction (one commit at the
    end), then the rollout is scheduled or started as a whole. Returns 202
    with the rollout group id plus the per-device job list.

    NOTE(review): unlike the single-device endpoint, devices whose
    architecture cannot be resolved are inserted with architecture
    "unknown" rather than rejected, and no audit log entry is written —
    confirm both are intentional.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot initiate upgrades")
    # One rollout group id ties every per-device job together.
    rollout_group_id = str(uuid.uuid4())
    jobs = []
    for device_id in body.device_ids:
        # Look up architecture per device
        dev_result = await db.execute(
            text("SELECT architecture FROM devices WHERE id = CAST(:id AS uuid)"),
            {"id": device_id},
        )
        dev_row = dev_result.fetchone()
        architecture = dev_row[0] if dev_row and dev_row[0] else "unknown"
        job_id = str(uuid.uuid4())
        await db.execute(
            text("""
                INSERT INTO firmware_upgrade_jobs
                    (id, tenant_id, device_id, rollout_group_id,
                     target_version, architecture, channel,
                     status, confirmed_major_upgrade, scheduled_at)
                VALUES
                    (CAST(:id AS uuid), CAST(:tenant_id AS uuid),
                     CAST(:device_id AS uuid), CAST(:group_id AS uuid),
                     :target_version, :architecture, :channel,
                     :status, :confirmed, :scheduled_at)
            """),
            {
                "id": job_id,
                "tenant_id": str(tenant_id),
                "device_id": device_id,
                "group_id": rollout_group_id,
                "target_version": body.target_version,
                "architecture": architecture,
                "channel": body.channel,
                "status": "scheduled" if body.scheduled_at else "pending",
                "confirmed": body.confirmed_major_upgrade,
                "scheduled_at": body.scheduled_at,
            },
        )
        jobs.append({"job_id": job_id, "device_id": device_id, "architecture": architecture})
    # Single commit so the rollout is created atomically.
    await db.commit()
    # Schedule or start immediately
    if body.scheduled_at:
        from app.services.upgrade_service import schedule_mass_upgrade
        schedule_mass_upgrade(rollout_group_id, datetime.fromisoformat(body.scheduled_at))
    else:
        from app.services.upgrade_service import start_mass_upgrade
        asyncio.create_task(start_mass_upgrade(rollout_group_id))
    return {
        "status": "accepted",
        "rollout_group_id": rollout_group_id,
        "jobs": jobs,
    }
@router.get(
    "/tenants/{tenant_id}/firmware/upgrades",
    summary="List firmware upgrade jobs for tenant",
    dependencies=[require_scope("firmware:write")],
)
async def list_upgrade_jobs(
    tenant_id: uuid.UUID,
    # Exposed as ?status= in the API; renamed locally to avoid shadowing
    # the fastapi `status` module import.
    upgrade_status: Optional[str] = Query(None, alias="status"),
    device_id: Optional[str] = Query(None),
    rollout_group_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=200),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Paginated listing of firmware upgrade jobs with optional filters.

    Only fixed literal fragments are interpolated into the SQL text; all
    filter values are bound parameters. Tenant scoping is provided by RLS
    on the session, not by an explicit tenant_id predicate.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    filters = ["1=1"]
    params: dict[str, Any] = {}
    if upgrade_status:
        filters.append("j.status = :status")
        params["status"] = upgrade_status
    if device_id:
        filters.append("j.device_id = CAST(:device_id AS uuid)")
        params["device_id"] = device_id
    if rollout_group_id:
        filters.append("j.rollout_group_id = CAST(:group_id AS uuid)")
        params["group_id"] = rollout_group_id
    where = " AND ".join(filters)
    offset = (page - 1) * per_page
    # Separate COUNT query so `total` reflects all matches, not one page.
    count_result = await db.execute(
        text(f"SELECT COUNT(*) FROM firmware_upgrade_jobs j WHERE {where}"),
        params,
    )
    total = count_result.scalar() or 0
    result = await db.execute(
        text(f"""
            SELECT j.id, j.device_id, j.rollout_group_id,
                   j.target_version, j.architecture, j.channel,
                   j.status, j.pre_upgrade_backup_sha, j.scheduled_at,
                   j.started_at, j.completed_at, j.error_message,
                   j.confirmed_major_upgrade, j.created_at,
                   d.hostname AS device_hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE {where}
            ORDER BY j.created_at DESC
            LIMIT :limit OFFSET :offset
        """),
        {**params, "limit": per_page, "offset": offset},
    )
    items = [
        {
            "id": str(row[0]),
            "device_id": str(row[1]),
            "rollout_group_id": str(row[2]) if row[2] else None,
            "target_version": row[3],
            "architecture": row[4],
            "channel": row[5],
            "status": row[6],
            "pre_upgrade_backup_sha": row[7],
            "scheduled_at": row[8].isoformat() if row[8] else None,
            "started_at": row[9].isoformat() if row[9] else None,
            "completed_at": row[10].isoformat() if row[10] else None,
            "error_message": row[11],
            "confirmed_major_upgrade": row[12],
            "created_at": row[13].isoformat() if row[13] else None,
            "device_hostname": row[14],
        }
        for row in result.fetchall()
    ]
    return {"items": items, "total": total, "page": page, "per_page": per_page}
@router.get(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}",
    summary="Get single upgrade job detail",
    dependencies=[require_scope("firmware:write")],
)
async def get_upgrade_job(
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Fetch one upgrade job by id, with the device hostname joined in.

    404 when the job does not exist or is hidden by RLS. Same response
    shape as one item of the list endpoint above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT j.id, j.device_id, j.rollout_group_id,
                   j.target_version, j.architecture, j.channel,
                   j.status, j.pre_upgrade_backup_sha, j.scheduled_at,
                   j.started_at, j.completed_at, j.error_message,
                   j.confirmed_major_upgrade, j.created_at,
                   d.hostname AS device_hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE j.id = CAST(:job_id AS uuid)
        """),
        {"job_id": str(job_id)},
    )
    row = result.fetchone()
    if not row:
        raise HTTPException(404, "Upgrade job not found")
    return {
        "id": str(row[0]),
        "device_id": str(row[1]),
        "rollout_group_id": str(row[2]) if row[2] else None,
        "target_version": row[3],
        "architecture": row[4],
        "channel": row[5],
        "status": row[6],
        "pre_upgrade_backup_sha": row[7],
        "scheduled_at": row[8].isoformat() if row[8] else None,
        "started_at": row[9].isoformat() if row[9] else None,
        "completed_at": row[10].isoformat() if row[10] else None,
        "error_message": row[11],
        "confirmed_major_upgrade": row[12],
        "created_at": row[13].isoformat() if row[13] else None,
        "device_hostname": row[14],
    }
@router.get(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}",
    summary="Get mass rollout status with all jobs",
    dependencies=[require_scope("firmware:write")],
)
async def get_rollout_status(
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Summarize a mass rollout: per-status counts, the device currently
    being upgraded (if any), and the full job list in creation order.

    404 when no job rows exist for the rollout group.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT j.id, j.device_id, j.status, j.target_version,
                   j.architecture, j.error_message, j.started_at,
                   j.completed_at, d.hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE j.rollout_group_id = CAST(:group_id AS uuid)
            ORDER BY j.created_at ASC
        """),
        {"group_id": str(rollout_group_id)},
    )
    rows = result.fetchall()
    if not rows:
        raise HTTPException(404, "Rollout group not found")
    # Per-status summary counts.
    statuses = [r[2] for r in rows]
    total = len(rows)
    completed = statuses.count("completed")
    failed = statuses.count("failed")
    paused = statuses.count("paused")
    pending = sum(1 for s in statuses if s in ("pending", "scheduled"))
    # First job in an in-flight state identifies the device being worked on.
    in_flight = {"downloading", "uploading", "rebooting", "verifying"}
    current_device = next(
        (r[8] or str(r[1]) for r in rows if r[2] in in_flight),
        None,
    )

    def _job_dict(r: Any) -> dict[str, Any]:
        return {
            "id": str(r[0]),
            "device_id": str(r[1]),
            "status": r[2],
            "target_version": r[3],
            "architecture": r[4],
            "error_message": r[5],
            "started_at": r[6].isoformat() if r[6] else None,
            "completed_at": r[7].isoformat() if r[7] else None,
            "device_hostname": r[8],
        }

    return {
        "rollout_group_id": str(rollout_group_id),
        "total": total,
        "completed": completed,
        "failed": failed,
        "paused": paused,
        "pending": pending,
        "current_device": current_device,
        "jobs": [_job_dict(r) for r in rows],
    }
@router.post(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}/cancel",
    summary="Cancel a scheduled or pending upgrade",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def cancel_upgrade_endpoint(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Cancel an upgrade job; the actual state change lives in the upgrade
    service (imported lazily). Viewers are rejected with 403.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot cancel upgrades")
    from app.services.upgrade_service import cancel_upgrade
    await cancel_upgrade(str(job_id))
    return {"status": "ok", "message": "Upgrade cancelled"}
@router.post(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}/retry",
    summary="Retry a failed upgrade",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def retry_upgrade_endpoint(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Restart a failed upgrade job via the upgrade service (lazy import).
    Viewers are rejected with 403.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot retry upgrades")
    from app.services.upgrade_service import retry_failed_upgrade
    await retry_failed_upgrade(str(job_id))
    return {"status": "ok", "message": "Upgrade retry started"}
@router.post(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}/resume",
    summary="Resume a paused mass rollout",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def resume_rollout_endpoint(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Resume a paused mass rollout via the upgrade service (lazy import).
    Viewers are rejected with 403.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot resume rollouts")
    from app.services.upgrade_service import resume_mass_upgrade
    await resume_mass_upgrade(str(rollout_group_id))
    return {"status": "ok", "message": "Rollout resumed"}
@router.post(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}/abort",
    summary="Abort remaining devices in a paused rollout",
    dependencies=[require_scope("firmware:write")],
)
# Tighter limit (5/min) than the other rollout actions — this one is
# destructive for every remaining device in the rollout.
@limiter.limit("5/minute")
async def abort_rollout_endpoint(
    # `request` is unused in the body but required by the slowapi limiter.
    request: Request,
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Abort all not-yet-upgraded devices in a rollout and return how many
    jobs were aborted. Viewers are rejected with 403.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot abort rollouts")
    from app.services.upgrade_service import abort_mass_upgrade
    aborted = await abort_mass_upgrade(str(rollout_group_id))
    return {"status": "ok", "aborted_count": aborted}

View File

@@ -0,0 +1,309 @@
"""Maintenance windows API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Maintenance window CRUD (list, create, update, delete)
- Filterable by status: upcoming, active, past
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above for all operations.
"""
import json
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["maintenance-windows"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Super admins may access any tenant and get the RLS session context
    switched to the requested one; all other users must belong to the
    tenant or a 403 is raised. Same helper is duplicated per router module.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
def _require_operator(current_user: CurrentUser) -> None:
    """Reject viewer-role users: these endpoints need operator or above.

    Any role other than "viewer" passes through unchanged.
    """
    if current_user.role != "viewer":
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Requires at least operator role.",
    )
# ---------------------------------------------------------------------------
# Request/response schemas
# ---------------------------------------------------------------------------
class MaintenanceWindowCreate(BaseModel):
    """Body for creating a maintenance window.

    The end_at > start_at invariant is enforced in the endpoint, not here.
    """
    model_config = ConfigDict(extra="forbid")
    name: str
    # Empty list means the window applies to no specific devices
    # (tenant-wide semantics are decided by consumers — TODO confirm).
    device_ids: list[str] = []
    start_at: datetime
    end_at: datetime
    # When True, alerts during the window are suppressed downstream.
    suppress_alerts: bool = True
    notes: Optional[str] = None
class MaintenanceWindowUpdate(BaseModel):
    """Partial-update body: every field optional; None means "leave as is"."""
    model_config = ConfigDict(extra="forbid")
    name: Optional[str] = None
    device_ids: Optional[list[str]] = None
    start_at: Optional[datetime] = None
    end_at: Optional[datetime] = None
    suppress_alerts: Optional[bool] = None
    notes: Optional[str] = None
class MaintenanceWindowResponse(BaseModel):
    """Serialized maintenance window (timestamps as ISO strings).

    NOTE(review): the list endpoint returns plain dicts and does not
    declare this as its response_model — confirm whether it is still used.
    """
    model_config = ConfigDict(extra="forbid")
    id: str
    tenant_id: str
    name: str
    device_ids: list[str]
    start_at: str
    end_at: str
    suppress_alerts: bool
    notes: Optional[str] = None
    created_by: Optional[str] = None
    created_at: str
# ---------------------------------------------------------------------------
# CRUD endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/maintenance-windows",
    summary="List maintenance windows for tenant",
)
async def list_maintenance_windows(
    tenant_id: uuid.UUID,
    # Exposed as ?status= in the API; renamed locally to avoid shadowing
    # the fastapi `status` module import.
    window_status: Optional[str] = Query(None, alias="status"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """List maintenance windows for a tenant, optionally filtered by phase.

    ``?status=`` accepts ``active`` (in effect now), ``upcoming`` (starts
    in the future) or ``past`` (already ended). RLS scopes rows to the
    tenant; operator role or above is required. Raises 422 for an unknown
    status value.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    # Fix: previously an unknown status value was silently ignored and all
    # windows were returned; reject it instead (mirrors the event_type
    # validation in the events router).
    if window_status is not None and window_status not in ("active", "upcoming", "past"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="status must be one of: active, upcoming, past",
        )
    filters = ["1=1"]
    params: dict[str, Any] = {}
    if window_status == "active":
        filters.append("mw.start_at <= NOW() AND mw.end_at >= NOW()")
    elif window_status == "upcoming":
        filters.append("mw.start_at > NOW()")
    elif window_status == "past":
        filters.append("mw.end_at < NOW()")
    where = " AND ".join(filters)
    result = await db.execute(
        text(f"""
            SELECT mw.id, mw.tenant_id, mw.name, mw.device_ids,
                   mw.start_at, mw.end_at, mw.suppress_alerts,
                   mw.notes, mw.created_by, mw.created_at
            FROM maintenance_windows mw
            WHERE {where}
            ORDER BY mw.start_at DESC
        """),
        params,
    )
    return [
        {
            "id": str(row[0]),
            "tenant_id": str(row[1]),
            "name": row[2],
            # device_ids column may deserialize as a list or something else
            # (e.g. a JSON string) depending on the driver; coerce defensively.
            "device_ids": row[3] if isinstance(row[3], list) else [],
            "start_at": row[4].isoformat() if row[4] else None,
            "end_at": row[5].isoformat() if row[5] else None,
            "suppress_alerts": row[6],
            "notes": row[7],
            "created_by": str(row[8]) if row[8] else None,
            "created_at": row[9].isoformat() if row[9] else None,
        }
        for row in result.fetchall()
    ]
@router.post(
    "/tenants/{tenant_id}/maintenance-windows",
    summary="Create maintenance window",
    status_code=status.HTTP_201_CREATED,
)
@limiter.limit("20/minute")
async def create_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    body: MaintenanceWindowCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a maintenance window for a tenant.

    Validates that the window has a positive duration, inserts the row
    (RLS scopes the insert via the session tenant context), and returns
    the created window.

    Raises:
        HTTPException 422: end_at is not after start_at.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    if body.end_at <= body.start_at:
        raise HTTPException(422, "end_at must be after start_at")
    window_id = str(uuid.uuid4())
    # RETURNING created_at so the response echoes the authoritative DB
    # timestamp. Previously the response used datetime.utcnow() — a naive,
    # deprecated value computed separately from what was actually stored.
    result = await db.execute(
        text("""
            INSERT INTO maintenance_windows
                (id, tenant_id, name, device_ids, start_at, end_at,
                 suppress_alerts, notes, created_by)
            VALUES
                (CAST(:id AS uuid), CAST(:tenant_id AS uuid),
                 :name, CAST(:device_ids AS jsonb), :start_at, :end_at,
                 :suppress_alerts, :notes, CAST(:created_by AS uuid))
            RETURNING created_at
        """),
        {
            "id": window_id,
            "tenant_id": str(tenant_id),
            "name": body.name,
            "device_ids": json.dumps(body.device_ids),
            "start_at": body.start_at,
            "end_at": body.end_at,
            "suppress_alerts": body.suppress_alerts,
            "notes": body.notes,
            "created_by": str(current_user.user_id),
        },
    )
    created_at = result.scalar_one()
    await db.commit()
    return {
        "id": window_id,
        "tenant_id": str(tenant_id),
        "name": body.name,
        "device_ids": body.device_ids,
        "start_at": body.start_at.isoformat(),
        "end_at": body.end_at.isoformat(),
        "suppress_alerts": body.suppress_alerts,
        "notes": body.notes,
        "created_by": str(current_user.user_id),
        "created_at": created_at.isoformat() if created_at else None,
    }
@router.put(
    "/tenants/{tenant_id}/maintenance-windows/{window_id}",
    summary="Update maintenance window",
)
@limiter.limit("20/minute")
async def update_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    window_id: uuid.UUID,
    body: MaintenanceWindowUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Partially update a maintenance window; only provided fields change.

    The UPDATE has no tenant predicate — RLS via the session tenant
    context is relied on to prevent cross-tenant writes.
    NOTE(review): unlike create, no start_at/end_at ordering check is
    performed here, so a partial update can produce end_at <= start_at —
    confirm whether that is intended.

    Raises:
        HTTPException 404: window does not exist (or is hidden by RLS).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    # Build dynamic SET clause for partial updates
    set_parts: list[str] = ["updated_at = NOW()"]
    params: dict[str, Any] = {"window_id": str(window_id)}
    if body.name is not None:
        set_parts.append("name = :name")
        params["name"] = body.name
    if body.device_ids is not None:
        set_parts.append("device_ids = CAST(:device_ids AS jsonb)")
        params["device_ids"] = json.dumps(body.device_ids)
    if body.start_at is not None:
        set_parts.append("start_at = :start_at")
        params["start_at"] = body.start_at
    if body.end_at is not None:
        set_parts.append("end_at = :end_at")
        params["end_at"] = body.end_at
    if body.suppress_alerts is not None:
        set_parts.append("suppress_alerts = :suppress_alerts")
        params["suppress_alerts"] = body.suppress_alerts
    if body.notes is not None:
        set_parts.append("notes = :notes")
        params["notes"] = body.notes
    # Safe f-string: set_parts contains only the fixed fragments above;
    # all user data travels through bind parameters.
    set_clause = ", ".join(set_parts)
    result = await db.execute(
        text(f"""
            UPDATE maintenance_windows
            SET {set_clause}
            WHERE id = CAST(:window_id AS uuid)
            RETURNING id, tenant_id, name, device_ids, start_at, end_at,
                      suppress_alerts, notes, created_by, created_at
        """),
        params,
    )
    row = result.fetchone()
    if not row:
        raise HTTPException(404, "Maintenance window not found")
    await db.commit()
    return {
        "id": str(row[0]),
        "tenant_id": str(row[1]),
        "name": row[2],
        "device_ids": row[3] if isinstance(row[3], list) else [],
        "start_at": row[4].isoformat() if row[4] else None,
        "end_at": row[5].isoformat() if row[5] else None,
        "suppress_alerts": row[6],
        "notes": row[7],
        "created_by": str(row[8]) if row[8] else None,
        "created_at": row[9].isoformat() if row[9] else None,
    }
@router.delete(
    "/tenants/{tenant_id}/maintenance-windows/{window_id}",
    summary="Delete maintenance window",
    status_code=status.HTTP_204_NO_CONTENT,
)
@limiter.limit("5/minute")
async def delete_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    window_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Hard-delete a maintenance window; 204 on success, 404 if absent."""
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    deletion = await db.execute(
        text(
            "DELETE FROM maintenance_windows WHERE id = CAST(:id AS uuid) RETURNING id"
        ),
        {"id": str(window_id)},
    )
    deleted_row = deletion.fetchone()
    if deleted_row is None:
        raise HTTPException(404, "Maintenance window not found")
    await db.commit()

View File

@@ -0,0 +1,414 @@
"""
Metrics API endpoints for querying TimescaleDB hypertables.
All device-scoped routes are tenant-scoped under
/api/tenants/{tenant_id}/devices/{device_id}/metrics/*.
Fleet summary endpoints are under /api/tenants/{tenant_id}/fleet/summary
and /api/fleet/summary (super_admin cross-tenant).
RLS is enforced via get_db() — the app_user engine applies tenant filtering
automatically based on the SET LOCAL app.current_tenant context.
All endpoints require authentication (get_current_user) and enforce
tenant access via _check_tenant_access.
"""
import uuid
from datetime import datetime, timedelta
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.tenant_context import CurrentUser, get_current_user
router = APIRouter(tags=["metrics"])
def _bucket_for_range(start: datetime, end: datetime) -> timedelta:
"""
Select an appropriate time_bucket size based on the requested time range.
Shorter ranges get finer granularity; longer ranges get coarser buckets
to keep result sets manageable.
Returns a timedelta because asyncpg requires a Python timedelta (not a
string interval literal) when binding the first argument of time_bucket().
"""
delta = end - start
hours = delta.total_seconds() / 3600
if hours <= 1:
return timedelta(minutes=1)
elif hours <= 6:
return timedelta(minutes=5)
elif hours <= 24:
return timedelta(minutes=15)
elif hours <= 168: # 7 days
return timedelta(hours=1)
elif hours <= 720: # 30 days
return timedelta(hours=6)
else:
return timedelta(days=1)
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Gate access to *tenant_id* for the calling user.

    super_admin may access any tenant; the session's RLS tenant context
    is switched to the target tenant so subsequent queries succeed.
    Everyone else must belong to the tenant, otherwise 403.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    # Re-point the DB tenant context so RLS permits the cross-tenant read.
    from app.database import set_tenant_context

    await set_tenant_context(db, str(tenant_id))
# ---------------------------------------------------------------------------
# Health metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/health",
    summary="Time-bucketed health metrics (CPU, memory, disk, temperature)",
)
async def device_health_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return time-bucketed CPU, memory, disk, and temperature metrics for a device.

    Bucket size adapts automatically to the requested time range
    (see _bucket_for_range). Memory/disk usage is derived as a percentage
    from free/total counters, guarded against division by zero. Rows are
    returned in ascending bucket order; [start, end) is half-open.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    result = await db.execute(
        text("""
            SELECT
                time_bucket(:bucket, time) AS bucket,
                avg(cpu_load)::smallint AS avg_cpu,
                max(cpu_load)::smallint AS max_cpu,
                avg(CASE WHEN total_memory > 0
                    THEN round((1 - free_memory::float / total_memory) * 100)
                    ELSE NULL END)::smallint AS avg_mem_pct,
                avg(CASE WHEN total_disk > 0
                    THEN round((1 - free_disk::float / total_disk) * 100)
                    ELSE NULL END)::smallint AS avg_disk_pct,
                avg(temperature)::smallint AS avg_temp
            FROM health_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
            GROUP BY bucket
            ORDER BY bucket ASC
        """),
        {"bucket": bucket, "device_id": str(device_id), "start": start, "end": end},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
# ---------------------------------------------------------------------------
# Interface traffic metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/interfaces",
    summary="Time-bucketed interface bandwidth metrics (bps from cumulative byte deltas)",
)
async def device_interface_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    interface: Optional[str] = Query(None, description="Filter to a specific interface name"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return time-bucketed interface traffic metrics for a device.

    Bandwidth (bps) is computed from raw cumulative byte counters using
    SQL LAG() window functions — no poller-side state is required.
    Counter wraps (rx_bytes < prev_rx) are treated as NULL to avoid
    incorrect spikes. The pipeline is:
      ordered  -> per-interface previous sample + elapsed seconds (LAG)
      with_bps -> per-sample rx/tx bps, NULL on wrap or zero/negative dt
      final    -> avg/max per time bucket per interface.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    # Build interface filter clause conditionally.
    # The interface name is passed as a bind parameter — never interpolated
    # into the SQL string — so this is safe from SQL injection.
    interface_filter = "AND interface = :interface" if interface else ""
    sql = f"""
        WITH ordered AS (
            SELECT
                time,
                interface,
                rx_bytes,
                tx_bytes,
                LAG(rx_bytes) OVER (PARTITION BY interface ORDER BY time) AS prev_rx,
                LAG(tx_bytes) OVER (PARTITION BY interface ORDER BY time) AS prev_tx,
                EXTRACT(EPOCH FROM time - LAG(time) OVER (PARTITION BY interface ORDER BY time)) AS dt
            FROM interface_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
              {interface_filter}
        ),
        with_bps AS (
            SELECT
                time,
                interface,
                rx_bytes,
                tx_bytes,
                CASE WHEN rx_bytes >= prev_rx AND dt > 0
                     THEN ((rx_bytes - prev_rx) * 8 / dt)::bigint
                     ELSE NULL END AS rx_bps,
                CASE WHEN tx_bytes >= prev_tx AND dt > 0
                     THEN ((tx_bytes - prev_tx) * 8 / dt)::bigint
                     ELSE NULL END AS tx_bps
            FROM ordered
            WHERE prev_rx IS NOT NULL
        )
        SELECT
            time_bucket(:bucket, time) AS bucket,
            interface,
            avg(rx_bps)::bigint AS avg_rx_bps,
            avg(tx_bps)::bigint AS avg_tx_bps,
            max(rx_bps)::bigint AS max_rx_bps,
            max(tx_bps)::bigint AS max_tx_bps
        FROM with_bps
        WHERE rx_bps IS NOT NULL
        GROUP BY bucket, interface
        ORDER BY interface, bucket ASC
    """
    params: dict[str, Any] = {
        "bucket": bucket,
        "device_id": str(device_id),
        "start": start,
        "end": end,
    }
    # Only bind :interface when the clause referencing it is present.
    if interface:
        params["interface"] = interface
    result = await db.execute(text(sql), params)
    rows = result.mappings().all()
    return [dict(row) for row in rows]
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/interfaces/list",
    summary="List distinct interface names for a device",
)
async def device_interface_list(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[str]:
    """Return the sorted distinct interface names recorded for a device."""
    await _check_tenant_access(current_user, tenant_id, db)
    query = text("""
        SELECT DISTINCT interface
        FROM interface_metrics
        WHERE device_id = :device_id
        ORDER BY interface
    """)
    result = await db.execute(query, {"device_id": str(device_id)})
    return [name for name in result.scalars()]
# ---------------------------------------------------------------------------
# Wireless metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/wireless",
    summary="Time-bucketed wireless metrics (clients, signal, CCQ)",
)
async def device_wireless_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return time-bucketed wireless metrics per interface for a device.

    Aggregates client counts, signal, and CCQ per adaptive time bucket;
    frequency is reported as max() within the bucket. Range is half-open
    [start, end); results ordered by interface then bucket.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    result = await db.execute(
        text("""
            SELECT
                time_bucket(:bucket, time) AS bucket,
                interface,
                avg(client_count)::smallint AS avg_clients,
                max(client_count)::smallint AS max_clients,
                avg(avg_signal)::smallint AS avg_signal,
                avg(ccq)::smallint AS avg_ccq,
                max(frequency) AS frequency
            FROM wireless_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
            GROUP BY bucket, interface
            ORDER BY interface, bucket ASC
        """),
        {"bucket": bucket, "device_id": str(device_id), "start": start, "end": end},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/wireless/latest",
    summary="Latest wireless stats per interface (not time-bucketed)",
)
async def device_wireless_latest(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return the most recent wireless reading per interface for a device.

    Uses Postgres DISTINCT ON with (interface, time DESC) ordering to pick
    exactly one — the newest — row per interface in a single pass.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT DISTINCT ON (interface)
                interface, client_count, avg_signal, ccq, frequency, time
            FROM wireless_metrics
            WHERE device_id = :device_id
            ORDER BY interface, time DESC
        """),
        {"device_id": str(device_id)},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
# ---------------------------------------------------------------------------
# Sparkline
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/sparkline",
    summary="Last 12 health readings for sparkline display",
)
async def device_sparkline(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return the last 12 CPU readings (in chronological order) for sparkline
    display in the fleet table.

    The inner query grabs the newest 12 samples (time DESC + LIMIT); the
    outer query flips them back to ascending order for plotting.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT cpu_load, time
            FROM (
                SELECT cpu_load, time
                FROM health_metrics
                WHERE device_id = :device_id
                ORDER BY time DESC
                LIMIT 12
            ) sub
            ORDER BY time ASC
        """),
        {"device_id": str(device_id)},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
# ---------------------------------------------------------------------------
# Fleet summary
# ---------------------------------------------------------------------------
_FLEET_SUMMARY_SQL = """
SELECT
d.id, d.hostname, d.ip_address, d.status, d.model, d.last_seen,
d.uptime_seconds, d.last_cpu_load, d.last_memory_used_pct,
d.latitude, d.longitude,
d.tenant_id, t.name AS tenant_name
FROM devices d
JOIN tenants t ON d.tenant_id = t.id
ORDER BY t.name, d.hostname
"""
@router.get(
    "/tenants/{tenant_id}/fleet/summary",
    summary="Fleet summary for a tenant (latest metrics per device)",
)
async def fleet_summary(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return the fleet summary for one tenant.

    Reads the denormalized devices table (not hypertables) for speed;
    RLS automatically restricts rows to the tenant in context.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    summary = await db.execute(text(_FLEET_SUMMARY_SQL))
    return [dict(record) for record in summary.mappings()]
@router.get(
    "/fleet/summary",
    summary="Cross-tenant fleet summary (super_admin only)",
)
async def fleet_summary_all(
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return fleet summary across ALL tenants.

    Requires super_admin role. The RLS policy for super_admin returns all
    rows across all tenants, so the same SQL query works without modification.
    This avoids the N+1 problem of fetching per-tenant summaries in a loop.

    Raises:
        HTTPException 403: caller is not a super admin.
    """
    # Consistency fix: use the is_super_admin flag (as _check_tenant_access
    # does) rather than a raw string comparison against the role name.
    if not current_user.is_super_admin:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Super admin required",
        )
    result = await db.execute(text(_FLEET_SUMMARY_SQL))
    rows = result.mappings().all()
    return [dict(row) for row in rows]

View File

@@ -0,0 +1,146 @@
"""Report generation API endpoint.
POST /api/tenants/{tenant_id}/reports/generate
Generates PDF or CSV reports for device inventory, metrics summary,
alert history, and change log.
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: require at least operator role.
"""
import uuid
from datetime import datetime
from enum import Enum
from typing import Optional
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, ConfigDict
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.report_service import generate_report
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["reports"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Allow super admins into any tenant (re-pointing the RLS context);
    everyone else must belong to the tenant or a 403 is raised."""
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id == tenant_id:
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied to this tenant",
    )
def _require_operator(current_user: CurrentUser) -> None:
    """Reject viewer-role users — generating reports needs operator or above."""
    if current_user.role != "viewer":
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Reports require at least operator role.",
    )
# ---------------------------------------------------------------------------
# Request schema
# ---------------------------------------------------------------------------
class ReportType(str, Enum):
    """Report kinds; all but device_inventory require a date range."""

    device_inventory = "device_inventory"
    metrics_summary = "metrics_summary"
    alert_history = "alert_history"
    change_log = "change_log"
class ReportFormat(str, Enum):
    """Supported output formats."""

    pdf = "pdf"
    csv = "csv"
class ReportRequest(BaseModel):
    """Report generation request (unknown fields rejected)."""

    model_config = ConfigDict(extra="forbid")
    type: ReportType
    # Required for time-based report types; validated in the endpoint.
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    format: ReportFormat = ReportFormat.pdf
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/reports/generate",
    summary="Generate a report (PDF or CSV)",
    response_class=StreamingResponse,
)
async def generate_report_endpoint(
    tenant_id: uuid.UUID,
    body: ReportRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> StreamingResponse:
    """Generate and download a report as PDF or CSV.

    - device_inventory: no date range required
    - metrics_summary, alert_history, change_log: date_from and date_to required

    Raises:
        HTTPException 422: missing or inverted date range.
        HTTPException 500: report generation failed.
    """
    import io

    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    # Validate date range for time-based reports
    if body.type != ReportType.device_inventory:
        if not body.date_from or not body.date_to:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"date_from and date_to are required for {body.type.value} reports.",
            )
        if body.date_from > body.date_to:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail="date_from must be before date_to.",
            )
    try:
        file_bytes, content_type, filename = await generate_report(
            db=db,
            tenant_id=tenant_id,
            report_type=body.type.value,
            date_from=body.date_from,
            date_to=body.date_to,
            fmt=body.format.value,
        )
    except Exception as exc:
        logger.error("report_generation_failed", error=str(exc), report_type=body.type.value)
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Report generation failed: {str(exc)}",
        ) from exc
    # BUG FIX: the Content-Disposition header previously hard-coded a
    # placeholder string instead of the filename produced by
    # generate_report(), so every download was misnamed.
    return StreamingResponse(
        io.BytesIO(file_bytes),
        media_type=content_type,
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Content-Length": str(len(file_bytes)),
        },
    )

View File

@@ -0,0 +1,155 @@
"""System settings router — global SMTP configuration.
Super-admin only. Stores SMTP settings in system_settings table with
Transit encryption for passwords. Falls back to .env values.
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy import text
from app.config import settings
from app.database import AdminAsyncSessionLocal
from app.middleware.rbac import require_role
from app.services.email_service import SMTPConfig, send_test_email, test_smtp_connection
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/settings", tags=["settings"])
# system_settings keys that together form the global SMTP configuration.
SMTP_KEYS = [
    "smtp_host",
    "smtp_port",
    "smtp_user",
    "smtp_password",
    "smtp_use_tls",
    "smtp_from_address",
    "smtp_provider",
]
class SMTPSettingsUpdate(BaseModel):
    """Full SMTP configuration payload; omitting smtp_password keeps the
    currently stored password unchanged."""

    smtp_host: str
    smtp_port: int = 587
    smtp_user: Optional[str] = None
    smtp_password: Optional[str] = None
    smtp_use_tls: bool = False
    smtp_from_address: str = "noreply@example.com"
    smtp_provider: str = "custom"
class SMTPTestRequest(BaseModel):
    """SMTP test request; any None field falls back to the saved config."""

    to: str
    smtp_host: Optional[str] = None
    smtp_port: Optional[int] = None
    smtp_user: Optional[str] = None
    smtp_password: Optional[str] = None
    smtp_use_tls: Optional[bool] = None
    smtp_from_address: Optional[str] = None
async def _get_system_settings(keys: list[str]) -> dict:
    """Fetch the requested keys from system_settings as a {key: value} dict."""
    query = text("SELECT key, value FROM system_settings WHERE key = ANY(:keys)")
    async with AdminAsyncSessionLocal() as session:
        rows = await session.execute(query, {"keys": keys})
        return dict(rows.fetchall())
async def _set_system_settings(updates: dict, user_id: str) -> None:
    """Upsert each key/value pair into system_settings, stamping the updater."""
    upsert = text("""
                INSERT INTO system_settings (key, value, updated_by, updated_at)
                VALUES (:key, :value, CAST(:user_id AS uuid), now())
                ON CONFLICT (key) DO UPDATE
                SET value = :value, updated_by = CAST(:user_id AS uuid), updated_at = now()
            """)
    async with AdminAsyncSessionLocal() as session:
        for key, value in updates.items():
            params = {
                "key": key,
                # Values are stored as text; None stays NULL.
                "value": None if value is None else str(value),
                "user_id": user_id,
            }
            await session.execute(upsert, params)
        await session.commit()
async def get_smtp_config() -> SMTPConfig:
    """Get SMTP config from system_settings, falling back to .env.

    Each field independently prefers the database value and falls back to
    the corresponding settings.* attribute. use_tls is stored/compared as
    the lowercase string "true".
    NOTE(review): the module docstring mentions Transit encryption for
    passwords, but this reads smtp_password verbatim from the table —
    confirm whether decryption happens elsewhere or is still TODO.
    """
    db_settings = await _get_system_settings(SMTP_KEYS)
    return SMTPConfig(
        host=db_settings.get("smtp_host") or settings.SMTP_HOST,
        port=int(db_settings.get("smtp_port") or settings.SMTP_PORT),
        user=db_settings.get("smtp_user") or settings.SMTP_USER,
        password=db_settings.get("smtp_password") or settings.SMTP_PASSWORD,
        use_tls=(db_settings.get("smtp_use_tls") or str(settings.SMTP_USE_TLS)).lower() == "true",
        from_address=db_settings.get("smtp_from_address") or settings.SMTP_FROM_ADDRESS,
    )
@router.get("/smtp")
async def get_smtp_settings(user=Depends(require_role("super_admin"))):
    """Get current global SMTP configuration. Password is redacted.

    Only a boolean smtp_password_set flag is exposed, never the password
    itself. `source` indicates whether the host came from the database
    override or the environment fallback.
    """
    db_settings = await _get_system_settings(SMTP_KEYS)
    return {
        "smtp_host": db_settings.get("smtp_host") or settings.SMTP_HOST,
        "smtp_port": int(db_settings.get("smtp_port") or settings.SMTP_PORT),
        "smtp_user": db_settings.get("smtp_user") or settings.SMTP_USER or "",
        "smtp_use_tls": (db_settings.get("smtp_use_tls") or str(settings.SMTP_USE_TLS)).lower() == "true",
        "smtp_from_address": db_settings.get("smtp_from_address") or settings.SMTP_FROM_ADDRESS,
        "smtp_provider": db_settings.get("smtp_provider") or "custom",
        "smtp_password_set": bool(db_settings.get("smtp_password") or settings.SMTP_PASSWORD),
        "source": "database" if db_settings.get("smtp_host") else "environment",
    }
@router.put("/smtp")
async def update_smtp_settings(
    data: SMTPSettingsUpdate,
    user=Depends(require_role("super_admin")),
):
    """Update global SMTP configuration.

    The password is only written when explicitly provided, so clients can
    update other fields without re-submitting (or clearing) the secret.
    """
    updates = {
        "smtp_host": data.smtp_host,
        "smtp_port": str(data.smtp_port),
        "smtp_user": data.smtp_user,
        "smtp_use_tls": str(data.smtp_use_tls).lower(),
        "smtp_from_address": data.smtp_from_address,
        "smtp_provider": data.smtp_provider,
    }
    if data.smtp_password is not None:
        updates["smtp_password"] = data.smtp_password
    await _set_system_settings(updates, str(user.id))
    return {"status": "ok"}
@router.post("/smtp/test")
async def test_smtp(
    data: SMTPTestRequest,
    user=Depends(require_role("super_admin")),
):
    """Test SMTP connection and optionally send a test email.

    Explicit `is not None` checks (rather than truthiness) let callers
    intentionally override with empty strings / False. If the connection
    test fails its result is returned immediately; otherwise a test email
    is sent to `data.to` when provided.
    """
    # Use provided values or fall back to saved config
    saved = await get_smtp_config()
    config = SMTPConfig(
        host=data.smtp_host or saved.host,
        port=data.smtp_port if data.smtp_port is not None else saved.port,
        user=data.smtp_user if data.smtp_user is not None else saved.user,
        password=data.smtp_password if data.smtp_password is not None else saved.password,
        use_tls=data.smtp_use_tls if data.smtp_use_tls is not None else saved.use_tls,
        from_address=data.smtp_from_address or saved.from_address,
    )
    conn_result = await test_smtp_connection(config)
    if not conn_result["success"]:
        return conn_result
    if data.to:
        return await send_test_email(data.to, config)
    return conn_result

141
backend/app/routers/sse.py Normal file
View File

@@ -0,0 +1,141 @@
"""SSE streaming endpoint for real-time event delivery.
Provides a Server-Sent Events endpoint per tenant that streams device status,
alert, config push, and firmware progress events in real time. Authentication
is via a short-lived, single-use exchange token (obtained from POST /auth/sse-token)
to avoid exposing the full JWT in query parameters.
"""
import asyncio
import json
import uuid
from typing import AsyncGenerator, Optional
import redis.asyncio as aioredis
import structlog
from fastapi import APIRouter, HTTPException, Query, Request, status
from sse_starlette.sse import EventSourceResponse, ServerSentEvent
from app.services.sse_manager import SSEConnectionManager
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["sse"])
# ─── Redis for SSE token validation ───────────────────────────────────────────
_redis: aioredis.Redis | None = None
async def _get_sse_redis() -> aioredis.Redis:
    """Return the module-wide Redis client, creating it on first use."""
    global _redis
    if _redis is not None:
        return _redis
    from app.config import settings

    _redis = aioredis.from_url(settings.REDIS_URL, decode_responses=True)
    return _redis
async def _validate_sse_token(token: str) -> dict:
    """Redeem a single-use SSE exchange token.

    GETDEL atomically fetches and destroys the Redis entry, so a token
    can only ever be redeemed once.

    Args:
        token: SSE exchange token string (from query param).

    Returns:
        Dict with user_id, tenant_id, and role.

    Raises:
        HTTPException 401: If the token is invalid, expired, or already used.
    """
    redis = await _get_sse_redis()
    payload = await redis.getdel(f"sse_token:{token}")
    if payload:
        return json.loads(payload)
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid or expired SSE token",
    )
@router.get(
    "/tenants/{tenant_id}/events/stream",
    summary="SSE event stream for real-time tenant events",
    response_class=EventSourceResponse,
)
async def event_stream(
    request: Request,
    tenant_id: uuid.UUID,
    token: str = Query(..., description="Short-lived SSE exchange token (from POST /auth/sse-token)"),
) -> EventSourceResponse:
    """Stream real-time events for a tenant via Server-Sent Events.

    Event types: device_status, alert_fired, alert_resolved, config_push,
    firmware_progress, metric_update.
    Supports Last-Event-ID header for reconnection replay.
    Sends heartbeat comments every 15 seconds on idle connections.
    """
    # Validate exchange token from query parameter (single-use; TTL is set
    # where the token is issued — presumably ~30s, confirm in /auth/sse-token)
    user_context = await _validate_sse_token(token)
    user_role = user_context.get("role", "")
    user_tenant_id = user_context.get("tenant_id")
    user_id = user_context.get("user_id", "")
    # Authorization: user must belong to the requested tenant or be super_admin
    if user_role != "super_admin" and (user_tenant_id is None or str(user_tenant_id) != str(tenant_id)):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Not authorized for this tenant",
        )
    # super_admin receives events from ALL tenants (tenant_id filter = None)
    filter_tenant_id: Optional[str] = None if user_role == "super_admin" else str(tenant_id)
    # Generate unique connection ID
    connection_id = f"sse-{uuid.uuid4().hex[:12]}"
    # Check for Last-Event-ID header (reconnection replay)
    last_event_id = request.headers.get("Last-Event-ID")
    logger.info(
        "sse.stream_requested",
        connection_id=connection_id,
        tenant_id=str(tenant_id),
        user_id=user_id,
        role=user_role,
        last_event_id=last_event_id,
    )
    manager = SSEConnectionManager()
    queue = await manager.connect(
        connection_id=connection_id,
        tenant_id=filter_tenant_id,
        last_event_id=last_event_id,
    )
    async def event_generator() -> AsyncGenerator[ServerSentEvent, None]:
        """Yield SSE events from the queue with 15s heartbeat on idle."""
        try:
            while True:
                try:
                    # Block up to 15s for the next event; on timeout emit a
                    # comment-only heartbeat so proxies keep the stream open.
                    event = await asyncio.wait_for(queue.get(), timeout=15.0)
                    yield ServerSentEvent(
                        data=event["data"],
                        event=event["event"],
                        id=event["id"],
                    )
                except asyncio.TimeoutError:
                    # Send heartbeat comment to keep connection alive
                    yield ServerSentEvent(comment="heartbeat")
        except asyncio.CancelledError:
            # Client disconnected or server shutting down; fall through to cleanup.
            break
        finally:
            # NOTE(review): connect() was given connection_id but disconnect()
            # takes no arguments — confirm SSEConnectionManager tracks the
            # current connection internally (e.g. it is a singleton).
            await manager.disconnect()
            logger.info("sse.stream_closed", connection_id=connection_id)
    return EventSourceResponse(event_generator())

View File

@@ -0,0 +1,613 @@
"""
Config template CRUD, preview, and push API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/templates/
Provides:
- GET /templates -- list templates (optional tag filter)
- POST /templates -- create a template
- GET /templates/{id} -- get single template
- PUT /templates/{id} -- update a template
- DELETE /templates/{id} -- delete a template
- POST /templates/{id}/preview -- preview rendered template for a device
- POST /templates/{id}/push -- push template to devices (sequential rollout)
- GET /templates/push-status/{rollout_id} -- poll push progress
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read (GET/preview); operator and above = write (POST/PUT/DELETE/push).
"""
import asyncio
import logging
import uuid
from datetime import datetime, timezone
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.config_template import ConfigTemplate, ConfigTemplateTag, TemplatePushJob
from app.models.device import Device
from app.services import template_service
logger = logging.getLogger(__name__)
router = APIRouter(tags=["templates"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure *current_user* may operate on *tenant_id*.

    Super admins may access any tenant; for them the RLS tenant context is
    switched to the requested tenant. Everyone else must belong to the
    tenant, otherwise a 403 is raised.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    # Super admin: point row-level security at the target tenant so
    # subsequent queries on this session see that tenant's rows.
    from app.database import set_tenant_context
    await set_tenant_context(db, str(tenant_id))
def _serialize_template(template: ConfigTemplate, include_content: bool = False) -> dict:
"""Serialize a ConfigTemplate to a response dict."""
result: dict[str, Any] = {
"id": str(template.id),
"name": template.name,
"description": template.description,
"tags": [tag.name for tag in template.tags],
"variable_count": len(template.variables) if template.variables else 0,
"created_at": template.created_at.isoformat(),
"updated_at": template.updated_at.isoformat(),
}
if include_content:
result["content"] = template.content
result["variables"] = template.variables or []
return result
# ---------------------------------------------------------------------------
# Request/Response schemas
# ---------------------------------------------------------------------------
class VariableDef(BaseModel):
    """Declared template variable: name, type tag, optional default and description."""

    model_config = ConfigDict(extra="forbid")
    name: str
    # Type tag consumed by template_service.validate_variable at preview/push time.
    type: str = "string"  # string | ip | integer | boolean | subnet
    default: Optional[str] = None
    description: Optional[str] = None
class TemplateCreateRequest(BaseModel):
    """Payload for POST /templates: new template content plus metadata."""

    model_config = ConfigDict(extra="forbid")
    name: str
    description: Optional[str] = None
    content: str
    # Pydantic deep-copies field defaults per instance, so the mutable
    # defaults below are safe (unlike plain function defaults).
    variables: list[VariableDef] = []
    tags: list[str] = []
class TemplateUpdateRequest(BaseModel):
    """Payload for PUT /templates/{id}: full replacement (PUT semantics)."""

    model_config = ConfigDict(extra="forbid")
    name: str
    description: Optional[str] = None
    content: str
    # Mutable defaults are safe: pydantic deep-copies them per instance.
    variables: list[VariableDef] = []
    tags: list[str] = []
class PreviewRequest(BaseModel):
    """Payload for template preview: target device plus variable overrides."""

    model_config = ConfigDict(extra="forbid")
    device_id: str
    variables: dict[str, str] = {}
class PushRequest(BaseModel):
    """Payload for template push: target devices plus variable overrides."""

    model_config = ConfigDict(extra="forbid")
    device_ids: list[str]
    variables: dict[str, str] = {}
# ---------------------------------------------------------------------------
# CRUD endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/templates",
    summary="List config templates",
    dependencies=[require_scope("config:read")],
)
async def list_templates(
    tenant_id: uuid.UUID,
    tag: Optional[str] = Query(None, description="Filter by tag name"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[dict]:
    """Return the tenant's templates, newest first, optionally filtered by tag."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.tenant_id == tenant_id)  # type: ignore[arg-type]
        .order_by(ConfigTemplate.updated_at.desc())
    )
    if tag:
        # Restrict to templates carrying the requested (tenant-scoped) tag.
        tagged_ids = select(ConfigTemplateTag.template_id).where(
            ConfigTemplateTag.name == tag,
            ConfigTemplateTag.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        stmt = stmt.where(ConfigTemplate.id.in_(tagged_ids))  # type: ignore[attr-defined]
    templates = (await db.execute(stmt)).scalars().all()
    return [_serialize_template(tpl) for tpl in templates]
@router.post(
    "/tenants/{tenant_id}/templates",
    summary="Create a config template",
    status_code=status.HTTP_201_CREATED,
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def create_template(
    request: Request,  # required by the slowapi limiter decorator
    tenant_id: uuid.UUID,
    body: TemplateCreateRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Create a new config template with Jinja2 content and variable definitions.

    Returns the full serialized template (content and variables included).
    Raises 403 for cross-tenant access via _check_tenant_access.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Auto-extract variables from content for comparison
    detected = template_service.extract_variables(body.content)
    provided_names = {v.name for v in body.variables}
    unmatched = set(detected) - provided_names
    if unmatched:
        # NOTE(review): log only — despite the "auto-adding" wording, no
        # declarations are actually appended to body.variables here.
        logger.warning(
            "Template '%s' has undeclared variables: %s (auto-adding as string type)",
            body.name, unmatched,
        )
    # Create template
    template = ConfigTemplate(
        tenant_id=tenant_id,
        name=body.name,
        description=body.description,
        content=body.content,
        variables=[v.model_dump() for v in body.variables],
    )
    db.add(template)
    await db.flush()  # Get the generated ID (needed for the tag rows below)
    # Create tags
    for tag_name in body.tags:
        tag = ConfigTemplateTag(
            tenant_id=tenant_id,
            name=tag_name,
            template_id=template.id,
        )
        db.add(tag)
    await db.flush()
    # Re-query with tags loaded so _serialize_template sees the tag names
    result = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.id == template.id)  # type: ignore[arg-type]
    )
    template = result.scalar_one()
    return _serialize_template(template, include_content=True)
@router.get(
    "/tenants/{tenant_id}/templates/{template_id}",
    summary="Get a single config template",
    dependencies=[require_scope("config:read")],
)
async def get_template(
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Fetch one template with full content, variables, and tags; 404 if absent."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = (await db.execute(stmt)).scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    return _serialize_template(template, include_content=True)
@router.put(
    "/tenants/{tenant_id}/templates/{template_id}",
    summary="Update a config template",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def update_template(
    request: Request,  # required by the slowapi limiter decorator
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: TemplateUpdateRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Update an existing config template.

    All fields are replaced wholesale (PUT semantics), including the tag
    set. Raises 404 if the template does not exist in this tenant.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = result.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    # Update fields
    template.name = body.name
    template.description = body.description
    template.content = body.content
    template.variables = [v.model_dump() for v in body.variables]
    # Replace tags: delete old, create new
    await db.execute(
        delete(ConfigTemplateTag).where(
            ConfigTemplateTag.template_id == template_id  # type: ignore[arg-type]
        )
    )
    for tag_name in body.tags:
        tag = ConfigTemplateTag(
            tenant_id=tenant_id,
            name=tag_name,
            template_id=template.id,
        )
        db.add(tag)
    await db.flush()
    # Re-query with fresh tags: the relationship loaded above is stale
    # after the bulk delete.
    result = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.id == template.id)  # type: ignore[arg-type]
    )
    template = result.scalar_one()
    return _serialize_template(template, include_content=True)
@router.delete(
    "/tenants/{tenant_id}/templates/{template_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a config template",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def delete_template(
    request: Request,
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a config template. Tags are cascade-deleted. Push jobs are SET NULL."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = select(ConfigTemplate).where(
        ConfigTemplate.id == template_id,  # type: ignore[arg-type]
        ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
    )
    template = (await db.execute(stmt)).scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    await db.delete(template)
# ---------------------------------------------------------------------------
# Preview & Push endpoints
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/templates/{template_id}/preview",
    summary="Preview template rendered for a specific device",
    dependencies=[require_scope("config:read")],
)
async def preview_template(
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: PreviewRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Render a template with device context and custom variables for preview.

    Returns {"rendered": ..., "device_hostname": ...}.
    Raises 404 when the template or device is missing, 422 when a variable
    fails validation or rendering errors out.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Load template
    result = await db.execute(
        select(ConfigTemplate).where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = result.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    # Load device (tenant scoping relies on RLS via get_db — no explicit
    # tenant filter here)
    result = await db.execute(
        select(Device).where(Device.id == body.device_id)  # type: ignore[arg-type]
    )
    device = result.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {body.device_id} not found",
        )
    # Validate variables against type definitions
    if template.variables:
        for var_def in template.variables:
            var_name = var_def.get("name", "")
            var_type = var_def.get("type", "string")
            value = body.variables.get(var_name)
            if value is None:
                # Use default if available
                default = var_def.get("default")
                if default is not None:
                    body.variables[var_name] = default
                # NOTE(review): a missing value with no default is skipped
                # without validation; the render step below is the only
                # backstop — confirm this is intentional.
                continue
            error = template_service.validate_variable(var_name, value, var_type)
            if error:
                raise HTTPException(
                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                    detail=error,
                )
    # Render
    try:
        rendered = template_service.render_template(
            template.content,
            {
                "hostname": device.hostname,
                "ip_address": device.ip_address,
                "model": device.model,
            },
            body.variables,
        )
    except Exception as exc:
        # Surface template/Jinja2 errors as a client-side validation failure.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Template rendering failed: {exc}",
        )
    return {
        "rendered": rendered,
        "device_hostname": device.hostname,
    }
@router.post(
    "/tenants/{tenant_id}/templates/{template_id}/push",
    summary="Push template to devices (sequential rollout with panic-revert)",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def push_template(
    request: Request,  # required by the slowapi limiter decorator
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: PushRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Start a template push to one or more devices.

    Renders the template per device, creates one push job per device under
    a shared rollout_id, then launches the background sequential rollout.
    Returns the rollout_id for status polling.

    Raises:
        404: template or any device not found (the request-scoped session
             is rolled back, so no partial jobs survive).
        400: empty device_ids list.
        422: variable validation or rendering failure.

    Fix vs. previous version: job IDs are now serialized only AFTER the
    session flush — before the flush, a database-generated primary key is
    still None and the response would have contained "None" job_ids.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Load template
    result = await db.execute(
        select(ConfigTemplate).where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = result.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    if not body.device_ids:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one device_id is required",
        )
    # Validate variables against declared types, filling in defaults.
    if template.variables:
        for var_def in template.variables:
            var_name = var_def.get("name", "")
            var_type = var_def.get("type", "string")
            value = body.variables.get(var_name)
            if value is None:
                default = var_def.get("default")
                if default is not None:
                    body.variables[var_name] = default
                # NOTE(review): missing value with no default skips
                # validation; per-device rendering below is the backstop.
                continue
            error = template_service.validate_variable(var_name, value, var_type)
            if error:
                raise HTTPException(
                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                    detail=error,
                )
    rollout_id = uuid.uuid4()
    pending: list[tuple[TemplatePushJob, Device]] = []
    for device_id_str in body.device_ids:
        # Load device to render template per-device
        result = await db.execute(
            select(Device).where(Device.id == device_id_str)  # type: ignore[arg-type]
        )
        device = result.scalar_one_or_none()
        if device is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Device {device_id_str} not found",
            )
        # Render template with this device's context
        try:
            rendered = template_service.render_template(
                template.content,
                {
                    "hostname": device.hostname,
                    "ip_address": device.ip_address,
                    "model": device.model,
                },
                body.variables,
            )
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"Template rendering failed for device {device.hostname}: {exc}",
            )
        # Create push job (serialized into the response after the flush)
        job = TemplatePushJob(
            tenant_id=tenant_id,
            template_id=template_id,
            device_id=device.id,
            rollout_id=rollout_id,
            rendered_content=rendered,
            status="pending",
        )
        db.add(job)
        pending.append((job, device))
    # Flush BEFORE serializing job IDs so database-generated primary keys
    # are populated on the ORM objects.
    await db.flush()
    jobs_created = [
        {
            "job_id": str(job.id),
            "device_id": str(device.id),
            "device_hostname": device.hostname,
        }
        for job, device in pending
    ]
    # Start background push task.
    # NOTE(review): the task reference is not retained; asyncio keeps only
    # weak references to tasks, so consider storing it (e.g. in a
    # module-level set) to guard against premature garbage collection.
    asyncio.create_task(template_service.push_to_devices(str(rollout_id)))
    return {
        "rollout_id": str(rollout_id),
        "jobs": jobs_created,
    }
@router.get(
    "/tenants/{tenant_id}/templates/push-status/{rollout_id}",
    summary="Poll push progress for a rollout",
    dependencies=[require_scope("config:read")],
)
async def push_status(
    tenant_id: uuid.UUID,
    rollout_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Return all push job statuses for a rollout, joined with device hostnames."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(TemplatePushJob, Device.hostname)
        .join(Device, TemplatePushJob.device_id == Device.id)  # type: ignore[arg-type]
        .where(
            TemplatePushJob.rollout_id == rollout_id,  # type: ignore[arg-type]
            TemplatePushJob.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        .order_by(TemplatePushJob.created_at.asc())
    )
    rows = (await db.execute(stmt)).all()
    jobs = [
        {
            "device_id": str(job.device_id),
            "hostname": hostname,
            "status": job.status,
            "error_message": job.error_message,
            "started_at": job.started_at.isoformat() if job.started_at else None,
            "completed_at": job.completed_at.isoformat() if job.completed_at else None,
        }
        for job, hostname in rows
    ]
    return {
        "rollout_id": str(rollout_id),
        "jobs": jobs,
    }

View File

@@ -0,0 +1,367 @@
"""
Tenant management endpoints.
GET /api/tenants — list tenants (super_admin: all; tenant_admin: own only)
POST /api/tenants — create tenant (super_admin only)
GET /api/tenants/{id} — get tenant detail
PUT /api/tenants/{id} — update tenant (super_admin only)
DELETE /api/tenants/{id} — delete tenant (super_admin only)
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.middleware.rate_limit import limiter
from app.database import get_admin_db, get_db
from app.middleware.rbac import require_super_admin, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser
from app.models.device import Device
from app.models.tenant import Tenant
from app.models.user import User
from app.schemas.tenant import TenantCreate, TenantResponse, TenantUpdate
router = APIRouter(prefix="/tenants", tags=["tenants"])
async def _get_tenant_response(
    tenant: Tenant,
    db: AsyncSession,
) -> TenantResponse:
    """Assemble a TenantResponse, attaching live user and device counts."""
    users = await db.execute(
        select(func.count(User.id)).where(User.tenant_id == tenant.id)
    )
    devices = await db.execute(
        select(func.count(Device.id)).where(Device.tenant_id == tenant.id)
    )
    return TenantResponse(
        id=tenant.id,
        name=tenant.name,
        description=tenant.description,
        contact_email=tenant.contact_email,
        user_count=users.scalar_one() or 0,
        device_count=devices.scalar_one() or 0,
        created_at=tenant.created_at,
    )
@router.get("", response_model=list[TenantResponse], summary="List tenants")
async def list_tenants(
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> list[TenantResponse]:
    """
    List tenants.
    - super_admin: sees all tenants
    - tenant_admin: sees only their own tenant
    """
    if current_user.is_super_admin:
        stmt = select(Tenant).order_by(Tenant.name)
    elif current_user.tenant_id:
        stmt = select(Tenant).where(Tenant.id == current_user.tenant_id)
    else:
        # Non-super-admin without a tenant sees nothing.
        return []
    tenants = (await db.execute(stmt)).scalars().all()
    return [await _get_tenant_response(t, db) for t in tenants]
@router.post("", response_model=TenantResponse, status_code=status.HTTP_201_CREATED, summary="Create a tenant")
@limiter.limit("20/minute")
async def create_tenant(
    request: Request,  # required by the slowapi limiter decorator
    data: TenantCreate,
    current_user: CurrentUser = Depends(require_super_admin),
    db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
    """Create a new tenant (super_admin only).

    Beyond inserting the row, this seeds default alert rules and starter
    config templates, then best-effort provisions an OpenBao Transit key.
    Raises 409 when the tenant name is already taken.
    """
    # Check for name uniqueness
    existing = await db.execute(select(Tenant).where(Tenant.name == data.name))
    if existing.scalar_one_or_none():
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Tenant with name '{data.name}' already exists",
        )
    tenant = Tenant(name=data.name, description=data.description, contact_email=data.contact_email)
    db.add(tenant)
    # Commit early so tenant.id is available for the seeding steps below.
    await db.commit()
    await db.refresh(tenant)
    # Seed default alert rules for new tenant
    # Tuple layout: (name, metric, operator, threshold, duration_polls, severity)
    default_rules = [
        ("High CPU Usage", "cpu_load", "gt", 90, 5, "warning"),
        ("High Memory Usage", "memory_used_pct", "gt", 90, 5, "warning"),
        ("High Disk Usage", "disk_used_pct", "gt", 85, 3, "warning"),
        ("Device Offline", "device_offline", "eq", 1, 1, "critical"),
    ]
    for name, metric, operator, threshold, duration, sev in default_rules:
        await db.execute(text("""
INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
VALUES (gen_random_uuid(), CAST(:tenant_id AS uuid), :name, :metric, :operator, :threshold, :duration, :severity, TRUE, TRUE)
"""), {
            "tenant_id": str(tenant.id), "name": name, "metric": metric,
            "operator": operator, "threshold": threshold, "duration": duration, "severity": sev,
        })
    await db.commit()
    # Seed starter config templates for new tenant
    await _seed_starter_templates(db, tenant.id)
    await db.commit()
    # Provision OpenBao Transit key for the new tenant (non-blocking)
    try:
        from app.config import settings
        from app.services.key_service import provision_tenant_key
        if settings.OPENBAO_ADDR:
            await provision_tenant_key(db, tenant.id)
            await db.commit()
    except Exception as exc:
        # Best-effort: key provisioning failure must not fail tenant creation;
        # a startup task retries later (see warning text).
        import logging
        logging.getLogger(__name__).warning(
            "OpenBao key provisioning failed for tenant %s (will be provisioned on next startup): %s",
            tenant.id,
            exc,
        )
    return await _get_tenant_response(tenant, db)
@router.get("/{tenant_id}", response_model=TenantResponse, summary="Get tenant detail")
async def get_tenant(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
    """Get tenant detail. Tenant admins can only view their own tenant."""
    # Non-super-admins may only inspect the tenant they belong to.
    allowed = current_user.is_super_admin or current_user.tenant_id == tenant_id
    if not allowed:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
    lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
    tenant = lookup.scalar_one_or_none()
    if tenant is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Tenant not found",
        )
    return await _get_tenant_response(tenant, db)
@router.put("/{tenant_id}", response_model=TenantResponse, summary="Update a tenant")
@limiter.limit("20/minute")
async def update_tenant(
    request: Request,
    tenant_id: uuid.UUID,
    data: TenantUpdate,
    current_user: CurrentUser = Depends(require_super_admin),
    db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
    """Update tenant (super_admin only). Only fields present in *data* change."""
    lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
    tenant = lookup.scalar_one_or_none()
    if tenant is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Tenant not found",
        )
    if data.name is not None:
        # Renaming must not collide with another tenant's name.
        clash = await db.execute(
            select(Tenant).where(Tenant.name == data.name, Tenant.id != tenant_id)
        )
        if clash.scalar_one_or_none() is not None:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail=f"Tenant with name '{data.name}' already exists",
            )
        tenant.name = data.name
    if data.description is not None:
        tenant.description = data.description
    if data.contact_email is not None:
        tenant.contact_email = data.contact_email
    await db.commit()
    await db.refresh(tenant)
    return await _get_tenant_response(tenant, db)
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Delete a tenant")
@limiter.limit("5/minute")
async def delete_tenant(
    request: Request,
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_super_admin),
    db: AsyncSession = Depends(get_admin_db),
) -> None:
    """Delete tenant (super_admin only). Cascades to all users and devices."""
    lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
    tenant = lookup.scalar_one_or_none()
    if tenant is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Tenant not found",
        )
    await db.delete(tenant)
    await db.commit()
# ---------------------------------------------------------------------------
# Starter template seeding
# ---------------------------------------------------------------------------
# Starter templates seeded into every new tenant by _seed_starter_templates.
# Each entry mirrors a config_templates row: name, description, RouterOS
# script content (Jinja2), and declared variables with defaults. The content
# strings are runtime data — edit with care.
_STARTER_TEMPLATES = [
    {
        "name": "Basic Router",
        "description": "Complete SOHO/branch router setup: WAN on ether1, LAN bridge, DHCP, DNS, NAT, basic firewall",
        "content": """/interface bridge add name=bridge-lan comment="LAN bridge"
/interface bridge port add bridge=bridge-lan interface=ether2
/interface bridge port add bridge=bridge-lan interface=ether3
/interface bridge port add bridge=bridge-lan interface=ether4
/interface bridge port add bridge=bridge-lan interface=ether5
# WAN — DHCP client on ether1
/ip dhcp-client add interface={{ wan_interface }} disabled=no comment="WAN uplink"
# LAN address
/ip address add address={{ lan_gateway }}/{{ lan_cidr }} interface=bridge-lan
# DNS
/ip dns set servers={{ dns_servers }} allow-remote-requests=yes
# DHCP server for LAN
/ip pool add name=lan-pool ranges={{ dhcp_start }}-{{ dhcp_end }}
/ip dhcp-server network add address={{ lan_network }}/{{ lan_cidr }} gateway={{ lan_gateway }} dns-server={{ lan_gateway }}
/ip dhcp-server add name=lan-dhcp interface=bridge-lan address-pool=lan-pool disabled=no
# NAT masquerade
/ip firewall nat add chain=srcnat out-interface={{ wan_interface }} action=masquerade
# Firewall — input chain
/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} action=drop comment="Drop all other WAN input"
# Firewall — forward chain
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward in-interface=bridge-lan out-interface={{ wan_interface }} action=accept comment="Allow LAN to WAN"
add chain=forward action=drop comment="Drop everything else"
# NTP
/system ntp client set enabled=yes servers={{ ntp_server }}
# Identity
/system identity set name={{ device.hostname }}""",
        "variables": [
            {"name": "wan_interface", "type": "string", "default": "ether1", "description": "WAN-facing interface"},
            {"name": "lan_gateway", "type": "ip", "default": "192.168.88.1", "description": "LAN gateway IP"},
            {"name": "lan_cidr", "type": "integer", "default": "24", "description": "LAN subnet mask bits"},
            {"name": "lan_network", "type": "ip", "default": "192.168.88.0", "description": "LAN network address"},
            {"name": "dhcp_start", "type": "ip", "default": "192.168.88.100", "description": "DHCP pool start"},
            {"name": "dhcp_end", "type": "ip", "default": "192.168.88.254", "description": "DHCP pool end"},
            {"name": "dns_servers", "type": "string", "default": "8.8.8.8,8.8.4.4", "description": "Upstream DNS servers"},
            {"name": "ntp_server", "type": "string", "default": "pool.ntp.org", "description": "NTP server"},
        ],
    },
    {
        "name": "Basic Firewall",
        "description": "Standard firewall ruleset with WAN protection and LAN forwarding",
        "content": """/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=8291 action=drop comment="Block Winbox from WAN"
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=22 action=drop comment="Block SSH from WAN"
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward src-address={{ allowed_network }} action=accept
add chain=forward action=drop""",
        "variables": [
            {"name": "wan_interface", "type": "string", "default": "ether1", "description": "WAN-facing interface"},
            {"name": "allowed_network", "type": "subnet", "default": "192.168.88.0/24", "description": "Allowed source network"},
        ],
    },
    {
        "name": "DHCP Server Setup",
        "description": "Configure DHCP server with address pool, DNS, and gateway",
        "content": """/ip pool add name=dhcp-pool ranges={{ pool_start }}-{{ pool_end }}
/ip dhcp-server network add address={{ gateway }}/24 gateway={{ gateway }} dns-server={{ dns_server }}
/ip dhcp-server add name=dhcp1 interface={{ interface }} address-pool=dhcp-pool disabled=no""",
        "variables": [
            {"name": "pool_start", "type": "ip", "default": "192.168.88.100", "description": "DHCP pool start address"},
            {"name": "pool_end", "type": "ip", "default": "192.168.88.254", "description": "DHCP pool end address"},
            {"name": "gateway", "type": "ip", "default": "192.168.88.1", "description": "Default gateway"},
            {"name": "dns_server", "type": "ip", "default": "8.8.8.8", "description": "DNS server address"},
            {"name": "interface", "type": "string", "default": "bridge-lan", "description": "Interface to serve DHCP on"},
        ],
    },
    {
        "name": "Wireless AP Config",
        "description": "Configure wireless access point with WPA2 security",
        "content": """/interface wireless security-profiles add name=portal-wpa2 mode=dynamic-keys authentication-types=wpa2-psk wpa2-pre-shared-key={{ password }}
/interface wireless set wlan1 mode=ap-bridge ssid={{ ssid }} security-profile=portal-wpa2 frequency={{ frequency }} channel-width={{ channel_width }} disabled=no""",
        "variables": [
            {"name": "ssid", "type": "string", "default": "MikroTik-AP", "description": "Wireless network name"},
            {"name": "password", "type": "string", "default": "", "description": "WPA2 pre-shared key (min 8 characters)"},
            {"name": "frequency", "type": "integer", "default": "2412", "description": "Wireless frequency in MHz"},
            {"name": "channel_width", "type": "string", "default": "20/40mhz-XX", "description": "Channel width setting"},
        ],
    },
    {
        "name": "Initial Device Setup",
        "description": "Set device identity, NTP, DNS, and disable unused services",
        "content": """/system identity set name={{ device.hostname }}
/system ntp client set enabled=yes servers={{ ntp_server }}
/ip dns set servers={{ dns_servers }} allow-remote-requests=no
/ip service disable telnet,ftp,www,api-ssl
/ip service set ssh port=22
/ip service set winbox port=8291""",
        "variables": [
            # NOTE(review): default "pool.ntp.org" is a hostname but the type
            # is "ip" — likely should be "string" (cf. "Basic Router" above).
            {"name": "ntp_server", "type": "ip", "default": "pool.ntp.org", "description": "NTP server address"},
            {"name": "dns_servers", "type": "string", "default": "8.8.8.8,8.8.4.4", "description": "Comma-separated DNS servers"},
        ],
    },
]
async def _seed_starter_templates(db, tenant_id) -> None:
    """Insert the built-in starter config templates for a freshly created tenant."""
    import json as _json
    for starter in _STARTER_TEMPLATES:
        params = {
            "tid": str(tenant_id),
            "name": starter["name"],
            "desc": starter["description"],
            "content": starter["content"],
            "vars": _json.dumps(starter["variables"]),
        }
        await db.execute(text("""
INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
VALUES (gen_random_uuid(), CAST(:tid AS uuid), :name, :desc, :content, CAST(:vars AS jsonb))
"""), params)

View File

@@ -0,0 +1,374 @@
"""
Network topology inference endpoint.
Endpoint: GET /api/tenants/{tenant_id}/topology
Builds a topology graph of managed devices by:
1. Querying all devices for the tenant (via RLS)
2. Fetching /ip/neighbor tables from online devices via NATS
3. Matching neighbor addresses to known devices
4. Falling back to shared /24 subnet inference when neighbor data is unavailable
5. Caching results in Redis with 5-minute TTL
"""
import asyncio
import ipaddress
import json
import logging
import uuid
from typing import Any
import redis.asyncio as aioredis
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db, set_tenant_context
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.models.vpn import VpnPeer
from app.services import routeros_proxy
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["topology"])
# ---------------------------------------------------------------------------
# Redis connection (lazy initialized, same pattern as routeros_proxy NATS)
# ---------------------------------------------------------------------------
_redis: aioredis.Redis | None = None
TOPOLOGY_CACHE_TTL = 300 # 5 minutes
async def _get_redis() -> aioredis.Redis:
    """Return the shared Redis client for topology caching, creating it lazily."""
    global _redis
    if _redis is not None:
        return _redis
    _redis = aioredis.from_url(settings.REDIS_URL, decode_responses=True)
    logger.info("Topology Redis connection established")
    return _redis
# ---------------------------------------------------------------------------
# Response schemas
# ---------------------------------------------------------------------------
class TopologyNode(BaseModel):
    """A single managed device rendered as a graph node."""

    id: str  # device UUID, serialized to string
    hostname: str
    ip: str
    status: str  # Device.status value; "online" devices get neighbor probing
    model: str | None
    uptime: str | None  # human-readable string produced by _format_uptime
class TopologyEdge(BaseModel):
    """An undirected link between two devices (IDs are device UUID strings)."""

    source: str
    target: str
    label: str  # interface name, "vpn tunnel", or "shared subnet"
class TopologyResponse(BaseModel):
    """Complete topology graph: all tenant devices plus de-duplicated edges."""

    nodes: list[TopologyNode]
    edges: list[TopologyEdge]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Authorize the caller for *tenant_id* or raise 403.

    Super admins may view any tenant; for them the session's RLS context
    is switched to the requested tenant. Everyone else must already
    belong to that tenant.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    await set_tenant_context(db, str(tenant_id))
def _format_uptime(seconds: int | None) -> str | None:
"""Convert uptime seconds to a human-readable string."""
if seconds is None:
return None
days = seconds // 86400
hours = (seconds % 86400) // 3600
minutes = (seconds % 3600) // 60
if days > 0:
return f"{days}d {hours}h {minutes}m"
if hours > 0:
return f"{hours}h {minutes}m"
return f"{minutes}m"
def _get_subnet_key(ip_str: str) -> str | None:
"""Return the /24 network key for an IPv4 address, or None if invalid."""
try:
addr = ipaddress.ip_address(ip_str)
if isinstance(addr, ipaddress.IPv4Address):
network = ipaddress.ip_network(f"{ip_str}/24", strict=False)
return str(network)
except ValueError:
pass
return None
def _build_edges_from_neighbors(
    neighbor_data: dict[str, list[dict[str, Any]]],
    ip_to_device: dict[str, str],
) -> list[TopologyEdge]:
    """Translate raw neighbor tables into de-duplicated topology edges.

    Args:
        neighbor_data: Mapping of device_id -> list of neighbor entries.
        ip_to_device: Mapping of IP address -> device_id for known devices.

    Returns:
        Edges between known devices, with A->B and B->A collapsed into one.
    """
    linked: set[tuple[str, str]] = set()
    edges: list[TopologyEdge] = []
    for source_id, entries in neighbor_data.items():
        for entry in entries:
            # RouterOS reports the neighbor's IP under 'address' (or 'address4')
            addr = entry.get("address") or entry.get("address4", "")
            if not addr:
                continue
            target_id = ip_to_device.get(addr)
            # Skip neighbors we don't manage, and self-references
            if target_id is None or target_id == source_id:
                continue
            pair = tuple(sorted([source_id, target_id]))
            if pair in linked:
                continue  # mirror of an edge we already emitted
            linked.add(pair)
            edges.append(
                TopologyEdge(
                    source=source_id,
                    target=target_id,
                    label=entry.get("interface", "neighbor"),
                )
            )
    return edges
def _build_edges_from_subnets(
    devices: list[Device],
    existing_connected: set[tuple[str, str]],
) -> list[TopologyEdge]:
    """Infer edges from shared /24 subnets for devices without neighbor data.

    Device pairs already present in *existing_connected* are skipped; every
    newly inferred pair is also added to that set (mutated in place).
    """
    # Bucket device IDs by their /24 network
    groups: dict[str, list[str]] = {}
    for dev in devices:
        key = _get_subnet_key(dev.ip_address)
        if key:
            groups.setdefault(key, []).append(str(dev.id))
    inferred: list[TopologyEdge] = []
    for members in groups.values():
        if len(members) < 2:
            continue
        # Fully connect every pair within the subnet
        for idx, left in enumerate(members):
            for right in members[idx + 1 :]:
                pair = tuple(sorted([left, right]))
                if pair in existing_connected:
                    continue
                inferred.append(
                    TopologyEdge(
                        source=left,
                        target=right,
                        label="shared subnet",
                    )
                )
                existing_connected.add(pair)
    return inferred
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/topology",
    response_model=TopologyResponse,
    summary="Get network topology for a tenant",
)
async def get_topology(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> TopologyResponse:
    """Build and return a network topology graph for the given tenant.

    The topology is inferred from, in priority order:
    1. LLDP/CDP/MNDP neighbor discovery on online devices (via NATS proxy)
    2. WireGuard hub-spoke inference (gateway = managed device with no peer row)
    3. Shared /24 subnet fallback for pairs still unconnected

    Results are cached in Redis with a 5-minute TTL; Redis failures are
    non-fatal (the graph is simply recomputed).

    Raises:
        HTTPException 403: caller does not belong to the tenant.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    cache_key = f"topology:{tenant_id}"
    # Cache read is best-effort: fall through on any Redis error
    try:
        rd = await _get_redis()
        cached = await rd.get(cache_key)
        if cached:
            data = json.loads(cached)
            return TopologyResponse(**data)
    except Exception as exc:
        logger.warning("Redis cache read failed, computing topology fresh", error=str(exc))
    # Fetch all devices for tenant (RLS enforced via get_db)
    result = await db.execute(
        select(
            Device.id,
            Device.hostname,
            Device.ip_address,
            Device.status,
            Device.model,
            Device.uptime_seconds,
        )
    )
    rows = result.all()
    if not rows:
        return TopologyResponse(nodes=[], edges=[])
    # Build nodes plus the lookup tables the edge builders need
    nodes: list[TopologyNode] = []
    ip_to_device: dict[str, str] = {}
    online_device_ids: list[str] = []
    for row in rows:
        device_id = str(row.id)
        nodes.append(
            TopologyNode(
                id=device_id,
                hostname=row.hostname,
                ip=row.ip_address,
                status=row.status,
                model=row.model,
                uptime=_format_uptime(row.uptime_seconds),
            )
        )
        ip_to_device[row.ip_address] = device_id
        if row.status == "online":
            online_device_ids.append(device_id)
    # Fetch neighbor tables from online devices in parallel; individual
    # device failures are logged and skipped rather than failing the request
    neighbor_data: dict[str, list[dict[str, Any]]] = {}
    if online_device_ids:
        tasks = [
            routeros_proxy.execute_command(
                device_id, "/ip/neighbor/print", timeout=10.0
            )
            for device_id in online_device_ids
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for device_id, res in zip(online_device_ids, results):
            if isinstance(res, Exception):
                logger.warning(
                    "Neighbor fetch failed",
                    device_id=device_id,
                    error=str(res),
                )
                continue
            if isinstance(res, dict) and res.get("success") and res.get("data"):
                neighbor_data[device_id] = res["data"]
    # Build edges from neighbor discovery
    neighbor_edges = _build_edges_from_neighbors(neighbor_data, ip_to_device)
    # Track connected pairs so later stages don't duplicate edges
    connected_pairs: set[tuple[str, str]] = set()
    for edge in neighbor_edges:
        connected_pairs.add(tuple(sorted([edge.source, edge.target])))
    # VPN-based edges: query WireGuard peers to infer hub-spoke topology.
    # VPN peers all connect to the same WireGuard server. The gateway device
    # is the managed device NOT in the VPN peers list (it's the server, not a
    # client). If found, create star edges from gateway to each VPN peer device.
    vpn_edges: list[TopologyEdge] = []
    vpn_peer_device_ids: set[str] = set()
    try:
        peer_result = await db.execute(
            select(VpnPeer.device_id).where(VpnPeer.is_enabled.is_(True))
        )
        vpn_peer_device_ids = {str(row[0]) for row in peer_result.all()}
        if vpn_peer_device_ids:
            # Gateway = managed devices NOT in VPN peers (typically the Core router)
            all_device_ids = {str(row.id) for row in rows}
            gateway_ids = all_device_ids - vpn_peer_device_ids
            # Prefer an online gateway; otherwise take any candidate
            gateway_id = None
            for gid in gateway_ids:
                if gid in online_device_ids:
                    gateway_id = gid
                    break
            if not gateway_id and gateway_ids:
                gateway_id = next(iter(gateway_ids))
            if gateway_id:
                for peer_device_id in vpn_peer_device_ids:
                    edge_key = tuple(sorted([gateway_id, peer_device_id]))
                    if edge_key not in connected_pairs:
                        vpn_edges.append(
                            TopologyEdge(
                                source=gateway_id,
                                target=peer_device_id,
                                label="vpn tunnel",
                            )
                        )
                        connected_pairs.add(edge_key)
    except Exception as exc:
        logger.warning("VPN edge detection failed", error=str(exc))
    # Fallback: infer connections from shared /24 subnets.
    # Full Device objects are needed here for subnet analysis.
    device_result = await db.execute(select(Device))
    all_devices = list(device_result.scalars().all())
    subnet_edges = _build_edges_from_subnets(all_devices, connected_pairs)
    all_edges = neighbor_edges + vpn_edges + subnet_edges
    topology = TopologyResponse(nodes=nodes, edges=all_edges)
    # Cache write is best-effort too
    try:
        rd = await _get_redis()
        await rd.set(cache_key, topology.model_dump_json(), ex=TOPOLOGY_CACHE_TTL)
    except Exception as exc:
        logger.warning("Redis cache write failed", error=str(exc))
    return topology

View File

@@ -0,0 +1,391 @@
"""Transparency log API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Paginated, filterable key access transparency log listing
- Transparency log statistics (total events, last 24h, unique devices, justification breakdown)
- CSV export of transparency logs
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: admin and above can view transparency logs (tenant_admin or super_admin).
Phase 31: Data Access Transparency Dashboard - TRUST-01, TRUST-02
Shows tenant admins every KMS credential access event for their tenant.
"""
import csv
import io
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy import and_, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["transparency"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Authorize the caller for *tenant_id* or raise 403.

    Super admins get the session's RLS context switched to the requested
    tenant; all other users must already belong to it.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to this tenant",
            )
        return
    await set_tenant_context(db, str(tenant_id))
def _require_admin(current_user: CurrentUser) -> None:
    """Raise 403 unless the user holds an admin-level role.

    Transparency data is sensitive operational intelligence, so only
    super_admin / admin / tenant_admin may view it.
    """
    if current_user.role in ("super_admin", "admin", "tenant_admin"):
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="At least admin role required to view transparency logs.",
    )
# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------
class TransparencyLogItem(BaseModel):
    """One KMS credential-access event, shaped for the tenant dashboard."""

    id: str  # key_access_log row UUID, serialized
    action: str
    device_name: Optional[str] = None  # devices.hostname via LEFT JOIN (may be NULL)
    device_id: Optional[str] = None
    justification: Optional[str] = None
    operator_email: Optional[str] = None  # users.email via LEFT JOIN (may be NULL)
    correlation_id: Optional[str] = None
    resource_type: Optional[str] = None
    resource_id: Optional[str] = None
    ip_address: Optional[str] = None
    created_at: str  # ISO-8601 timestamp; "" when the DB value is NULL
class TransparencyLogResponse(BaseModel):
    """Paginated page of transparency log items."""

    items: list[TransparencyLogItem]
    total: int  # total matching rows across all pages
    page: int
    per_page: int
class TransparencyStats(BaseModel):
    """Aggregate statistics over a tenant's key access log."""

    total_events: int
    events_last_24h: int
    unique_devices: int  # distinct non-NULL device_ids
    justification_breakdown: dict[str, int]  # justification label -> event count
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/transparency-logs",
    response_model=TransparencyLogResponse,
    summary="List KMS credential access events for tenant",
)
async def list_transparency_logs(
    tenant_id: uuid.UUID,
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=100),
    device_id: Optional[uuid.UUID] = Query(default=None),
    justification: Optional[str] = Query(default=None),
    action: Optional[str] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """Return a filtered, paginated page of key-access transparency events.

    Filters (device, justification, action, date range) are combined with
    AND; all values are passed as bind parameters, never interpolated into
    the SQL text. Admin role required (see _require_admin).
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions using parameterized text fragments
    conditions = [text("k.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if device_id:
        conditions.append(text("k.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if justification:
        conditions.append(text("k.justification = :justification"))
        params["justification"] = justification
    if action:
        conditions.append(text("k.action = :action"))
        params["action"] = action
    if date_from:
        conditions.append(text("k.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("k.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    # Shared SELECT columns for data queries; joins resolve hostname/email
    _data_columns = text(
        "k.id, k.action, d.hostname AS device_name, "
        "k.device_id, k.justification, u.email AS operator_email, "
        "k.correlation_id, k.resource_type, k.resource_id, "
        "k.ip_address, k.created_at"
    )
    _data_from = text(
        "key_access_log k "
        "LEFT JOIN users u ON k.user_id = u.id "
        "LEFT JOIN devices d ON k.device_id = d.id"
    )
    # Count total (joins not needed: filters only touch k.* columns)
    count_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log k"))
        .where(where_clause),
        params,
    )
    total = count_result.scalar() or 0
    # Paginated query
    offset = (page - 1) * per_page
    # NOTE(review): limit/offset are also applied via .limit()/.offset()
    # below; these two params look redundant — confirm before removing.
    params["limit"] = per_page
    params["offset"] = offset
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("k.created_at DESC"))
        .limit(per_page)
        .offset(offset),
        params,
    )
    rows = result.mappings().all()
    items = [
        TransparencyLogItem(
            id=str(row["id"]),
            action=row["action"],
            device_name=row["device_name"],
            device_id=str(row["device_id"]) if row["device_id"] else None,
            justification=row["justification"],
            operator_email=row["operator_email"],
            correlation_id=row["correlation_id"],
            resource_type=row["resource_type"],
            resource_id=row["resource_id"],
            ip_address=row["ip_address"],
            created_at=row["created_at"].isoformat() if row["created_at"] else "",
        )
        for row in rows
    ]
    return TransparencyLogResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
    )
@router.get(
    "/tenants/{tenant_id}/transparency-logs/stats",
    response_model=TransparencyStats,
    summary="Get transparency log statistics",
)
async def get_transparency_stats(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> TransparencyStats:
    """Aggregate key-access statistics for the tenant's dashboard header.

    Runs four independent aggregate queries over key_access_log:
    total events, events in the trailing 24 hours, distinct devices,
    and a per-justification event count. Admin role required.
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    # Total events
    total_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log"))
        .where(text("tenant_id = :tenant_id")),
        params,
    )
    total_events = total_result.scalar() or 0
    # Events in last 24 hours (window computed server-side in PostgreSQL)
    last_24h_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log"))
        .where(
            and_(
                text("tenant_id = :tenant_id"),
                text("created_at >= NOW() - INTERVAL '24 hours'"),
            )
        ),
        params,
    )
    events_last_24h = last_24h_result.scalar() or 0
    # Unique devices (NULL device_id rows excluded)
    unique_devices_result = await db.execute(
        select(func.count(text("DISTINCT device_id")))
        .select_from(text("key_access_log"))
        .where(
            and_(
                text("tenant_id = :tenant_id"),
                text("device_id IS NOT NULL"),
            )
        ),
        params,
    )
    unique_devices = unique_devices_result.scalar() or 0
    # Justification breakdown; NULL justifications are bucketed as 'system'
    breakdown_result = await db.execute(
        select(
            text("COALESCE(justification, 'system') AS justification_label"),
            func.count().label("count"),
        )
        .select_from(text("key_access_log"))
        .where(text("tenant_id = :tenant_id"))
        .group_by(text("justification_label")),
        params,
    )
    justification_breakdown: dict[str, int] = {}
    for row in breakdown_result.mappings().all():
        justification_breakdown[row["justification_label"]] = row["count"]
    return TransparencyStats(
        total_events=total_events,
        events_last_24h=events_last_24h,
        unique_devices=unique_devices,
        justification_breakdown=justification_breakdown,
    )
@router.get(
    "/tenants/{tenant_id}/transparency-logs/export",
    summary="Export transparency logs as CSV",
)
async def export_transparency_logs(
    tenant_id: uuid.UUID,
    device_id: Optional[uuid.UUID] = Query(default=None),
    justification: Optional[str] = Query(default=None),
    action: Optional[str] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> StreamingResponse:
    """Export all matching transparency events as a CSV attachment.

    Accepts the same filters as the list endpoint (no pagination — every
    matching row is exported). Admin role required.

    NOTE(review): the full result set and CSV are built in memory and then
    wrapped in a single-chunk StreamingResponse; for very large tenants a
    true row-by-row stream would bound memory — confirm expected volumes.
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions (parameterized — values never enter the SQL text)
    conditions = [text("k.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if device_id:
        conditions.append(text("k.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if justification:
        conditions.append(text("k.justification = :justification"))
        params["justification"] = justification
    if action:
        conditions.append(text("k.action = :action"))
        params["action"] = action
    if date_from:
        conditions.append(text("k.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("k.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    _data_columns = text(
        "k.id, k.action, d.hostname AS device_name, "
        "k.device_id, k.justification, u.email AS operator_email, "
        "k.correlation_id, k.resource_type, k.resource_id, "
        "k.ip_address, k.created_at"
    )
    _data_from = text(
        "key_access_log k "
        "LEFT JOIN users u ON k.user_id = u.id "
        "LEFT JOIN devices d ON k.device_id = d.id"
    )
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("k.created_at DESC")),
        params,
    )
    all_rows = result.mappings().all()
    # Render CSV: header row, then one row per event (NULLs become "")
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow([
        "ID",
        "Action",
        "Device Name",
        "Device ID",
        "Justification",
        "Operator Email",
        "Correlation ID",
        "Resource Type",
        "Resource ID",
        "IP Address",
        "Timestamp",
    ])
    for row in all_rows:
        writer.writerow([
            str(row["id"]),
            row["action"],
            row["device_name"] or "",
            str(row["device_id"]) if row["device_id"] else "",
            row["justification"] or "",
            row["operator_email"] or "",
            row["correlation_id"] or "",
            row["resource_type"] or "",
            row["resource_id"] or "",
            row["ip_address"] or "",
            str(row["created_at"]),
        ])
    output.seek(0)
    return StreamingResponse(
        iter([output.getvalue()]),
        media_type="text/csv",
        headers={
            "Content-Disposition": "attachment; filename=transparency-logs.csv"
        },
    )

View File

@@ -0,0 +1,231 @@
"""
User management endpoints (scoped to tenant).
GET /api/tenants/{tenant_id}/users — list users in tenant
POST /api/tenants/{tenant_id}/users — create user in tenant
GET /api/tenants/{tenant_id}/users/{id} — get user detail
PUT /api/tenants/{tenant_id}/users/{id} — update user
DELETE /api/tenants/{tenant_id}/users/{id} — deactivate user
"""
import uuid
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.middleware.rate_limit import limiter
from app.database import get_admin_db
from app.middleware.rbac import require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser
from app.models.tenant import Tenant
from app.models.user import User, UserRole
from app.schemas.user import UserCreate, UserResponse, UserUpdate
from app.services.auth import hash_password
router = APIRouter(prefix="/tenants", tags=["users"])
async def _check_tenant_access(
    tenant_id: uuid.UUID,
    current_user: CurrentUser,
    db: AsyncSession,
) -> Tenant:
    """Verify the tenant exists and the caller may manage it.

    super_admin can access any tenant; tenant_admin only their own.

    Returns:
        The Tenant row.

    Raises:
        HTTPException 403: caller belongs to a different tenant.
        HTTPException 404: tenant does not exist.
    """
    allowed = current_user.is_super_admin or current_user.tenant_id == tenant_id
    if not allowed:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
    lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
    tenant = lookup.scalar_one_or_none()
    if tenant is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Tenant not found",
        )
    return tenant
@router.get("/{tenant_id}/users", response_model=list[UserResponse], summary="List users in tenant")
async def list_users(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> list[UserResponse]:
    """Return all users in a tenant, ordered by name.

    super_admin may list any tenant; tenant_admin only their own.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    query = select(User).where(User.tenant_id == tenant_id).order_by(User.name)
    members = (await db.execute(query)).scalars().all()
    return [UserResponse.model_validate(member) for member in members]
@router.post(
    "/{tenant_id}/users",
    response_model=UserResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a user in tenant",
)
@limiter.limit("20/minute")
async def create_user(
    request: Request,
    tenant_id: uuid.UUID,
    data: UserCreate,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """Create a new account inside a tenant.

    super_admin may create users anywhere; tenant_admin only in their own
    tenant. There is no invitation flow — the admin supplies a temporary
    password and the account is flagged to upgrade its auth on first use.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    normalized_email = data.email.lower()
    # Emails are unique across the whole platform, not per tenant
    duplicate = (
        await db.execute(select(User).where(User.email == normalized_email))
    ).scalar_one_or_none()
    if duplicate:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="A user with this email already exists",
        )
    new_user = User(
        email=normalized_email,
        hashed_password=hash_password(data.password),
        name=data.name,
        role=data.role.value,
        tenant_id=tenant_id,
        is_active=True,
        must_upgrade_auth=True,
    )
    db.add(new_user)
    await db.commit()
    await db.refresh(new_user)
    return UserResponse.model_validate(new_user)
@router.get("/{tenant_id}/users/{user_id}", response_model=UserResponse, summary="Get user detail")
async def get_user(
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """Fetch one user, scoped to the tenant; 404 if absent."""
    await _check_tenant_access(tenant_id, current_user, db)
    lookup = await db.execute(
        select(User).where(User.id == user_id, User.tenant_id == tenant_id)
    )
    found = lookup.scalar_one_or_none()
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    return UserResponse.model_validate(found)
@router.put("/{tenant_id}/users/{user_id}", response_model=UserResponse, summary="Update a user")
@limiter.limit("20/minute")
async def update_user(
    request: Request,
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    data: UserUpdate,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """Partially update a user (name, role, is_active).

    Only fields the caller supplied (non-None) are applied; role values
    are stored as their enum value.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    target = (
        await db.execute(
            select(User).where(User.id == user_id, User.tenant_id == tenant_id)
        )
    ).scalar_one_or_none()
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    if data.name is not None:
        target.name = data.name
    if data.role is not None:
        target.role = data.role.value
    if data.is_active is not None:
        target.is_active = data.is_active
    await db.commit()
    await db.refresh(target)
    return UserResponse.model_validate(target)
@router.delete("/{tenant_id}/users/{user_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Deactivate a user")
@limiter.limit("5/minute")
async def deactivate_user(
    request: Request,
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> None:
    """Soft-delete a user by setting is_active=False.

    The row is retained so historical audit records still resolve; the
    account simply can no longer log in.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    target = (
        await db.execute(
            select(User).where(User.id == user_id, User.tenant_id == tenant_id)
        )
    ).scalar_one_or_none()
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    # Admins must not lock themselves out of their own tenant
    if target.id == current_user.user_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot deactivate your own account",
        )
    target.is_active = False
    await db.commit()

236
backend/app/routers/vpn.py Normal file
View File

@@ -0,0 +1,236 @@
"""WireGuard VPN API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/vpn/ for:
- VPN setup (enable WireGuard for tenant)
- VPN config management (update endpoint, enable/disable)
- Peer management (add device, remove, get config)
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above for all operations.
"""
import uuid
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.schemas.vpn import (
VpnConfigResponse,
VpnConfigUpdate,
VpnOnboardRequest,
VpnOnboardResponse,
VpnPeerConfig,
VpnPeerCreate,
VpnPeerResponse,
VpnSetupRequest,
)
from app.services import vpn_service
router = APIRouter(tags=["vpn"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Authorize the caller for *tenant_id*; super admins switch RLS context."""
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
        return
    await set_tenant_context(db, str(tenant_id))
def _require_operator(current_user: CurrentUser) -> None:
    """Reject read-only users; mutating VPN routes need operator or above."""
    if current_user.role != "viewer":
        return
    raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Operator role required")
# ── VPN Config ──
@router.get("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse | None)
async def get_vpn_config(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return this tenant's VPN configuration (with peer count), or None
    when VPN has not been set up yet."""
    await _check_tenant_access(current_user, tenant_id, db)
    config = await vpn_service.get_vpn_config(db, tenant_id)
    if config is None:
        return None
    response = VpnConfigResponse.model_validate(config)
    response.peer_count = len(await vpn_service.get_peers(db, tenant_id))
    return response
@router.post("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("20/minute")
async def setup_vpn(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnSetupRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Enable VPN for this tenant — generates server keys.

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 409: the service rejected setup (ValueError), e.g.
            VPN already configured.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.setup_vpn(db, tenant_id, endpoint=body.endpoint)
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    return VpnConfigResponse.model_validate(config)
@router.patch("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse)
@limiter.limit("20/minute")
async def update_vpn_config(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnConfigUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Update VPN settings (endpoint, enable/disable).

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 404: the service reported no config to update (ValueError).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.update_vpn_config(
            db, tenant_id, endpoint=body.endpoint, is_enabled=body.is_enabled
        )
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    peers = await vpn_service.get_peers(db, tenant_id)
    resp = VpnConfigResponse.model_validate(config)
    resp.peer_count = len(peers)
    return resp
# ── VPN Peers ──
@router.get("/tenants/{tenant_id}/vpn/peers", response_model=list[VpnPeerResponse])
async def list_peers(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List all VPN peers for this tenant.

    Each peer is enriched with its device's hostname/IP (when the device
    row still exists) and, when available, the live last-handshake time
    read from the WireGuard status.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    peers = await vpn_service.get_peers(db, tenant_id)
    # Enrich with device info (single IN query instead of one per peer)
    device_ids = [p.device_id for p in peers]
    devices = {}
    if device_ids:
        result = await db.execute(select(Device).where(Device.id.in_(device_ids)))
        devices = {d.id: d for d in result.scalars().all()}
    # Read live WireGuard status once for handshake enrichment
    wg_status = vpn_service.read_wg_status()
    responses = []
    for peer in peers:
        resp = VpnPeerResponse.model_validate(peer)
        device = devices.get(peer.device_id)
        if device:
            resp.device_hostname = device.hostname
            resp.device_ip = device.ip_address
        # Live handshake (if present) overrides the stored value
        live_handshake = vpn_service.get_peer_handshake(wg_status, peer.peer_public_key)
        if live_handshake:
            resp.last_handshake = live_handshake
        responses.append(resp)
    return responses
@router.post("/tenants/{tenant_id}/vpn/peers", response_model=VpnPeerResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("20/minute")
async def add_peer(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnPeerCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Add a device as a VPN peer.

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 409: the service rejected the peer (ValueError),
            e.g. device already enrolled.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        peer = await vpn_service.add_peer(db, tenant_id, body.device_id, additional_allowed_ips=body.additional_allowed_ips)
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    # Enrich the response with the device's hostname/IP when available
    result = await db.execute(select(Device).where(Device.id == peer.device_id))
    device = result.scalar_one_or_none()
    resp = VpnPeerResponse.model_validate(peer)
    if device:
        resp.device_hostname = device.hostname
        resp.device_ip = device.ip_address
    return resp
@router.post("/tenants/{tenant_id}/vpn/peers/onboard", response_model=VpnOnboardResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def onboard_device(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnOnboardRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Create device + VPN peer in one step. Returns RouterOS commands for tunnel setup.

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 409: the service rejected onboarding (ValueError).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        result = await vpn_service.onboard_device(
            db, tenant_id,
            hostname=body.hostname,
            username=body.username,
            password=body.password,
        )
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    return VpnOnboardResponse(**result)
@router.delete("/tenants/{tenant_id}/vpn/peers/{peer_id}", status_code=status.HTTP_204_NO_CONTENT)
@limiter.limit("5/minute")
async def remove_peer(
    request: Request,
    tenant_id: uuid.UUID,
    peer_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Remove a VPN peer.

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 404: the service could not find the peer (ValueError).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        await vpn_service.remove_peer(db, tenant_id, peer_id)
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
@router.get("/tenants/{tenant_id}/vpn/peers/{peer_id}/config", response_model=VpnPeerConfig)
async def get_peer_device_config(
    tenant_id: uuid.UUID,
    peer_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Get the full config for a peer — includes private key and RouterOS commands.

    Operator role required: the payload contains the peer's private key.

    Raises:
        HTTPException 403: caller lacks tenant access or is a viewer.
        HTTPException 404: the service could not find the peer (ValueError).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.get_peer_config(db, tenant_id, peer_id)
    except ValueError as e:
        # Chain the cause so tracebacks show the service-level failure (B904)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    return VpnPeerConfig(**config)