feat: The Other Dude v9.0.1 — full-featured email system

ci: add GitHub Pages deployment workflow for docs site

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-08 17:46:37 -05:00
commit b840047e19
511 changed files with 106948 additions and 0 deletions

52
.env.example Normal file
View File

@@ -0,0 +1,52 @@
# .env.example -- Copy to .env for development, .env.prod for production
# DO NOT commit .env or .env.prod to git
# Environment (dev | staging | production)
ENVIRONMENT=dev
LOG_LEVEL=debug
DEBUG=true
# Database
POSTGRES_DB=mikrotik
POSTGRES_USER=postgres
POSTGRES_PASSWORD=CHANGE_ME_IN_PRODUCTION
# Superuser URLs (async app driver + sync driver); app_user is the
# RLS-enforced application role -- it cannot bypass row-level security.
DATABASE_URL=postgresql+asyncpg://postgres:CHANGE_ME_IN_PRODUCTION@postgres:5432/mikrotik
SYNC_DATABASE_URL=postgresql+psycopg2://postgres:CHANGE_ME_IN_PRODUCTION@postgres:5432/mikrotik
APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:CHANGE_ME_IN_PRODUCTION@postgres:5432/mikrotik
# Poller database (different role, no RLS)
POLLER_DATABASE_URL=postgres://poller_user:poller_password@postgres:5432/mikrotik
# Redis
REDIS_URL=redis://redis:6379/0
# NATS
NATS_URL=nats://nats:4222
# Security -- generate unique values per environment:
#   JWT: python3 -c "import secrets; print(secrets.token_urlsafe(64))"
#   Fernet: python3 -c "import secrets, base64; print(base64.b64encode(secrets.token_bytes(32)).decode())"
JWT_SECRET_KEY=CHANGE_ME_IN_PRODUCTION
CREDENTIAL_ENCRYPTION_KEY=CHANGE_ME_IN_PRODUCTION
# First admin bootstrap (dev only)
FIRST_ADMIN_EMAIL=admin@mikrotik-portal.dev
FIRST_ADMIN_PASSWORD=changeme-in-production
# CORS (comma-separated origins)
# Dev: localhost ports for Vite/React dev server
# Prod: set to your actual domain, e.g., https://mikrotik.yourdomain.com
CORS_ORIGINS=http://localhost:3000,http://localhost:5173,http://localhost:8080
# Git store path
GIT_STORE_PATH=/data/git-store
# Firmware
FIRMWARE_CACHE_DIR=/data/firmware-cache
# SMTP (system emails like password reset)
# For dev: run `docker compose --profile mail-testing up -d` for Mailpit UI at http://localhost:8025
SMTP_HOST=mailpit
SMTP_PORT=1025
SMTP_USER=
SMTP_PASSWORD=
SMTP_USE_TLS=false
SMTP_FROM_ADDRESS=noreply@example.com

43
.env.staging.example Normal file
View File

@@ -0,0 +1,43 @@
# .env.staging.example -- Copy to .env.staging and fill in values
# DO NOT commit .env.staging to git
ENVIRONMENT=staging
LOG_LEVEL=info
DEBUG=false

# Database
POSTGRES_DB=mikrotik
POSTGRES_USER=postgres
POSTGRES_PASSWORD=CHANGE_ME_STAGING
DATABASE_URL=postgresql+asyncpg://postgres:CHANGE_ME_STAGING@postgres:5432/mikrotik
SYNC_DATABASE_URL=postgresql+psycopg2://postgres:CHANGE_ME_STAGING@postgres:5432/mikrotik
APP_USER_DATABASE_URL=postgresql+asyncpg://app_user:CHANGE_ME_STAGING@postgres:5432/mikrotik

# Poller database (different role, no RLS)
# NOTE(review): still uses the default poller_password -- change it for staging.
POLLER_DATABASE_URL=postgres://poller_user:poller_password@postgres:5432/mikrotik

# Redis
REDIS_URL=redis://redis:6379/0

# NATS
NATS_URL=nats://nats:4222

# Security -- generate unique values for staging
# JWT: python3 -c "import secrets; print(secrets.token_urlsafe(64))"
# Fernet: python3 -c "import secrets, base64; print(base64.b64encode(secrets.token_bytes(32)).decode())"
JWT_SECRET_KEY=CHANGE_ME_STAGING
CREDENTIAL_ENCRYPTION_KEY=CHANGE_ME_STAGING

# First admin bootstrap
FIRST_ADMIN_EMAIL=admin@mikrotik-portal.staging
FIRST_ADMIN_PASSWORD=CHANGE_ME_STAGING

# CORS (staging URL)
CORS_ORIGINS=http://localhost:3080

# Git store path
GIT_STORE_PATH=/data/git-store

# Firmware
FIRMWARE_CACHE_DIR=/data/firmware-cache

# SMTP (system emails like password reset)
# Added for parity with .env.example -- confirm values against your staging
# mail relay before use.
SMTP_HOST=
SMTP_PORT=587
SMTP_USER=
SMTP_PASSWORD=
SMTP_USE_TLS=true
SMTP_FROM_ADDRESS=noreply@example.com

267
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,267 @@
name: CI

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

# Cancel in-progress runs for the same branch/PR to save runner minutes.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # ---------------------------------------------------------------------------
  # LINT — parallel linting for all three services
  # ---------------------------------------------------------------------------
  python-lint:
    name: Lint Python (Ruff)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install Ruff
        run: pip install ruff
      - name: Ruff check
        run: ruff check backend/
      - name: Ruff format check
        run: ruff format --check backend/

  go-lint:
    name: Lint Go (golangci-lint)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: "1.24"
      - name: golangci-lint
        uses: golangci/golangci-lint-action@v6
        with:
          working-directory: poller

  frontend-lint:
    name: Lint Frontend (ESLint + tsc)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: "18"
          cache: "npm"
          cache-dependency-path: frontend/package-lock.json
      - name: Install dependencies
        working-directory: frontend
        run: npm ci
      - name: ESLint
        working-directory: frontend
        run: npx eslint .
      - name: TypeScript type check
        working-directory: frontend
        run: npx tsc --noEmit

  # ---------------------------------------------------------------------------
  # TEST — parallel test suites for all three services
  # ---------------------------------------------------------------------------
  backend-test:
    name: Test Backend (pytest)
    runs-on: ubuntu-latest
    services:
      postgres:
        image: timescale/timescaledb:latest-pg17
        env:
          POSTGRES_DB: mikrotik_test
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      redis:
        image: redis:7-alpine
        ports:
          - 6379:6379
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      nats:
        image: nats:2-alpine
        ports:
          - 4222:4222
        # The nats image ships no client binary usable as a health probe;
        # "true" satisfies the service-container health contract without
        # actually checking readiness.
        options: >-
          --health-cmd "true"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    env:
      ENVIRONMENT: dev
      DATABASE_URL: "postgresql+asyncpg://postgres:postgres@localhost:5432/mikrotik_test"
      SYNC_DATABASE_URL: "postgresql+psycopg2://postgres:postgres@localhost:5432/mikrotik_test"
      APP_USER_DATABASE_URL: "postgresql+asyncpg://app_user:app_password@localhost:5432/mikrotik_test"
      TEST_DATABASE_URL: "postgresql+asyncpg://postgres:postgres@localhost:5432/mikrotik_test"
      TEST_APP_USER_DATABASE_URL: "postgresql+asyncpg://app_user:app_password@localhost:5432/mikrotik_test"
      # CI-only throwaway values — never reuse outside this workflow.
      CREDENTIAL_ENCRYPTION_KEY: "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w="
      JWT_SECRET_KEY: "change-this-in-production-use-a-long-random-string"
      REDIS_URL: "redis://localhost:6379/0"
      NATS_URL: "nats://localhost:4222"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: pip-${{ hashFiles('backend/pyproject.toml') }}
          restore-keys: pip-
      - name: Install backend dependencies
        working-directory: backend
        run: pip install -e ".[dev]"
      - name: Set up test database roles
        env:
          PGPASSWORD: postgres
        run: |
          # Create app_user role for RLS-enforced connections
          psql -h localhost -U postgres -d mikrotik_test -c "
            CREATE ROLE app_user WITH LOGIN PASSWORD 'app_password' NOSUPERUSER NOCREATEDB NOCREATEROLE;
            GRANT CONNECT ON DATABASE mikrotik_test TO app_user;
            GRANT USAGE ON SCHEMA public TO app_user;
            ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO app_user;
            ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO app_user;
          " || true

          # Create poller_user role
          psql -h localhost -U postgres -d mikrotik_test -c "
            DO \$\$
            BEGIN
              IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'poller_user') THEN
                CREATE ROLE poller_user WITH LOGIN PASSWORD 'poller_password' NOSUPERUSER NOCREATEDB NOCREATEROLE;
              END IF;
            END
            \$\$;
            GRANT CONNECT ON DATABASE mikrotik_test TO poller_user;
            GRANT USAGE ON SCHEMA public TO poller_user;
          " || true
      - name: Run backend tests
        working-directory: backend
        run: python -m pytest tests/ -x -v --tb=short

  poller-test:
    name: Test Go Poller
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: "1.24"
      - uses: actions/cache@v4
        with:
          path: ~/go/pkg/mod
          key: go-${{ hashFiles('poller/go.sum') }}
          restore-keys: go-
      - name: Run poller tests
        working-directory: poller
        run: go test ./... -v -count=1

  frontend-test:
    name: Test Frontend (Vitest)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: "18"
          cache: "npm"
          cache-dependency-path: frontend/package-lock.json
      - name: Install dependencies
        working-directory: frontend
        run: npm ci
      - name: Run frontend tests
        working-directory: frontend
        run: npx vitest run

  # ---------------------------------------------------------------------------
  # BUILD — sequential Docker builds + Trivy scans (depends on lint + test)
  # ---------------------------------------------------------------------------
  build:
    name: Build & Scan Docker Images
    runs-on: ubuntu-latest
    needs: [python-lint, go-lint, frontend-lint, backend-test, poller-test, frontend-test]
    steps:
      - uses: actions/checkout@v4
      # Build and scan each image SEQUENTIALLY to avoid OOM.
      # Each multi-stage build (Go, Python/pip, Node/tsc) can peak at 1-2 GB.
      # Running them in parallel would exceed typical runner memory.
      - name: Build API image
        run: docker build -f infrastructure/docker/Dockerfile.api -t mikrotik-api:ci .
      - name: Scan API image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-api:ci"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"
      - name: Build Poller image
        run: docker build -f poller/Dockerfile -t mikrotik-poller:ci ./poller
      - name: Scan Poller image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-poller:ci"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"
      - name: Build Frontend image
        run: docker build -f infrastructure/docker/Dockerfile.frontend -t mikrotik-frontend:ci .
      - name: Scan Frontend image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-frontend:ci"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"

39
.github/workflows/pages.yml vendored Normal file
View File

@@ -0,0 +1,39 @@
name: Deploy Docs to GitHub Pages

on:
  push:
    branches: [main]
    paths:
      - "docs/website/**"
  workflow_dispatch:

# Least-privilege token: read repo, write Pages, mint OIDC token for deploy.
permissions:
  contents: read
  pages: write
  id-token: write

# Only one Pages deployment at a time; do not cancel an in-flight deploy.
concurrency:
  group: pages
  cancel-in-progress: false

jobs:
  deploy:
    name: Deploy to GitHub Pages
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - uses: actions/checkout@v4
      - name: Setup Pages
        uses: actions/configure-pages@v5
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/website
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

56
.github/workflows/security-scan.yml vendored Normal file
View File

@@ -0,0 +1,56 @@
name: Container Security Scan

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

# Cancel superseded runs for the same branch/PR (mirrors ci.yml).
concurrency:
  group: security-scan-${{ github.ref }}
  cancel-in-progress: true

jobs:
  trivy-scan:
    name: Trivy Container Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      # Build and scan each container image sequentially to avoid OOM.
      # Scans are BLOCKING (exit-code: 1) — HIGH/CRITICAL CVEs fail the pipeline.
      # Add base-image CVEs to .trivyignore with justification if needed.
      # NOTE(review): this duplicates the build job in ci.yml; consider a
      # reusable workflow (workflow_call) so the two cannot drift apart.
      - name: Build API image
        run: docker build -f infrastructure/docker/Dockerfile.api -t mikrotik-api:scan .
      - name: Scan API image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-api:scan"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"
      - name: Build Poller image
        run: docker build -f poller/Dockerfile -t mikrotik-poller:scan ./poller
      - name: Scan Poller image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-poller:scan"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"
      - name: Build Frontend image
        run: docker build -f infrastructure/docker/Dockerfile.frontend -t mikrotik-frontend:scan .
      - name: Scan Frontend image
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: "mikrotik-frontend:scan"
          format: "table"
          exit-code: "1"
          severity: "HIGH,CRITICAL"
          trivyignores: ".trivyignore"

40
.gitignore vendored Normal file
View File

@@ -0,0 +1,40 @@
# Environment files with secrets
.env
.env.prod
# .env.staging is declared "DO NOT commit" in .env.staging.example but was not
# ignored by any pattern (.env.*.local does not match it) — add it explicitly.
.env.staging
.env.local
.env.*.local

# Docker data
docker-data/

# Python
__pycache__/
*.pyc
*.pyo
.pytest_cache/
.coverage
htmlcov/

# Node
node_modules/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Build caches
.go-cache/
.npm-cache/
.tmp/

# Git worktrees
.worktrees/

# OS
.DS_Store
Thumbs.db

# Playwright MCP logs
.playwright-mcp/

53
LICENSE Normal file
View File

@@ -0,0 +1,53 @@
Business Source License 1.1
Parameters
Licensor: The Other Dude
Licensed Work: The Other Dude v9.0.0
The Licensed Work is (c) 2026 The Other Dude
Additional Use Grant: You may use the Licensed Work for non-production,
personal, educational, and evaluation purposes.
Change Date: March 8, 2030
Change License: Apache License, Version 2.0
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited
production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.

132
README.md Normal file
View File

@@ -0,0 +1,132 @@
# The Other Dude
**Self-hosted MikroTik fleet management for MSPs.**
TOD is a multi-tenant platform for managing RouterOS devices at scale. It replaces
the chaos of juggling WinBox sessions and SSH terminals across hundreds of routers
with a single, centralized web interface -- fleet visibility, configuration management,
real-time monitoring, and zero-knowledge security, all self-hosted on your infrastructure.
---
## Key Features
- **Fleet Management** -- Dashboard with device health, uptime sparklines, virtual-scrolled fleet table, geographic map, and subnet discovery.
- **Configuration Push with Panic-Revert** -- Two-phase config deployment ensures you never brick a remote device. Batch config, templates, and git-backed version history with one-click restore.
- **Real-Time Monitoring** -- Live CPU, memory, disk, and interface traffic via Server-Sent Events backed by NATS JetStream. Configurable alert rules with email, webhook, and Slack notifications.
- **Zero-Knowledge Security** -- 1Password-style architecture. SRP-6a authentication (server never sees your password), per-tenant envelope encryption via Transit KMS, Emergency Kit export.
- **Multi-Tenant with PostgreSQL RLS** -- Full organization isolation enforced at the database layer. Four roles: super_admin, admin, operator, viewer.
- **Internal Certificate Authority** -- Issue and deploy TLS certificates to RouterOS devices via SFTP. Three-tier TLS fallback for maximum compatibility.
- **WireGuard VPN Onboarding** -- Create device + VPN peer in one transaction. Generates ready-to-paste RouterOS commands for devices behind NAT.
- **PDF Reports** -- Fleet summary, device detail, security audit, and performance reports generated server-side.
- **Command Palette UX** -- Cmd+K quick navigation, keyboard shortcuts, dark/light mode, smooth page transitions, and skeleton loaders throughout.
---
## Architecture
```
+----------------+
| Frontend |
| React / Vite |
+-------+--------+
|
/api/ proxy
|
+-------v--------+
| Backend |
| FastAPI |
+--+----+-----+--+
| | |
+-------------+ | +--------------+
| | |
+------v-------+ +------v------+ +----------v----------+
| PostgreSQL | | Redis | | NATS |
| TimescaleDB | | (locks, | | JetStream |
| (RLS) | | caching) | | (pub/sub) |
+------^-------+ +------^------+ +----------^----------+
| | |
+------+------------------+--------------------+------+
| Go Poller |
| RouterOS binary API (port 8729 TLS) |
+---------------------------+-------------------------+
|
+----------v-----------+
| RouterOS Fleet |
| (your devices) |
+----------------------+
```
The **Go poller** communicates with RouterOS devices using the binary API over TLS,
publishing metrics to NATS and persisting to PostgreSQL with TimescaleDB hypertables.
The **FastAPI backend** enforces tenant isolation via Row-Level Security and streams
real-time events to the **React frontend** over SSE. **OpenBao** provides Transit
secret engine for per-tenant envelope encryption.
---
## Tech Stack
| Layer | Technology |
|-------|------------|
| Frontend | React 19, TanStack Router + Query, Tailwind CSS, Vite |
| Backend | Python 3.12, FastAPI, SQLAlchemy 2.0 async, asyncpg |
| Poller | Go 1.24, go-routeros/v3, pgx/v5, nats.go |
| Database | PostgreSQL 17 + TimescaleDB, Row-Level Security |
| Cache / Locks | Redis 7 |
| Message Bus | NATS with JetStream |
| KMS | OpenBao (Transit secret engine) |
| VPN | WireGuard |
| Auth | SRP-6a (zero-knowledge), JWT |
| Reports | Jinja2 + WeasyPrint |
---
## Quick Start
```bash
# Clone and configure
git clone https://github.com/your-org/tod.git && cd tod
cp .env.example .env
# Edit .env -- set CREDENTIAL_ENCRYPTION_KEY and JWT_SECRET_KEY at minimum
# Build images sequentially (avoids OOM on low-RAM machines)
docker compose --profile full build api
docker compose --profile full build poller
docker compose --profile full build frontend
# Start the full stack
docker compose --profile full up -d
# Open the UI
open http://localhost:3000
```
On first launch, the setup wizard walks you through creating a super admin account,
enrolling your Secret Key, adding your first organization, and onboarding your first device.
---
## Documentation
Full documentation is available at [theotherdude.net](https://theotherdude.net).
See the documentation site for screenshots and feature walkthroughs.
---
## License
[Business Source License 1.1](LICENSE)
Free for personal and educational use. Commercial use (managing devices for paying
customers or as part of a paid service) requires a commercial license. See the
LICENSE file for full terms.
---
## The Name
"The Other Dude" -- because every MSP needs one. When the network is down at 2 AM
and someone has to fix it, TOD is the other dude on the job. The Big Lebowski inspired,
the rug really ties the room together.

26
backend/.gitignore vendored Normal file
View File

@@ -0,0 +1,26 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
.venv/
venv/
env/
.env
*.egg-info/
dist/
build/
# IDE
.vscode/
.idea/
*.swp
# Testing
.pytest_cache/
.coverage
htmlcov/
# Logs
*.log

114
backend/alembic.ini Normal file
View File

@@ -0,0 +1,114 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
# Use forward slashes (/) also on windows to provide os agnostic paths
script_location = alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can installed by adding `alembic[tz]` to the pip requirements
# timezone =
# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the character specified by
# "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses
# os.pathsep. Note that this may cause alembic to miss version files if the separator
# character is actually part of the version file path.
# version_path_separator = space
# version_path_separator = ; # Windows
# version_path_separator = : # Unix
#
# NOTE: configparser does not strip inline comments, so any trailing "# ..." on
# the value line below would become part of the value and break alembic as soon
# as version_locations is configured. Keep comments on their own lines here.
version_path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# New in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.mako
# output_encoding = utf-8
# Overridden at runtime by the DATABASE_URL env var (see alembic/env.py).
sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/mikrotik
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner,
# if available.
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

78
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,78 @@
"""Alembic environment configuration for async SQLAlchemy with PostgreSQL."""
import asyncio
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
# Import all models to register them with Base.metadata
from app.database import Base
import app.models.tenant # noqa: F401
import app.models.user # noqa: F401
import app.models.device # noqa: F401
import app.models.config_backup # noqa: F401
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Override sqlalchemy.url from DATABASE_URL env var if set (for Docker)
if os.environ.get("DATABASE_URL"):
config.set_main_option("sqlalchemy.url", os.environ["DATABASE_URL"])
# Interpret the config file for Python logging.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Add your model's MetaData object here for 'autogenerate' support
target_metadata = Base.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""Run migrations in 'online' mode with async engine."""
connectable = async_engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
async with connectable.connect() as connection:
await connection.run_sync(do_run_migrations)
await connectable.dispose()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,376 @@
"""Initial schema with RLS policies for multi-tenant isolation.
Revision ID: 001
Revises: None
Create Date: 2026-02-24
This migration creates:
1. All database tables (tenants, users, devices, device_groups, device_tags,
device_group_memberships, device_tag_assignments)
2. Composite unique indexes for tenant-scoped uniqueness
3. Row Level Security (RLS) on all tenant-scoped tables
4. RLS policies using app.current_tenant PostgreSQL setting
5. The app_user role with appropriate grants (cannot bypass RLS)
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create all base tables, tenant-scoped RLS policies, and app_user grants.

    Runs as the migration connection's role (expected superuser). All RLS
    policies filter on the app.current_tenant session setting, read with
    missing_ok=true so a session without tenant context sees zero rows
    rather than raising.
    """
    # =========================================================================
    # TENANTS TABLE
    # =========================================================================
    op.create_table(
        "tenants",
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("description", sa.Text, nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )
    op.create_index("ix_tenants_name", "tenants", ["name"], unique=True)
    # =========================================================================
    # USERS TABLE
    # =========================================================================
    op.create_table(
        "users",
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
        sa.Column("email", sa.String(255), nullable=False),
        sa.Column("hashed_password", sa.String(255), nullable=False),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("role", sa.String(50), nullable=False, server_default="viewer"),
        # tenant_id is nullable: a NULL tenant is how super-admin users are
        # represented (they match no tenant row directly).
        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("is_active", sa.Boolean, nullable=False, server_default="true"),
        sa.Column("last_login", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("email"),
        sa.ForeignKeyConstraint(["tenant_id"], ["tenants.id"], ondelete="CASCADE"),
    )
    op.create_index("ix_users_email", "users", ["email"], unique=True)
    op.create_index("ix_users_tenant_id", "users", ["tenant_id"])
    # =========================================================================
    # DEVICES TABLE
    # =========================================================================
    op.create_table(
        "devices",
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("hostname", sa.String(255), nullable=False),
        # String(45) fits the longest textual IPv6 form.
        sa.Column("ip_address", sa.String(45), nullable=False),
        sa.Column("api_port", sa.Integer, nullable=False, server_default="8728"),
        sa.Column("api_ssl_port", sa.Integer, nullable=False, server_default="8729"),
        sa.Column("model", sa.String(255), nullable=True),
        sa.Column("serial_number", sa.String(255), nullable=True),
        sa.Column("firmware_version", sa.String(100), nullable=True),
        sa.Column("routeros_version", sa.String(100), nullable=True),
        sa.Column("uptime_seconds", sa.Integer, nullable=True),
        sa.Column("last_seen", sa.DateTime(timezone=True), nullable=True),
        sa.Column("encrypted_credentials", sa.LargeBinary, nullable=True),
        sa.Column("status", sa.String(20), nullable=False, server_default="unknown"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["tenant_id"], ["tenants.id"], ondelete="CASCADE"),
        sa.UniqueConstraint("tenant_id", "hostname", name="uq_devices_tenant_hostname"),
    )
    op.create_index("ix_devices_tenant_id", "devices", ["tenant_id"])
    # =========================================================================
    # DEVICE GROUPS TABLE
    # =========================================================================
    op.create_table(
        "device_groups",
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("description", sa.Text, nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["tenant_id"], ["tenants.id"], ondelete="CASCADE"),
        sa.UniqueConstraint("tenant_id", "name", name="uq_device_groups_tenant_name"),
    )
    op.create_index("ix_device_groups_tenant_id", "device_groups", ["tenant_id"])
    # =========================================================================
    # DEVICE TAGS TABLE
    # =========================================================================
    op.create_table(
        "device_tags",
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            nullable=False,
        ),
        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("name", sa.String(100), nullable=False),
        # String(7) suits "#RRGGBB" hex colors — presumably; confirm with UI.
        sa.Column("color", sa.String(7), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["tenant_id"], ["tenants.id"], ondelete="CASCADE"),
        sa.UniqueConstraint("tenant_id", "name", name="uq_device_tags_tenant_name"),
    )
    op.create_index("ix_device_tags_tenant_id", "device_tags", ["tenant_id"])
    # =========================================================================
    # DEVICE GROUP MEMBERSHIPS TABLE
    # =========================================================================
    op.create_table(
        "device_group_memberships",
        sa.Column("device_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("group_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.PrimaryKeyConstraint("device_id", "group_id"),
        sa.ForeignKeyConstraint(["device_id"], ["devices.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["group_id"], ["device_groups.id"], ondelete="CASCADE"),
    )
    # =========================================================================
    # DEVICE TAG ASSIGNMENTS TABLE
    # =========================================================================
    op.create_table(
        "device_tag_assignments",
        sa.Column("device_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("tag_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.PrimaryKeyConstraint("device_id", "tag_id"),
        sa.ForeignKeyConstraint(["device_id"], ["devices.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["tag_id"], ["device_tags.id"], ondelete="CASCADE"),
    )
    # =========================================================================
    # ROW LEVEL SECURITY (RLS)
    # =========================================================================
    # RLS is the core tenant isolation mechanism. The app_user role CANNOT
    # bypass RLS (only superusers can). All queries through app_user will
    # be filtered by the current_setting('app.current_tenant') value which
    # is set per-request by the tenant_context middleware.
    conn = op.get_bind()
    # --- TENANTS RLS ---
    # Super admin sees all; tenant users see only their tenant
    conn.execute(sa.text("ALTER TABLE tenants ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE tenants FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON tenants
        USING (
            id::text = current_setting('app.current_tenant', true)
            OR current_setting('app.current_tenant', true) = 'super_admin'
        )
        WITH CHECK (
            id::text = current_setting('app.current_tenant', true)
            OR current_setting('app.current_tenant', true) = 'super_admin'
        )
    """))
    # --- USERS RLS ---
    # Users see only other users in their tenant; super_admin sees all
    conn.execute(sa.text("ALTER TABLE users ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE users FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON users
        USING (
            tenant_id::text = current_setting('app.current_tenant', true)
            OR current_setting('app.current_tenant', true) = 'super_admin'
        )
        WITH CHECK (
            tenant_id::text = current_setting('app.current_tenant', true)
            OR current_setting('app.current_tenant', true) = 'super_admin'
        )
    """))
    # --- DEVICES RLS ---
    conn.execute(sa.text("ALTER TABLE devices ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE devices FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON devices
        USING (tenant_id::text = current_setting('app.current_tenant', true))
        WITH CHECK (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    # --- DEVICE GROUPS RLS ---
    conn.execute(sa.text("ALTER TABLE device_groups ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE device_groups FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON device_groups
        USING (tenant_id::text = current_setting('app.current_tenant', true))
        WITH CHECK (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    # --- DEVICE TAGS RLS ---
    conn.execute(sa.text("ALTER TABLE device_tags ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE device_tags FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON device_tags
        USING (tenant_id::text = current_setting('app.current_tenant', true))
        WITH CHECK (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    # --- DEVICE GROUP MEMBERSHIPS RLS ---
    # These are filtered by joining through devices/groups (which already have RLS)
    # But we also add direct RLS via a join to the devices table
    # NOTE(review): this policy checks only the device side of the pair; it
    # does not verify group_id belongs to the same tenant — confirm whether a
    # group-side EXISTS check is also required.
    conn.execute(sa.text("ALTER TABLE device_group_memberships ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE device_group_memberships FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON device_group_memberships
        USING (
            EXISTS (
                SELECT 1 FROM devices d
                WHERE d.id = device_id
                AND d.tenant_id::text = current_setting('app.current_tenant', true)
            )
        )
        WITH CHECK (
            EXISTS (
                SELECT 1 FROM devices d
                WHERE d.id = device_id
                AND d.tenant_id::text = current_setting('app.current_tenant', true)
            )
        )
    """))
    # --- DEVICE TAG ASSIGNMENTS RLS ---
    conn.execute(sa.text("ALTER TABLE device_tag_assignments ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE device_tag_assignments FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON device_tag_assignments
        USING (
            EXISTS (
                SELECT 1 FROM devices d
                WHERE d.id = device_id
                AND d.tenant_id::text = current_setting('app.current_tenant', true)
            )
        )
        WITH CHECK (
            EXISTS (
                SELECT 1 FROM devices d
                WHERE d.id = device_id
                AND d.tenant_id::text = current_setting('app.current_tenant', true)
            )
        )
    """))
    # =========================================================================
    # GRANT PERMISSIONS TO app_user (RLS-enforcing application role)
    # =========================================================================
    # app_user is a non-superuser role — it CANNOT bypass RLS policies.
    # All API queries use this role to ensure tenant isolation.
    tables = [
        "tenants",
        "users",
        "devices",
        "device_groups",
        "device_tags",
        "device_group_memberships",
        "device_tag_assignments",
    ]
    for table in tables:
        conn.execute(sa.text(
            f"GRANT SELECT, INSERT, UPDATE, DELETE ON {table} TO app_user"
        ))
    # Grant sequence usage for UUID generation (gen_random_uuid is built-in, but just in case)
    conn.execute(sa.text("GRANT USAGE ON SCHEMA public TO app_user"))
    # Allow app_user to set the tenant context variable
    # NOTE: GRANT ... ON PARAMETER requires PostgreSQL 15 or newer.
    conn.execute(sa.text("GRANT SET ON PARAMETER app.current_tenant TO app_user"))
def downgrade() -> None:
    """Revert revision 001: revoke app_user grants and drop all tables.

    Mirrors upgrade() in reverse. Revokes are best-effort (the role or
    objects may already be gone); table drops are ordered children-first so
    foreign keys never block a drop.
    """
    conn = op.get_bind()
    # Revoke table-level grants given in upgrade().
    tables = [
        "tenants",
        "users",
        "devices",
        "device_groups",
        "device_tags",
        "device_group_memberships",
        "device_tag_assignments",
    ]
    for table in tables:
        try:
            conn.execute(sa.text(f"REVOKE ALL ON {table} FROM app_user"))
        except Exception:
            pass
    # upgrade() also granted SET on the app.current_tenant parameter; revoke
    # it so app_user is returned to its pre-migration state. (PostgreSQL 15+
    # syntax — tolerate failure on anything older or if already revoked.)
    try:
        conn.execute(sa.text(
            "REVOKE SET ON PARAMETER app.current_tenant FROM app_user"
        ))
    except Exception:
        pass
    # Drop tables in reverse dependency order (indexes and RLS policies are
    # dropped automatically with their tables).
    op.drop_table("device_tag_assignments")
    op.drop_table("device_group_memberships")
    op.drop_table("device_tags")
    op.drop_table("device_groups")
    op.drop_table("devices")
    op.drop_table("users")
    op.drop_table("tenants")

View File

@@ -0,0 +1,92 @@
"""Add routeros_major_version column and poller_user PostgreSQL role.
Revision ID: 002
Revises: 001
Create Date: 2026-02-24
This migration:
1. Adds routeros_major_version INTEGER column to devices table (nullable).
Stores the detected major version (6 or 7) as populated by the Go poller.
2. Creates the poller_user PostgreSQL role with SELECT-only access to the
devices table. The poller_user bypasses RLS intentionally — it must read
all devices across all tenants to poll them.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "002"
down_revision: Union[str, None] = "001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add devices.routeros_major_version and create the poller_user role.

    Grants CONNECT on the *current* database rather than a hard-coded
    'mikrotik' identifier, so the migration also works when POSTGRES_DB is
    customized.
    """
    # =========================================================================
    # ADD routeros_major_version COLUMN
    # =========================================================================
    # Stores the detected RouterOS major version (6 or 7) as an INTEGER.
    # Populated by the Go poller after a successful connection and
    # /system/resource/print query. NULL until the poller has connected at
    # least once.
    op.add_column(
        "devices",
        sa.Column("routeros_major_version", sa.Integer(), nullable=True),
    )
    # =========================================================================
    # CREATE poller_user ROLE AND GRANT PERMISSIONS
    # =========================================================================
    # The poller_user role is used exclusively by the Go poller service.
    # It has SELECT-only access to the devices table and does NOT enforce
    # RLS policies (BYPASSRLS). This allows the poller to read all devices
    # across all tenants, which is required for polling.
    conn = op.get_bind()
    # SECURITY: 'poller_password' is a development default that matches
    # .env.example. Production deployments must run
    #   ALTER ROLE poller_user PASSWORD '<strong secret>'
    # after migrating; role creation here is guarded, so re-running is safe.
    conn.execute(sa.text("""
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'poller_user') THEN
                CREATE ROLE poller_user WITH LOGIN PASSWORD 'poller_password' BYPASSRLS;
            END IF;
        END
        $$
    """))
    # GRANT ... ON DATABASE takes an identifier, not an expression, so build
    # the statement dynamically against current_database() with format(%I).
    conn.execute(sa.text("""
        DO $$
        BEGIN
            EXECUTE format('GRANT CONNECT ON DATABASE %I TO poller_user', current_database());
        END
        $$
    """))
    conn.execute(sa.text("GRANT USAGE ON SCHEMA public TO poller_user"))
    # SELECT on devices only — poller needs to read encrypted_credentials
    # and other device fields. No INSERT/UPDATE/DELETE needed.
    conn.execute(sa.text("GRANT SELECT ON devices TO poller_user"))
def downgrade() -> None:
    """Revert revision 002: tear down poller_user and drop the column.

    Each revoke is best-effort (grants may already be gone). The CONNECT
    revoke targets current_database() via format(%I), mirroring upgrade(),
    instead of a hard-coded 'mikrotik' database name.
    """
    conn = op.get_bind()
    try:
        conn.execute(sa.text("REVOKE SELECT ON devices FROM poller_user"))
    except Exception:
        pass
    try:
        conn.execute(sa.text("REVOKE USAGE ON SCHEMA public FROM poller_user"))
    except Exception:
        pass
    try:
        conn.execute(sa.text("""
            DO $$
            BEGIN
                EXECUTE format('REVOKE CONNECT ON DATABASE %I FROM poller_user', current_database());
            END
            $$
        """))
    except Exception:
        pass
    try:
        # Fails (and is swallowed) if the role still owns objects elsewhere.
        conn.execute(sa.text("DROP ROLE IF EXISTS poller_user"))
    except Exception:
        pass
    # Drop the column added by upgrade().
    op.drop_column("devices", "routeros_major_version")

View File

@@ -0,0 +1,174 @@
"""Add TimescaleDB hypertables for metrics and denormalized columns on devices.
Revision ID: 003
Revises: 002
Create Date: 2026-02-25
This migration:
1. Creates interface_metrics hypertable for per-interface traffic counters.
2. Creates health_metrics hypertable for per-device CPU/memory/disk/temperature.
3. Creates wireless_metrics hypertable for per-interface wireless client stats.
4. Adds last_cpu_load and last_memory_used_pct denormalized columns to devices
for efficient fleet table display without joining hypertables.
5. Applies RLS tenant_isolation policies and appropriate GRANTs on all hypertables.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "003"
down_revision: Union[str, None] = "002"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the metrics hypertables, their RLS policies, and device columns.

    Correctness fixes vs. the draft: every policy reads
    current_setting('app.current_tenant', true) — the missing_ok form — so a
    session without tenant context sees zero rows instead of raising an
    error, and RLS is FORCEd, both matching the revision 001 policies.
    """
    conn = op.get_bind()
    # =========================================================================
    # CREATE interface_metrics HYPERTABLE
    # =========================================================================
    # Stores per-interface byte counters from /interface/print on every poll cycle.
    # rx_bps/tx_bps are stored as NULL — computed at query time via LAG() window
    # function to avoid delta state in the poller.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS interface_metrics (
            time TIMESTAMPTZ NOT NULL,
            device_id UUID NOT NULL,
            tenant_id UUID NOT NULL,
            interface TEXT NOT NULL,
            rx_bytes BIGINT,
            tx_bytes BIGINT,
            rx_bps BIGINT,
            tx_bps BIGINT
        )
    """))
    conn.execute(sa.text(
        "SELECT create_hypertable('interface_metrics', 'time', if_not_exists => TRUE)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_interface_metrics_device_time "
        "ON interface_metrics (device_id, time DESC)"
    ))
    conn.execute(sa.text("ALTER TABLE interface_metrics ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE interface_metrics FORCE ROW LEVEL SECURITY"))
    # No WITH CHECK: for a FOR ALL policy, USING is also applied to writes.
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON interface_metrics
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT ON interface_metrics TO app_user"))
    conn.execute(sa.text("GRANT SELECT, INSERT ON interface_metrics TO poller_user"))
    # =========================================================================
    # CREATE health_metrics HYPERTABLE
    # =========================================================================
    # Stores per-device system health metrics from /system/resource/print and
    # /system/health/print on every poll cycle.
    # temperature is nullable — not all RouterOS devices have temperature sensors.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS health_metrics (
            time TIMESTAMPTZ NOT NULL,
            device_id UUID NOT NULL,
            tenant_id UUID NOT NULL,
            cpu_load SMALLINT,
            free_memory BIGINT,
            total_memory BIGINT,
            free_disk BIGINT,
            total_disk BIGINT,
            temperature SMALLINT
        )
    """))
    conn.execute(sa.text(
        "SELECT create_hypertable('health_metrics', 'time', if_not_exists => TRUE)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_health_metrics_device_time "
        "ON health_metrics (device_id, time DESC)"
    ))
    conn.execute(sa.text("ALTER TABLE health_metrics ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE health_metrics FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON health_metrics
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT ON health_metrics TO app_user"))
    conn.execute(sa.text("GRANT SELECT, INSERT ON health_metrics TO poller_user"))
    # =========================================================================
    # CREATE wireless_metrics HYPERTABLE
    # =========================================================================
    # Stores per-wireless-interface aggregated client stats from
    # /interface/wireless/registration-table/print (v6) or
    # /interface/wifi/registration-table/print (v7).
    # ccq may be 0 on RouterOS v7 (not available in the WiFi API path).
    # avg_signal is dBm (negative integer, e.g. -67).
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS wireless_metrics (
            time TIMESTAMPTZ NOT NULL,
            device_id UUID NOT NULL,
            tenant_id UUID NOT NULL,
            interface TEXT NOT NULL,
            client_count SMALLINT,
            avg_signal SMALLINT,
            ccq SMALLINT,
            frequency INTEGER
        )
    """))
    conn.execute(sa.text(
        "SELECT create_hypertable('wireless_metrics', 'time', if_not_exists => TRUE)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_wireless_metrics_device_time "
        "ON wireless_metrics (device_id, time DESC)"
    ))
    conn.execute(sa.text("ALTER TABLE wireless_metrics ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE wireless_metrics FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON wireless_metrics
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT ON wireless_metrics TO app_user"))
    conn.execute(sa.text("GRANT SELECT, INSERT ON wireless_metrics TO poller_user"))
    # =========================================================================
    # ADD DENORMALIZED COLUMNS TO devices TABLE
    # =========================================================================
    # These columns are updated by the metrics subscriber alongside each
    # health_metrics insert, enabling the fleet table to display CPU and memory
    # usage without a JOIN to the hypertable.
    op.add_column(
        "devices",
        sa.Column("last_cpu_load", sa.SmallInteger(), nullable=True),
    )
    op.add_column(
        "devices",
        sa.Column("last_memory_used_pct", sa.SmallInteger(), nullable=True),
    )
def downgrade() -> None:
    """Revert revision 003: drop denormalized columns, then the hypertables."""
    # Columns bolted onto devices come off first.
    for column_name in ("last_memory_used_pct", "last_cpu_load"):
        op.drop_column("devices", column_name)
    bind = op.get_bind()
    # CASCADE also removes indexes, RLS policies, and TimescaleDB chunks.
    for table_name in ("wireless_metrics", "health_metrics", "interface_metrics"):
        bind.execute(sa.text(f"DROP TABLE IF EXISTS {table_name} CASCADE"))

View File

@@ -0,0 +1,128 @@
"""Add config management tables: config_backup_runs, config_backup_schedules, config_push_operations.
Revision ID: 004
Revises: 003
Create Date: 2026-02-25
This migration:
1. Creates config_backup_runs table for backup metadata (content lives in git).
2. Creates config_backup_schedules table for per-tenant/per-device schedule config.
3. Creates config_push_operations table for panic-revert recovery (API-restart safety).
4. Applies RLS tenant_isolation policies and appropriate GRANTs on all tables.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "004"
down_revision: Union[str, None] = "003"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the three config-management tables with RLS and grants.

    Correctness fixes vs. the draft: every policy reads
    current_setting('app.current_tenant', true) — the missing_ok form — so a
    session without tenant context sees zero rows instead of raising an
    error, and RLS is FORCEd, both matching the revision 001 policies.
    """
    conn = op.get_bind()
    # =========================================================================
    # CREATE config_backup_runs TABLE
    # =========================================================================
    # Stores metadata for each backup run. The actual config content lives in
    # the tenant's bare git repository (GIT_STORE_PATH). This table provides
    # the timeline view and change tracking without duplicating file content.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS config_backup_runs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            device_id UUID NOT NULL REFERENCES devices(id) ON DELETE CASCADE,
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            commit_sha TEXT NOT NULL,
            trigger_type TEXT NOT NULL,
            lines_added INT,
            lines_removed INT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
    """))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_config_backup_runs_device_created "
        "ON config_backup_runs (device_id, created_at DESC)"
    ))
    conn.execute(sa.text("ALTER TABLE config_backup_runs ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE config_backup_runs FORCE ROW LEVEL SECURITY"))
    # No WITH CHECK: for a FOR ALL policy, USING is also applied to writes.
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON config_backup_runs
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT ON config_backup_runs TO app_user"))
    conn.execute(sa.text("GRANT SELECT ON config_backup_runs TO poller_user"))
    # =========================================================================
    # CREATE config_backup_schedules TABLE
    # =========================================================================
    # Stores per-tenant default and per-device override schedules.
    # device_id = NULL means tenant default (applies to all devices in tenant).
    # A per-device row with a specific device_id overrides the tenant default.
    # UNIQUE(tenant_id, device_id) allows one entry per (tenant, device) pair
    # where device_id NULL is the tenant-level default.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS config_backup_schedules (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            device_id UUID REFERENCES devices(id) ON DELETE CASCADE,
            cron_expression TEXT NOT NULL DEFAULT '0 2 * * *',
            enabled BOOL NOT NULL DEFAULT TRUE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            UNIQUE(tenant_id, device_id)
        )
    """))
    conn.execute(sa.text("ALTER TABLE config_backup_schedules ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE config_backup_schedules FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON config_backup_schedules
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT, UPDATE ON config_backup_schedules TO app_user"))
    # =========================================================================
    # CREATE config_push_operations TABLE
    # =========================================================================
    # Tracks pending two-phase config push operations for panic-revert recovery.
    # If the API pod restarts during the 60-second verification window, the
    # startup handler checks for 'pending_verification' rows and either verifies
    # connectivity (clean up the RouterOS scheduler job) or marks as failed.
    # See Pitfall 6 in 04-RESEARCH.md.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS config_push_operations (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            device_id UUID NOT NULL REFERENCES devices(id) ON DELETE CASCADE,
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            pre_push_commit_sha TEXT NOT NULL,
            scheduler_name TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'pending_verification',
            started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            completed_at TIMESTAMPTZ
        )
    """))
    conn.execute(sa.text("ALTER TABLE config_push_operations ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE config_push_operations FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON config_push_operations
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text("GRANT SELECT, INSERT, UPDATE ON config_push_operations TO app_user"))
def downgrade() -> None:
    """Revert revision 004: drop the config management tables."""
    bind = op.get_bind()
    # Reverse of creation order; CASCADE cleans up indexes and policies.
    for table_name in (
        "config_push_operations",
        "config_backup_schedules",
        "config_backup_runs",
    ):
        bind.execute(sa.text(f"DROP TABLE IF EXISTS {table_name} CASCADE"))

View File

@@ -0,0 +1,286 @@
"""Add alerting and firmware management tables.
Revision ID: 005
Revises: 004
Create Date: 2026-02-25
This migration:
1. ALTERs devices table: adds architecture and preferred_channel columns.
2. ALTERs device_groups table: adds preferred_channel column.
3. Creates alert_rules, notification_channels, alert_rule_channels, alert_events tables.
4. Creates firmware_versions, firmware_upgrade_jobs tables.
5. Applies RLS policies on tenant-scoped tables.
6. Seeds default alert rules for all existing tenants.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "005"
down_revision: Union[str, None] = "004"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create alerting/firmware tables, RLS, grants, and seed default rules.

    Correctness fixes vs. the draft: every policy reads
    current_setting('app.current_tenant', true) — the missing_ok form — so a
    session without tenant context sees zero rows instead of raising an
    error, and RLS is FORCEd, both matching the revision 001 policies.
    """
    conn = op.get_bind()
    # =========================================================================
    # ALTER devices TABLE — add architecture and preferred_channel columns
    # =========================================================================
    conn.execute(sa.text(
        "ALTER TABLE devices ADD COLUMN IF NOT EXISTS architecture TEXT"
    ))
    conn.execute(sa.text(
        "ALTER TABLE devices ADD COLUMN IF NOT EXISTS preferred_channel TEXT DEFAULT 'stable' NOT NULL"
    ))
    # =========================================================================
    # ALTER device_groups TABLE — add preferred_channel column
    # =========================================================================
    conn.execute(sa.text(
        "ALTER TABLE device_groups ADD COLUMN IF NOT EXISTS preferred_channel TEXT DEFAULT 'stable' NOT NULL"
    ))
    # =========================================================================
    # CREATE alert_rules TABLE
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS alert_rules (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            device_id UUID REFERENCES devices(id) ON DELETE CASCADE,
            group_id UUID REFERENCES device_groups(id) ON DELETE SET NULL,
            name TEXT NOT NULL,
            metric TEXT NOT NULL,
            operator TEXT NOT NULL,
            threshold NUMERIC NOT NULL,
            duration_polls INTEGER NOT NULL DEFAULT 1,
            severity TEXT NOT NULL,
            enabled BOOLEAN NOT NULL DEFAULT TRUE,
            is_default BOOLEAN NOT NULL DEFAULT FALSE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
    """))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_alert_rules_tenant_enabled "
        "ON alert_rules (tenant_id, enabled)"
    ))
    conn.execute(sa.text("ALTER TABLE alert_rules ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE alert_rules FORCE ROW LEVEL SECURITY"))
    # No WITH CHECK: for a FOR ALL policy, USING is also applied to writes.
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON alert_rules
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON alert_rules TO app_user"
    ))
    # NOTE(review): GRANT ALL is broader than the SELECT-only contract
    # documented for poller_user in revision 002 — confirm which privileges
    # the poller-side alert evaluator actually needs and tighten if possible.
    conn.execute(sa.text("GRANT ALL ON alert_rules TO poller_user"))
    # =========================================================================
    # CREATE notification_channels TABLE
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS notification_channels (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            name TEXT NOT NULL,
            channel_type TEXT NOT NULL,
            smtp_host TEXT,
            smtp_port INTEGER,
            smtp_user TEXT,
            smtp_password BYTEA,
            smtp_use_tls BOOLEAN DEFAULT FALSE,
            from_address TEXT,
            to_address TEXT,
            webhook_url TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
    """))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_notification_channels_tenant "
        "ON notification_channels (tenant_id)"
    ))
    conn.execute(sa.text("ALTER TABLE notification_channels ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE notification_channels FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON notification_channels
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON notification_channels TO app_user"
    ))
    conn.execute(sa.text("GRANT ALL ON notification_channels TO poller_user"))
    # =========================================================================
    # CREATE alert_rule_channels TABLE (M2M association)
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS alert_rule_channels (
            rule_id UUID NOT NULL REFERENCES alert_rules(id) ON DELETE CASCADE,
            channel_id UUID NOT NULL REFERENCES notification_channels(id) ON DELETE CASCADE,
            PRIMARY KEY (rule_id, channel_id)
        )
    """))
    conn.execute(sa.text("ALTER TABLE alert_rule_channels ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE alert_rule_channels FORCE ROW LEVEL SECURITY"))
    # RLS for M2M: join through parent table's tenant_id via rule_id
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON alert_rule_channels
        USING (rule_id IN (
            SELECT id FROM alert_rules
            WHERE tenant_id::text = current_setting('app.current_tenant', true)
        ))
    """))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON alert_rule_channels TO app_user"
    ))
    conn.execute(sa.text("GRANT ALL ON alert_rule_channels TO poller_user"))
    # =========================================================================
    # CREATE alert_events TABLE
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS alert_events (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            rule_id UUID REFERENCES alert_rules(id) ON DELETE SET NULL,
            device_id UUID NOT NULL REFERENCES devices(id) ON DELETE CASCADE,
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            status TEXT NOT NULL,
            severity TEXT NOT NULL,
            metric TEXT,
            value NUMERIC,
            threshold NUMERIC,
            message TEXT,
            is_flapping BOOLEAN NOT NULL DEFAULT FALSE,
            acknowledged_at TIMESTAMPTZ,
            acknowledged_by UUID REFERENCES users(id) ON DELETE SET NULL,
            silenced_until TIMESTAMPTZ,
            fired_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            resolved_at TIMESTAMPTZ
        )
    """))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_alert_events_device_rule_status "
        "ON alert_events (device_id, rule_id, status)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_alert_events_tenant_fired "
        "ON alert_events (tenant_id, fired_at)"
    ))
    conn.execute(sa.text("ALTER TABLE alert_events ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE alert_events FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON alert_events
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON alert_events TO app_user"
    ))
    conn.execute(sa.text("GRANT ALL ON alert_events TO poller_user"))
    # =========================================================================
    # CREATE firmware_versions TABLE (global — NOT tenant-scoped)
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS firmware_versions (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            architecture TEXT NOT NULL,
            channel TEXT NOT NULL,
            version TEXT NOT NULL,
            npk_url TEXT NOT NULL,
            npk_local_path TEXT,
            npk_size_bytes BIGINT,
            checked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            UNIQUE(architecture, channel, version)
        )
    """))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_firmware_versions_arch_channel "
        "ON firmware_versions (architecture, channel)"
    ))
    # No RLS on firmware_versions — global cache table
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE ON firmware_versions TO app_user"
    ))
    conn.execute(sa.text("GRANT ALL ON firmware_versions TO poller_user"))
    # =========================================================================
    # CREATE firmware_upgrade_jobs TABLE
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS firmware_upgrade_jobs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            device_id UUID NOT NULL REFERENCES devices(id) ON DELETE CASCADE,
            rollout_group_id UUID,
            target_version TEXT NOT NULL,
            architecture TEXT NOT NULL,
            channel TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'pending',
            pre_upgrade_backup_sha TEXT,
            scheduled_at TIMESTAMPTZ,
            started_at TIMESTAMPTZ,
            completed_at TIMESTAMPTZ,
            error_message TEXT,
            confirmed_major_upgrade BOOLEAN NOT NULL DEFAULT FALSE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
    """))
    conn.execute(sa.text("ALTER TABLE firmware_upgrade_jobs ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("ALTER TABLE firmware_upgrade_jobs FORCE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        CREATE POLICY tenant_isolation ON firmware_upgrade_jobs
        USING (tenant_id::text = current_setting('app.current_tenant', true))
    """))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON firmware_upgrade_jobs TO app_user"
    ))
    conn.execute(sa.text("GRANT ALL ON firmware_upgrade_jobs TO poller_user"))
    # =========================================================================
    # SEED DEFAULT ALERT RULES for all existing tenants
    # =========================================================================
    # Note: New tenant creation (in the tenants API router) should also seed
    # these three default rules. A _seed_default_alert_rules(tenant_id) helper
    # should be created in the alerts router or a shared service for this.
    # Seeding runs as the migration role (superuser), which bypasses RLS.
    conn.execute(sa.text("""
        INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
        SELECT gen_random_uuid(), t.id, 'High CPU Usage', 'cpu_load', 'gt', 90, 5, 'warning', TRUE, TRUE
        FROM tenants t
    """))
    conn.execute(sa.text("""
        INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
        SELECT gen_random_uuid(), t.id, 'High Memory Usage', 'memory_used_pct', 'gt', 90, 5, 'warning', TRUE, TRUE
        FROM tenants t
    """))
    conn.execute(sa.text("""
        INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
        SELECT gen_random_uuid(), t.id, 'High Disk Usage', 'disk_used_pct', 'gt', 85, 3, 'warning', TRUE, TRUE
        FROM tenants t
    """))
def downgrade() -> None:
    """Tear down migration 005: alerting/firmware tables, then added columns.

    Tables are dropped in reverse dependency order so foreign keys never
    dangle; CASCADE sweeps dependent policies and indexes along with them.
    """
    bind = op.get_bind()
    for table in (
        "firmware_upgrade_jobs",
        "firmware_versions",
        "alert_events",
        "alert_rule_channels",
        "notification_channels",
        "alert_rules",
    ):
        bind.execute(sa.text(f"DROP TABLE IF EXISTS {table} CASCADE"))
    # Strip the columns this migration added to existing tables.
    for table, column in (
        ("devices", "architecture"),
        ("devices", "preferred_channel"),
        ("device_groups", "preferred_channel"),
    ):
        bind.execute(sa.text(f"ALTER TABLE {table} DROP COLUMN IF EXISTS {column}"))

View File

@@ -0,0 +1,212 @@
"""Add config templates, template push jobs, and device location columns.
Revision ID: 006
Revises: 005
Create Date: 2026-02-25
This migration:
1. ALTERs devices table: adds latitude and longitude columns.
2. Creates config_templates table.
3. Creates config_template_tags table.
4. Creates template_push_jobs table.
5. Applies RLS policies on all three new tables.
6. Seeds starter templates for all existing tenants.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "006"
down_revision: Union[str, None] = "005"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply migration 006: device lat/long, config-template tables, RLS, seeds.

    Everything runs as raw SQL on the migration connection.  All DDL uses
    IF NOT EXISTS guards and the seeds use ON CONFLICT DO NOTHING, so a
    partially applied run can be retried safely.
    """
    conn = op.get_bind()
    # =========================================================================
    # ALTER devices TABLE — add latitude and longitude columns
    # =========================================================================
    conn.execute(sa.text(
        "ALTER TABLE devices ADD COLUMN IF NOT EXISTS latitude DOUBLE PRECISION"
    ))
    conn.execute(sa.text(
        "ALTER TABLE devices ADD COLUMN IF NOT EXISTS longitude DOUBLE PRECISION"
    ))
    # =========================================================================
    # CREATE config_templates TABLE
    # =========================================================================
    # UNIQUE(tenant_id, name) is what makes the ON CONFLICT seeds below no-ops
    # for tenants that already have a template of the same name.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS config_templates (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            name TEXT NOT NULL,
            description TEXT,
            content TEXT NOT NULL,
            variables JSONB NOT NULL DEFAULT '[]'::jsonb,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            UNIQUE(tenant_id, name)
        )
    """))
    # =========================================================================
    # CREATE config_template_tags TABLE
    # =========================================================================
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS config_template_tags (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            name VARCHAR(100) NOT NULL,
            template_id UUID NOT NULL REFERENCES config_templates(id) ON DELETE CASCADE,
            UNIQUE(template_id, name)
        )
    """))
    # =========================================================================
    # CREATE template_push_jobs TABLE
    # =========================================================================
    # template_id uses ON DELETE SET NULL so job history survives template
    # deletion; rendered_content keeps the materialized config for auditing.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS template_push_jobs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            template_id UUID REFERENCES config_templates(id) ON DELETE SET NULL,
            device_id UUID NOT NULL REFERENCES devices(id) ON DELETE CASCADE,
            rollout_id UUID,
            rendered_content TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'pending',
            pre_push_backup_sha TEXT,
            error_message TEXT,
            started_at TIMESTAMPTZ,
            completed_at TIMESTAMPTZ,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )
    """))
    # =========================================================================
    # RLS POLICIES
    # =========================================================================
    # NOTE(review): this policy calls current_setting without the missing_ok
    # flag, so queries error out when app.current_tenant is unset, whereas
    # later migrations (008, 013) use NULLIF(current_setting(..., true), '')
    # and silently match nothing -- confirm which behavior is intended.
    for table in ("config_templates", "config_template_tags", "template_push_jobs"):
        conn.execute(sa.text(f"ALTER TABLE {table} ENABLE ROW LEVEL SECURITY"))
        conn.execute(sa.text(f"""
            CREATE POLICY {table}_tenant_isolation ON {table}
            USING (tenant_id = current_setting('app.current_tenant')::uuid)
        """))
        conn.execute(sa.text(
            f"GRANT SELECT, INSERT, UPDATE, DELETE ON {table} TO app_user"
        ))
        conn.execute(sa.text(f"GRANT ALL ON {table} TO poller_user"))
    # =========================================================================
    # INDEXES
    # =========================================================================
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_config_templates_tenant "
        "ON config_templates (tenant_id)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_config_template_tags_template "
        "ON config_template_tags (template_id)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_template_push_jobs_tenant_rollout "
        "ON template_push_jobs (tenant_id, rollout_id)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_template_push_jobs_device_status "
        "ON template_push_jobs (device_id, status)"
    ))
    # =========================================================================
    # SEED STARTER TEMPLATES for all existing tenants
    # =========================================================================
    # 1. Basic Firewall
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Basic Firewall',
            'Standard firewall ruleset with WAN protection and LAN forwarding',
            '/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=8291 action=drop comment="Block Winbox from WAN"
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=22 action=drop comment="Block SSH from WAN"
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward src-address={{ allowed_network }} action=accept
add chain=forward action=drop',
        '[{"name":"wan_interface","type":"string","default":"ether1","description":"WAN-facing interface"},{"name":"allowed_network","type":"subnet","default":"192.168.1.0/24","description":"Allowed source network"}]'::jsonb
        FROM tenants t
        ON CONFLICT DO NOTHING
    """))
    # 2. DHCP Server Setup
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'DHCP Server Setup',
            'Configure DHCP server with address pool, DNS, and gateway',
            '/ip pool add name=dhcp-pool ranges={{ pool_start }}-{{ pool_end }}
/ip dhcp-server network add address={{ gateway }}/24 gateway={{ gateway }} dns-server={{ dns_server }}
/ip dhcp-server add name=dhcp1 interface={{ interface }} address-pool=dhcp-pool disabled=no',
        '[{"name":"pool_start","type":"ip","default":"192.168.1.100","description":"DHCP pool start address"},{"name":"pool_end","type":"ip","default":"192.168.1.254","description":"DHCP pool end address"},{"name":"gateway","type":"ip","default":"192.168.1.1","description":"Default gateway"},{"name":"dns_server","type":"ip","default":"8.8.8.8","description":"DNS server address"},{"name":"interface","type":"string","default":"bridge1","description":"Interface to serve DHCP on"}]'::jsonb
        FROM tenants t
        ON CONFLICT DO NOTHING
    """))
    # 3. Wireless AP Config
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Wireless AP Config',
            'Configure wireless access point with WPA2 security',
            '/interface wireless security-profiles add name=portal-wpa2 mode=dynamic-keys authentication-types=wpa2-psk wpa2-pre-shared-key={{ password }}
/interface wireless set wlan1 mode=ap-bridge ssid={{ ssid }} security-profile=portal-wpa2 frequency={{ frequency }} channel-width={{ channel_width }} disabled=no',
        '[{"name":"ssid","type":"string","default":"MikroTik-AP","description":"Wireless network name"},{"name":"password","type":"string","default":"","description":"WPA2 pre-shared key (min 8 characters)"},{"name":"frequency","type":"integer","default":"2412","description":"Wireless frequency in MHz"},{"name":"channel_width","type":"string","default":"20/40mhz-XX","description":"Channel width setting"}]'::jsonb
        FROM tenants t
        ON CONFLICT DO NOTHING
    """))
    # 4. Initial Device Setup
    # NOTE(review): ntp_server below is declared type "ip" but defaults to the
    # hostname "pool.ntp.org"; presumably the type should be "string" --
    # confirm against the template-variable validator.
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Initial Device Setup',
            'Set device identity, NTP, DNS, and disable unused services',
            '/system identity set name={{ device.hostname }}
/system ntp client set enabled=yes servers={{ ntp_server }}
/ip dns set servers={{ dns_servers }} allow-remote-requests=no
/ip service disable telnet,ftp,www,api-ssl
/ip service set ssh port=22
/ip service set winbox port=8291',
        '[{"name":"ntp_server","type":"ip","default":"pool.ntp.org","description":"NTP server address"},{"name":"dns_servers","type":"string","default":"8.8.8.8,8.8.4.4","description":"Comma-separated DNS servers"}]'::jsonb
        FROM tenants t
        ON CONFLICT DO NOTHING
    """))
def downgrade() -> None:
    """Tear down migration 006: template tables, then device lat/long columns."""
    bind = op.get_bind()
    # Reverse dependency order: push jobs and tags reference config_templates.
    for table in ("template_push_jobs", "config_template_tags", "config_templates"):
        bind.execute(sa.text(f"DROP TABLE IF EXISTS {table} CASCADE"))
    # Remove the location columns this migration added to devices.
    for column in ("latitude", "longitude"):
        bind.execute(sa.text(f"ALTER TABLE devices DROP COLUMN IF EXISTS {column}"))

View File

@@ -0,0 +1,82 @@
"""Create audit_logs table with RLS policy.
Revision ID: 007
Revises: 006
Create Date: 2026-03-02
This migration:
1. Creates audit_logs table for centralized audit trail.
2. Applies RLS policy for tenant isolation.
3. Creates indexes for fast paginated and filtered queries.
4. Grants SELECT, INSERT to app_user (read and write audit entries).
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "007"
down_revision: Union[str, None] = "006"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply migration 007: create the tenant-scoped audit_logs table.

    Creates the table, enables RLS with a tenant-isolation policy, grants
    append-only access to app_user, and adds two composite indexes sized
    for paginated (created_at DESC) and action-filtered listings.
    """
    conn = op.get_bind()
    # =========================================================================
    # CREATE audit_logs TABLE
    # =========================================================================
    # user_id / device_id use ON DELETE SET NULL so the audit trail survives
    # deletion of its subjects; ip_address VARCHAR(45) -- presumably sized
    # for textual IPv6 (45 chars max), confirm with the writer of these rows.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS audit_logs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            user_id UUID REFERENCES users(id) ON DELETE SET NULL,
            action VARCHAR(100) NOT NULL,
            resource_type VARCHAR(50),
            resource_id VARCHAR(255),
            device_id UUID REFERENCES devices(id) ON DELETE SET NULL,
            details JSONB NOT NULL DEFAULT '{}'::jsonb,
            ip_address VARCHAR(45),
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )
    """))
    # =========================================================================
    # RLS POLICY
    # =========================================================================
    # NOTE(review): strict current_setting (no missing_ok) -- errors when
    # app.current_tenant is unset; differs from the NULLIF pattern in 008/013.
    conn.execute(sa.text(
        "ALTER TABLE audit_logs ENABLE ROW LEVEL SECURITY"
    ))
    conn.execute(sa.text("""
        CREATE POLICY audit_logs_tenant_isolation ON audit_logs
        USING (tenant_id = current_setting('app.current_tenant')::uuid)
    """))
    # Grant SELECT + INSERT to app_user (no UPDATE/DELETE -- audit logs are immutable)
    conn.execute(sa.text(
        "GRANT SELECT, INSERT ON audit_logs TO app_user"
    ))
    # Poller user gets full access for cross-tenant audit logging
    conn.execute(sa.text(
        "GRANT ALL ON audit_logs TO poller_user"
    ))
    # =========================================================================
    # INDEXES
    # =========================================================================
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_audit_logs_tenant_created "
        "ON audit_logs (tenant_id, created_at DESC)"
    ))
    conn.execute(sa.text(
        "CREATE INDEX IF NOT EXISTS idx_audit_logs_tenant_action "
        "ON audit_logs (tenant_id, action)"
    ))
def downgrade() -> None:
    """Drop audit_logs; CASCADE removes its policy and indexes as well."""
    op.get_bind().execute(sa.text("DROP TABLE IF EXISTS audit_logs CASCADE"))

View File

@@ -0,0 +1,86 @@
"""Add maintenance_windows table with RLS.
Revision ID: 008
Revises: 007
Create Date: 2026-03-02
This migration:
1. Creates maintenance_windows table for scheduling maintenance periods.
2. Adds CHECK constraint (end_at > start_at).
3. Creates composite index on (tenant_id, start_at, end_at) for active window queries.
4. Applies RLS policy matching the standard tenant_id isolation pattern.
5. Grants permissions to app_user role.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "008"
down_revision: Union[str, None] = "007"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply migration 008: maintenance_windows table with RLS and grants.

    The policy and grant are wrapped in DO blocks so the migration is
    idempotent (CREATE POLICY has no IF NOT EXISTS) and tolerates the
    app_user role being absent (e.g. in a bare test database).
    """
    conn = op.get_bind()
    # ── 1. Create maintenance_windows table ────────────────────────────────
    # device_ids is a JSONB array of device UUIDs; the CHECK constraint
    # rejects windows that end before they start.
    conn.execute(sa.text("""
        CREATE TABLE IF NOT EXISTS maintenance_windows (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
            name VARCHAR(200) NOT NULL,
            device_ids JSONB NOT NULL DEFAULT '[]'::jsonb,
            start_at TIMESTAMPTZ NOT NULL,
            end_at TIMESTAMPTZ NOT NULL,
            suppress_alerts BOOLEAN NOT NULL DEFAULT true,
            notes TEXT,
            created_by UUID REFERENCES users(id) ON DELETE SET NULL,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            CONSTRAINT chk_maintenance_window_dates CHECK (end_at > start_at)
        )
    """))
    # ── 2. Composite index for active window queries ───────────────────────
    conn.execute(sa.text("""
        CREATE INDEX IF NOT EXISTS idx_maintenance_windows_tenant_time
        ON maintenance_windows (tenant_id, start_at, end_at)
    """))
    # ── 3. RLS policy ─────────────────────────────────────────────────────
    # NULLIF(current_setting(..., true), '') yields NULL (matches no rows)
    # instead of erroring when app.current_tenant is unset or empty.
    conn.execute(sa.text("ALTER TABLE maintenance_windows ENABLE ROW LEVEL SECURITY"))
    conn.execute(sa.text("""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_policies
                WHERE tablename = 'maintenance_windows' AND policyname = 'maintenance_windows_tenant_isolation'
            ) THEN
                CREATE POLICY maintenance_windows_tenant_isolation ON maintenance_windows
                    USING (tenant_id = NULLIF(current_setting('app.current_tenant', true), '')::uuid);
            END IF;
        END
        $$
    """))
    # ── 4. Grant permissions to app_user ───────────────────────────────────
    conn.execute(sa.text("""
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'app_user') THEN
                GRANT SELECT, INSERT, UPDATE, DELETE ON maintenance_windows TO app_user;
            END IF;
        END
        $$
    """))
def downgrade() -> None:
    """Drop maintenance_windows; its policy and index go with it via CASCADE."""
    op.get_bind().execute(sa.text("DROP TABLE IF EXISTS maintenance_windows CASCADE"))

View File

@@ -0,0 +1,93 @@
"""Add api_keys table with RLS for tenant-scoped API key management.
Revision ID: 009
Revises: 008
Create Date: 2026-03-02
This migration:
1. Creates api_keys table (UUID PK, tenant_id FK, user_id FK, key_hash, scopes JSONB).
2. Adds unique index on key_hash for O(1) validation lookups.
3. Adds composite index on (tenant_id, revoked_at) for listing active keys.
4. Applies RLS policy on tenant_id.
5. Grants SELECT, INSERT, UPDATE to app_user.
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "009"
down_revision: Union[str, None] = "008"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply migration 009: api_keys table, indexes, forced RLS, grants.

    key_hash (not the key itself) is stored; the unique index on it backs
    O(1) validation lookups.  FORCE ROW LEVEL SECURITY makes the policy
    apply even to the table owner.
    """
    conn = op.get_bind()
    # 1. Create api_keys table
    # key_prefix keeps the first few characters of the key for display;
    # revoked_at NULL means the key is active.
    conn.execute(
        sa.text("""
            CREATE TABLE IF NOT EXISTS api_keys (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
                user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
                name VARCHAR(200) NOT NULL,
                key_prefix VARCHAR(12) NOT NULL,
                key_hash VARCHAR(64) NOT NULL,
                scopes JSONB NOT NULL DEFAULT '[]'::jsonb,
                expires_at TIMESTAMPTZ,
                last_used_at TIMESTAMPTZ,
                created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
                revoked_at TIMESTAMPTZ
            );
        """)
    )
    # 2. Unique index on key_hash for fast validation lookups
    conn.execute(
        sa.text("""
            CREATE UNIQUE INDEX IF NOT EXISTS ix_api_keys_key_hash
            ON api_keys (key_hash);
        """)
    )
    # 3. Composite index for listing active keys per tenant
    conn.execute(
        sa.text("""
            CREATE INDEX IF NOT EXISTS ix_api_keys_tenant_revoked
            ON api_keys (tenant_id, revoked_at);
        """)
    )
    # 4. Enable RLS and create tenant isolation policy
    # The policy has an explicit bypass when app.current_tenant is the
    # literal 'super_admin' -- unlike earlier tables, which have no bypass.
    # With missing_ok=true an unset GUC compares as NULL and matches nothing.
    conn.execute(sa.text("ALTER TABLE api_keys ENABLE ROW LEVEL SECURITY;"))
    conn.execute(sa.text("ALTER TABLE api_keys FORCE ROW LEVEL SECURITY;"))
    conn.execute(
        sa.text("""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM pg_policies
                    WHERE tablename = 'api_keys' AND policyname = 'tenant_isolation'
                ) THEN
                    CREATE POLICY tenant_isolation ON api_keys
                        USING (
                            tenant_id::text = current_setting('app.current_tenant', true)
                            OR current_setting('app.current_tenant', true) = 'super_admin'
                        );
                END IF;
            END $$;
        """)
    )
    # 5. Grant permissions to app_user role
    # No DELETE: keys are revoked (revoked_at set via UPDATE), never deleted.
    conn.execute(sa.text("GRANT SELECT, INSERT, UPDATE ON api_keys TO app_user;"))
def downgrade() -> None:
    """Drop api_keys; indexes and the RLS policy are removed via CASCADE."""
    op.get_bind().execute(sa.text("DROP TABLE IF EXISTS api_keys CASCADE;"))

View File

@@ -0,0 +1,90 @@
"""Add vpn_config and vpn_peers tables for WireGuard VPN management.
Revision ID: 010
Revises: 009
Create Date: 2026-03-02
This migration:
1. Creates vpn_config table (one row per tenant — server keys, subnet, port).
2. Creates vpn_peers table (one row per device VPN connection).
3. Applies RLS policies on tenant_id.
4. Grants SELECT, INSERT, UPDATE, DELETE to app_user.
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
# revision identifiers
revision: str = "010"
down_revision: Union[str, None] = "009"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply migration 010: vpn_config and vpn_peers tables for WireGuard.

    Uses op.create_table (unlike the raw-SQL style of earlier migrations),
    then enables RLS with policies scoped TO app_user and grants CRUD.
    NOTE(review): no grants are issued to poller_user here -- confirm the
    poller genuinely never reads VPN rows.
    """
    # ── vpn_config: one row per tenant ──
    # tenant_id is UNIQUE, enforcing the one-config-per-tenant invariant.
    op.create_table(
        "vpn_config",
        sa.Column("id", UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), primary_key=True),
        sa.Column("tenant_id", UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, unique=True),
        sa.Column("server_private_key", sa.LargeBinary(), nullable=False),  # AES-256-GCM encrypted
        sa.Column("server_public_key", sa.String(64), nullable=False),
        sa.Column("subnet", sa.String(32), nullable=False, server_default="10.10.0.0/24"),
        sa.Column("server_port", sa.Integer(), nullable=False, server_default="51820"),
        sa.Column("server_address", sa.String(32), nullable=False, server_default="10.10.0.1/24"),
        sa.Column("endpoint", sa.String(255), nullable=True),  # public hostname:port for devices to connect to
        sa.Column("is_enabled", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
    )
    # ── vpn_peers: one per device VPN connection ──
    # device_id is UNIQUE: at most one peer entry per device.
    op.create_table(
        "vpn_peers",
        sa.Column("id", UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), primary_key=True),
        sa.Column("tenant_id", UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False),
        sa.Column("device_id", UUID(as_uuid=True), sa.ForeignKey("devices.id", ondelete="CASCADE"), nullable=False, unique=True),
        sa.Column("peer_private_key", sa.LargeBinary(), nullable=False),  # AES-256-GCM encrypted
        sa.Column("peer_public_key", sa.String(64), nullable=False),
        sa.Column("preshared_key", sa.LargeBinary(), nullable=True),  # AES-256-GCM encrypted, optional
        sa.Column("assigned_ip", sa.String(32), nullable=False),  # e.g. 10.10.0.2/24
        sa.Column("additional_allowed_ips", sa.String(512), nullable=True),  # comma-separated subnets for site-to-site
        sa.Column("is_enabled", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("last_handshake", sa.DateTime(timezone=True), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
    )
    # Indexes
    op.create_index("ix_vpn_peers_tenant_id", "vpn_peers", ["tenant_id"])
    # ── RLS policies ──
    # Policies are scoped TO app_user (earlier migrations' policies apply to
    # all roles); missing_ok=true means an unset GUC matches no rows.
    op.execute("ALTER TABLE vpn_config ENABLE ROW LEVEL SECURITY")
    op.execute("""
        CREATE POLICY vpn_config_tenant_isolation ON vpn_config
        FOR ALL
        TO app_user
        USING (CAST(tenant_id AS text) = current_setting('app.current_tenant', true))
    """)
    op.execute("ALTER TABLE vpn_peers ENABLE ROW LEVEL SECURITY")
    op.execute("""
        CREATE POLICY vpn_peers_tenant_isolation ON vpn_peers
        FOR ALL
        TO app_user
        USING (CAST(tenant_id AS text) = current_setting('app.current_tenant', true))
    """)
    # ── Grants ──
    op.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON vpn_config TO app_user")
    op.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON vpn_peers TO app_user")
def downgrade() -> None:
    """Remove the WireGuard VPN tables and their tenant-isolation policies.

    Policies are dropped first, and vpn_peers before vpn_config, mirroring
    the reverse of upgrade()'s creation order.
    """
    for table in ("vpn_peers", "vpn_config"):
        op.execute(f"DROP POLICY IF EXISTS {table}_tenant_isolation ON {table}")
    for table in ("vpn_peers", "vpn_config"):
        op.drop_table(table)

View File

@@ -0,0 +1,169 @@
"""Seed starter config templates for tenants missing them.
Revision ID: 012
Revises: 010
Create Date: 2026-03-02
Re-seeds the 4 original starter templates from 006 plus a new comprehensive
'Basic Router' template for any tenants created after migration 006 ran.
Uses WHERE NOT EXISTS guards on (tenant_id, name) so existing templates are untouched.
"""
revision = "012"
down_revision = "010"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
    """Apply migration 012: (re-)seed starter config templates per tenant.

    Each INSERT ... SELECT is guarded by a WHERE NOT EXISTS on
    (tenant_id, name), so tenants that already have a template of that
    name -- whether from migration 006 or user-created -- are untouched.
    """
    conn = op.get_bind()
    # 1. Basic Router — comprehensive starter for a typical SOHO/branch router
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Basic Router',
            'Complete SOHO/branch router setup: WAN on ether1, LAN bridge, DHCP, DNS, NAT, basic firewall',
            '/interface bridge add name=bridge-lan comment="LAN bridge"
/interface bridge port add bridge=bridge-lan interface=ether2
/interface bridge port add bridge=bridge-lan interface=ether3
/interface bridge port add bridge=bridge-lan interface=ether4
/interface bridge port add bridge=bridge-lan interface=ether5
# WAN — DHCP client on ether1
/ip dhcp-client add interface={{ wan_interface }} disabled=no comment="WAN uplink"
# LAN address
/ip address add address={{ lan_gateway }}/{{ lan_cidr }} interface=bridge-lan
# DNS
/ip dns set servers={{ dns_servers }} allow-remote-requests=yes
# DHCP server for LAN
/ip pool add name=lan-pool ranges={{ dhcp_start }}-{{ dhcp_end }}
/ip dhcp-server network add address={{ lan_network }}/{{ lan_cidr }} gateway={{ lan_gateway }} dns-server={{ lan_gateway }}
/ip dhcp-server add name=lan-dhcp interface=bridge-lan address-pool=lan-pool disabled=no
# NAT masquerade
/ip firewall nat add chain=srcnat out-interface={{ wan_interface }} action=masquerade
# Firewall — input chain
/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} action=drop comment="Drop all other WAN input"
# Firewall — forward chain
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward in-interface=bridge-lan out-interface={{ wan_interface }} action=accept comment="Allow LAN to WAN"
add chain=forward action=drop comment="Drop everything else"
# NTP
/system ntp client set enabled=yes servers={{ ntp_server }}
# Identity
/system identity set name={{ device.hostname }}',
        '[{"name":"wan_interface","type":"string","default":"ether1","description":"WAN-facing interface"},{"name":"lan_gateway","type":"ip","default":"192.168.88.1","description":"LAN gateway IP"},{"name":"lan_cidr","type":"integer","default":"24","description":"LAN subnet mask bits"},{"name":"lan_network","type":"ip","default":"192.168.88.0","description":"LAN network address"},{"name":"dhcp_start","type":"ip","default":"192.168.88.100","description":"DHCP pool start"},{"name":"dhcp_end","type":"ip","default":"192.168.88.254","description":"DHCP pool end"},{"name":"dns_servers","type":"string","default":"8.8.8.8,8.8.4.4","description":"Upstream DNS servers"},{"name":"ntp_server","type":"string","default":"pool.ntp.org","description":"NTP server"}]'::jsonb
        FROM tenants t
        WHERE NOT EXISTS (
            SELECT 1 FROM config_templates ct
            WHERE ct.tenant_id = t.id AND ct.name = 'Basic Router'
        )
    """))
    # 2. Re-seed Basic Firewall (for tenants missing it)
    # Note: defaults use the 192.168.88.x range here, vs 192.168.1.x in 006.
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Basic Firewall',
            'Standard firewall ruleset with WAN protection and LAN forwarding',
            '/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=8291 action=drop comment="Block Winbox from WAN"
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=22 action=drop comment="Block SSH from WAN"
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward src-address={{ allowed_network }} action=accept
add chain=forward action=drop',
        '[{"name":"wan_interface","type":"string","default":"ether1","description":"WAN-facing interface"},{"name":"allowed_network","type":"subnet","default":"192.168.88.0/24","description":"Allowed source network"}]'::jsonb
        FROM tenants t
        WHERE NOT EXISTS (
            SELECT 1 FROM config_templates ct
            WHERE ct.tenant_id = t.id AND ct.name = 'Basic Firewall'
        )
    """))
    # 3. Re-seed DHCP Server Setup
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'DHCP Server Setup',
            'Configure DHCP server with address pool, DNS, and gateway',
            '/ip pool add name=dhcp-pool ranges={{ pool_start }}-{{ pool_end }}
/ip dhcp-server network add address={{ gateway }}/24 gateway={{ gateway }} dns-server={{ dns_server }}
/ip dhcp-server add name=dhcp1 interface={{ interface }} address-pool=dhcp-pool disabled=no',
        '[{"name":"pool_start","type":"ip","default":"192.168.88.100","description":"DHCP pool start address"},{"name":"pool_end","type":"ip","default":"192.168.88.254","description":"DHCP pool end address"},{"name":"gateway","type":"ip","default":"192.168.88.1","description":"Default gateway"},{"name":"dns_server","type":"ip","default":"8.8.8.8","description":"DNS server address"},{"name":"interface","type":"string","default":"bridge-lan","description":"Interface to serve DHCP on"}]'::jsonb
        FROM tenants t
        WHERE NOT EXISTS (
            SELECT 1 FROM config_templates ct
            WHERE ct.tenant_id = t.id AND ct.name = 'DHCP Server Setup'
        )
    """))
    # 4. Re-seed Wireless AP Config
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Wireless AP Config',
            'Configure wireless access point with WPA2 security',
            '/interface wireless security-profiles add name=portal-wpa2 mode=dynamic-keys authentication-types=wpa2-psk wpa2-pre-shared-key={{ password }}
/interface wireless set wlan1 mode=ap-bridge ssid={{ ssid }} security-profile=portal-wpa2 frequency={{ frequency }} channel-width={{ channel_width }} disabled=no',
        '[{"name":"ssid","type":"string","default":"MikroTik-AP","description":"Wireless network name"},{"name":"password","type":"string","default":"","description":"WPA2 pre-shared key (min 8 characters)"},{"name":"frequency","type":"integer","default":"2412","description":"Wireless frequency in MHz"},{"name":"channel_width","type":"string","default":"20/40mhz-XX","description":"Channel width setting"}]'::jsonb
        FROM tenants t
        WHERE NOT EXISTS (
            SELECT 1 FROM config_templates ct
            WHERE ct.tenant_id = t.id AND ct.name = 'Wireless AP Config'
        )
    """))
    # 5. Re-seed Initial Device Setup
    # NOTE(review): ntp_server is declared type "ip" but defaults to the
    # hostname "pool.ntp.org" (the Basic Router template above uses type
    # "string" for the same variable) -- presumably should be "string".
    conn.execute(sa.text("""
        INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
        SELECT
            gen_random_uuid(),
            t.id,
            'Initial Device Setup',
            'Set device identity, NTP, DNS, and disable unused services',
            '/system identity set name={{ device.hostname }}
/system ntp client set enabled=yes servers={{ ntp_server }}
/ip dns set servers={{ dns_servers }} allow-remote-requests=no
/ip service disable telnet,ftp,www,api-ssl
/ip service set ssh port=22
/ip service set winbox port=8291',
        '[{"name":"ntp_server","type":"ip","default":"pool.ntp.org","description":"NTP server address"},{"name":"dns_servers","type":"string","default":"8.8.8.8,8.8.4.4","description":"Comma-separated DNS servers"}]'::jsonb
        FROM tenants t
        WHERE NOT EXISTS (
            SELECT 1 FROM config_templates ct
            WHERE ct.tenant_id = t.id AND ct.name = 'Initial Device Setup'
        )
    """))
def downgrade() -> None:
    """Remove the 'Basic Router' starter template for every tenant.

    The four re-seeded originals are left alone -- removing those belongs
    to migration 006's downgrade.  NOTE(review): this deletes *all*
    templates named 'Basic Router', including tenant-edited copies;
    confirm that is acceptable before downgrading in production.
    """
    bind = op.get_bind()
    bind.execute(sa.text("DELETE FROM config_templates WHERE name = 'Basic Router'"))

View File

@@ -0,0 +1,203 @@
"""Add certificate authority and device certificate tables.
Revision ID: 013
Revises: 012
Create Date: 2026-03-03
Creates the `certificate_authorities` (one per tenant) and `device_certificates`
(one per device) tables for the Internal Certificate Authority feature.
Also adds a `tls_mode` column to the `devices` table to track per-device
TLS verification mode (insecure vs portal_ca).
Both tables have RLS policies for tenant isolation, plus poller_user read
access (the poller needs CA cert PEM to verify device TLS connections).
"""
revision = "013"
down_revision = "012"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
def upgrade() -> None:
    """Apply migration 013: internal-CA tables, devices.tls_mode, RLS.

    Creates certificate_authorities (one per tenant, tenant_id UNIQUE) and
    device_certificates, adds a tls_mode column to devices (default
    'insecure'), then enables RLS with tenant-isolation policies and
    grants CRUD to app_user plus read-only access to poller_user.
    """
    # --- certificate_authorities table ---
    # encrypted_private_key holds ciphertext, never the raw key;
    # fingerprint_sha256 is String(95) -- presumably colon-separated hex
    # (32 bytes -> 95 chars), confirm against the issuing code.
    op.create_table(
        "certificate_authorities",
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
        ),
        sa.Column(
            "tenant_id",
            UUID(as_uuid=True),
            sa.ForeignKey("tenants.id", ondelete="CASCADE"),
            nullable=False,
            unique=True,
        ),
        sa.Column("common_name", sa.String(255), nullable=False),
        sa.Column("cert_pem", sa.Text(), nullable=False),
        sa.Column("encrypted_private_key", sa.LargeBinary(), nullable=False),
        sa.Column("serial_number", sa.String(64), nullable=False),
        sa.Column("fingerprint_sha256", sa.String(95), nullable=False),
        sa.Column(
            "not_valid_before",
            sa.DateTime(timezone=True),
            nullable=False,
        ),
        sa.Column(
            "not_valid_after",
            sa.DateTime(timezone=True),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
    )
    # --- device_certificates table ---
    # One row per issued device cert; status defaults to 'issued' and
    # deployed_at records when it was pushed to the device.
    op.create_table(
        "device_certificates",
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
        ),
        sa.Column(
            "tenant_id",
            UUID(as_uuid=True),
            sa.ForeignKey("tenants.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "device_id",
            UUID(as_uuid=True),
            sa.ForeignKey("devices.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "ca_id",
            UUID(as_uuid=True),
            sa.ForeignKey("certificate_authorities.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("common_name", sa.String(255), nullable=False),
        sa.Column("serial_number", sa.String(64), nullable=False),
        sa.Column("fingerprint_sha256", sa.String(95), nullable=False),
        sa.Column("cert_pem", sa.Text(), nullable=False),
        sa.Column("encrypted_private_key", sa.LargeBinary(), nullable=False),
        sa.Column(
            "not_valid_before",
            sa.DateTime(timezone=True),
            nullable=False,
        ),
        sa.Column(
            "not_valid_after",
            sa.DateTime(timezone=True),
            nullable=False,
        ),
        sa.Column(
            "status",
            sa.String(20),
            nullable=False,
            server_default="issued",
        ),
        sa.Column("deployed_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
    )
    # --- Add tls_mode column to devices table ---
    # 'insecure' keeps existing devices working until a cert is deployed.
    op.add_column(
        "devices",
        sa.Column(
            "tls_mode",
            sa.String(20),
            nullable=False,
            server_default="insecure",
        ),
    )
    # --- RLS policies ---
    # Policies use NULLIF(current_setting(..., true), '')::uuid with a
    # matching WITH CHECK, so both reads and writes are tenant-scoped and
    # an unset GUC simply matches nothing.
    conn = op.get_bind()
    # certificate_authorities RLS
    conn.execute(sa.text(
        "ALTER TABLE certificate_authorities ENABLE ROW LEVEL SECURITY"
    ))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON certificate_authorities TO app_user"
    ))
    conn.execute(sa.text(
        "CREATE POLICY tenant_isolation ON certificate_authorities FOR ALL "
        "USING (tenant_id = NULLIF(current_setting('app.current_tenant', true), '')::uuid) "
        "WITH CHECK (tenant_id = NULLIF(current_setting('app.current_tenant', true), '')::uuid)"
    ))
    conn.execute(sa.text(
        "GRANT SELECT ON certificate_authorities TO poller_user"
    ))
    # device_certificates RLS
    conn.execute(sa.text(
        "ALTER TABLE device_certificates ENABLE ROW LEVEL SECURITY"
    ))
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE, DELETE ON device_certificates TO app_user"
    ))
    conn.execute(sa.text(
        "CREATE POLICY tenant_isolation ON device_certificates FOR ALL "
        "USING (tenant_id = NULLIF(current_setting('app.current_tenant', true), '')::uuid) "
        "WITH CHECK (tenant_id = NULLIF(current_setting('app.current_tenant', true), '')::uuid)"
    ))
    conn.execute(sa.text(
        "GRANT SELECT ON device_certificates TO poller_user"
    ))
def downgrade() -> None:
    """Remove TLS cert tables, their RLS policies/grants, and devices.tls_mode."""
    conn = op.get_bind()
    # Policies must be dropped before the tables they attach to.
    for table in ("device_certificates", "certificate_authorities"):
        conn.execute(
            sa.text(f"DROP POLICY IF EXISTS tenant_isolation ON {table}")
        )
    # Revoke every grant issued in upgrade() for both roles.
    for table in ("device_certificates", "certificate_authorities"):
        for role in ("app_user", "poller_user"):
            conn.execute(sa.text(f"REVOKE ALL ON {table} FROM {role}"))
    # Remove the tls_mode column added to devices.
    op.drop_column("devices", "tls_mode")
    # Finally drop the tables themselves (dependent table first).
    op.drop_table("device_certificates")
    op.drop_table("certificate_authorities")

View File

@@ -0,0 +1,50 @@
"""Add TimescaleDB retention policies.
Revision ID: 014
Revises: 013
Create Date: 2026-03-03
Adds retention (drop after 90 days) on all three hypertables:
interface_metrics, health_metrics, wireless_metrics.
Note: Compression is skipped because TimescaleDB 2.17.x does not support
compression on tables with row-level security (RLS) policies.
Compression can be re-added when upgrading to TimescaleDB >= 2.19.
Without retention policies the database grows ~5 GB/month unbounded.
"""
# Alembic revision identifiers.
revision = "014"
down_revision = "013"
branch_labels = None
depends_on = None

from alembic import op
import sqlalchemy as sa

# TimescaleDB hypertables that receive the 90-day retention policy.
HYPERTABLES = [
    "interface_metrics",
    "health_metrics",
    "wireless_metrics",
]
def upgrade() -> None:
    """Attach a 90-day retention policy to every metrics hypertable."""
    bind = op.get_bind()
    for hypertable in HYPERTABLES:
        # TimescaleDB drops whole chunks once their data is older than 90 days.
        stmt = f"SELECT add_retention_policy('{hypertable}', INTERVAL '90 days')"
        bind.execute(sa.text(stmt))
def downgrade() -> None:
    """Detach the retention policy from each hypertable (idempotent)."""
    bind = op.get_bind()
    for hypertable in HYPERTABLES:
        bind.execute(
            sa.text(
                f"SELECT remove_retention_policy('{hypertable}', if_exists => true)"
            )
        )

View File

@@ -0,0 +1,62 @@
"""Add password_reset_tokens table.
Revision ID: 015
Revises: 014
Create Date: 2026-03-03
Stores one-time password reset tokens with expiry. Tokens are hashed
with SHA-256 so a database leak doesn't expose reset links.
"""
revision = "015"
down_revision = "014"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
def upgrade() -> None:
    """Create password_reset_tokens: hashed one-time tokens with expiry."""
    op.create_table(
        "password_reset_tokens",
        # gen_random_uuid() keeps id generation in the database.
        sa.Column("id", UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), primary_key=True),
        # Tokens are deleted together with their user.
        sa.Column("user_id", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
        # SHA-256 hex digest of the raw token (64 chars); unique lookup key.
        sa.Column("token_hash", sa.String(64), nullable=False, unique=True, index=True),
        sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
        # NULL until the token is redeemed (single-use enforcement).
        sa.Column("used_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
    )
def downgrade() -> None:
    """Drop the password_reset_tokens table (tokens are re-issuable)."""
    op.drop_table("password_reset_tokens")

View File

@@ -0,0 +1,207 @@
"""Add zero-knowledge authentication schema.
Revision ID: 016
Revises: 015
Create Date: 2026-03-03
Adds SRP columns to users, creates user_key_sets table for encrypted
key bundles, creates immutable key_access_log audit trail, and adds
vault key columns to tenants (Phase 29 preparation).
Both new tables have RLS policies. key_access_log is append-only
(INSERT+SELECT only, no UPDATE/DELETE).
"""
revision = "016"
down_revision = "015"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
def upgrade() -> None:
    """Add SRP columns to users, create key-set storage and the key-access
    audit trail, and prepare tenants for vault keys.

    Order matters: users columns first (user_key_sets references users),
    then the two new tables, then tenants columns, then RLS policies and
    grants (which require the tables to exist).
    """
    # --- Add SRP columns to users table ---
    # Nullable: existing bcrypt users have no SRP material yet.
    op.add_column(
        "users",
        sa.Column("srp_salt", sa.LargeBinary(), nullable=True),
    )
    op.add_column(
        "users",
        sa.Column("srp_verifier", sa.LargeBinary(), nullable=True),
    )
    op.add_column(
        "users",
        sa.Column(
            "auth_version",
            sa.SmallInteger(),
            server_default=sa.text("1"),
            nullable=False,
        ),
    )
    # --- Create user_key_sets table ---
    op.create_table(
        "user_key_sets",
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
        ),
        # unique=True: exactly one key set per user.
        sa.Column(
            "user_id",
            UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
            unique=True,
        ),
        sa.Column(
            "tenant_id",
            UUID(as_uuid=True),
            sa.ForeignKey("tenants.id", ondelete="CASCADE"),
            nullable=True,  # NULL for super_admin
        ),
        sa.Column("encrypted_private_key", sa.LargeBinary(), nullable=False),
        sa.Column("private_key_nonce", sa.LargeBinary(), nullable=False),
        sa.Column("encrypted_vault_key", sa.LargeBinary(), nullable=False),
        sa.Column("vault_key_nonce", sa.LargeBinary(), nullable=False),
        sa.Column("public_key", sa.LargeBinary(), nullable=False),
        sa.Column(
            "pbkdf2_iterations",
            sa.Integer(),
            server_default=sa.text("650000"),
            nullable=False,
        ),
        sa.Column("pbkdf2_salt", sa.LargeBinary(), nullable=False),
        sa.Column("hkdf_salt", sa.LargeBinary(), nullable=False),
        sa.Column(
            "key_version",
            sa.Integer(),
            server_default=sa.text("1"),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
        ),
    )
    # --- Create key_access_log table (immutable audit trail) ---
    op.create_table(
        "key_access_log",
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
        ),
        sa.Column(
            "tenant_id",
            UUID(as_uuid=True),
            sa.ForeignKey("tenants.id", ondelete="CASCADE"),
            nullable=False,
        ),
        # SET NULL: audit rows outlive deleted users.
        sa.Column(
            "user_id",
            UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("action", sa.Text(), nullable=False),
        sa.Column("resource_type", sa.Text(), nullable=True),
        sa.Column("resource_id", sa.Text(), nullable=True),
        sa.Column("key_version", sa.Integer(), nullable=True),
        sa.Column("ip_address", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
    )
    # --- Add vault key columns to tenants (Phase 29 preparation) ---
    op.add_column(
        "tenants",
        sa.Column("encrypted_vault_key", sa.LargeBinary(), nullable=True),
    )
    op.add_column(
        "tenants",
        sa.Column(
            "vault_key_version",
            sa.Integer(),
            server_default=sa.text("1"),
        ),
    )
    # --- RLS policies ---
    conn = op.get_bind()
    # user_key_sets RLS
    # No explicit WITH CHECK: PostgreSQL reuses the USING expression.
    conn.execute(sa.text(
        "ALTER TABLE user_key_sets ENABLE ROW LEVEL SECURITY"
    ))
    conn.execute(sa.text(
        "CREATE POLICY user_key_sets_tenant_isolation ON user_key_sets "
        "USING (tenant_id::text = current_setting('app.current_tenant', true) "
        "OR current_setting('app.current_tenant', true) = 'super_admin')"
    ))
    # No DELETE grant: key sets are updated in place, never removed by app_user.
    conn.execute(sa.text(
        "GRANT SELECT, INSERT, UPDATE ON user_key_sets TO app_user"
    ))
    # key_access_log RLS (append-only: INSERT+SELECT only, no UPDATE/DELETE)
    conn.execute(sa.text(
        "ALTER TABLE key_access_log ENABLE ROW LEVEL SECURITY"
    ))
    conn.execute(sa.text(
        "CREATE POLICY key_access_log_tenant_isolation ON key_access_log "
        "USING (tenant_id::text = current_setting('app.current_tenant', true) "
        "OR current_setting('app.current_tenant', true) = 'super_admin')"
    ))
    conn.execute(sa.text(
        "GRANT INSERT, SELECT ON key_access_log TO app_user"
    ))
    # poller_user needs INSERT to log key access events when decrypting credentials
    conn.execute(sa.text(
        "GRANT INSERT, SELECT ON key_access_log TO poller_user"
    ))
def downgrade() -> None:
    """Reverse migration 016: policies, grants, tenant columns, tables, SRP columns."""
    conn = op.get_bind()
    # Drop RLS policies
    conn.execute(sa.text(
        "DROP POLICY IF EXISTS key_access_log_tenant_isolation ON key_access_log"
    ))
    conn.execute(sa.text(
        "DROP POLICY IF EXISTS user_key_sets_tenant_isolation ON user_key_sets"
    ))
    # Revoke grants
    conn.execute(sa.text("REVOKE ALL ON key_access_log FROM app_user"))
    conn.execute(sa.text("REVOKE ALL ON key_access_log FROM poller_user"))
    conn.execute(sa.text("REVOKE ALL ON user_key_sets FROM app_user"))
    # Drop vault key columns from tenants
    op.drop_column("tenants", "vault_key_version")
    op.drop_column("tenants", "encrypted_vault_key")
    # Drop tables
    op.drop_table("key_access_log")
    op.drop_table("user_key_sets")
    # Drop SRP columns from users
    op.drop_column("users", "auth_version")
    op.drop_column("users", "srp_verifier")
    op.drop_column("users", "srp_salt")

View File

@@ -0,0 +1,90 @@
"""OpenBao envelope encryption columns and key_access_log extensions.
Revision ID: 017
Revises: 016
Create Date: 2026-03-03
Adds Transit ciphertext columns (TEXT) alongside existing BYTEA columns
for dual-write migration strategy. Extends key_access_log with device_id,
justification, and correlation_id for Phase 29 audit trail.
"""
revision = "017"
down_revision = "016"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
def upgrade() -> None:
    """Add Transit ciphertext columns and extend key_access_log.

    All new columns are nullable so existing rows stay valid: per the
    module docstring, writers dual-write the legacy BYTEA column and the
    new *_transit TEXT column during the migration window.
    """
    # --- Transit ciphertext columns (TEXT, alongside existing BYTEA) ---
    # devices: store OpenBao Transit ciphertext for credentials
    op.add_column(
        "devices",
        sa.Column("encrypted_credentials_transit", sa.Text(), nullable=True),
    )
    # certificate_authorities: Transit-encrypted CA private keys
    op.add_column(
        "certificate_authorities",
        sa.Column("encrypted_private_key_transit", sa.Text(), nullable=True),
    )
    # device_certificates: Transit-encrypted device cert private keys
    op.add_column(
        "device_certificates",
        sa.Column("encrypted_private_key_transit", sa.Text(), nullable=True),
    )
    # notification_channels: Transit-encrypted SMTP password
    op.add_column(
        "notification_channels",
        sa.Column("smtp_password_transit", sa.Text(), nullable=True),
    )
    # --- Tenant OpenBao key tracking ---
    op.add_column(
        "tenants",
        sa.Column("openbao_key_name", sa.Text(), nullable=True),
    )
    # --- Extend key_access_log for Phase 29 ---
    op.add_column(
        "key_access_log",
        sa.Column("device_id", UUID(as_uuid=True), nullable=True),
    )
    op.add_column(
        "key_access_log",
        sa.Column("justification", sa.Text(), nullable=True),
    )
    op.add_column(
        "key_access_log",
        sa.Column("correlation_id", sa.Text(), nullable=True),
    )
    # Add FK constraint for device_id -> devices(id) (nullable, so no cascade needed)
    # NOTE(review): without ON DELETE, deleting a device with audit rows
    # fails — migration 025 fixes this with SET NULL.
    op.create_foreign_key(
        "fk_key_access_log_device_id",
        "key_access_log",
        "devices",
        ["device_id"],
        ["id"],
    )
def downgrade() -> None:
    """Drop the FK, key_access_log extensions, and all Transit columns."""
    # FK must go before its column.
    op.drop_constraint(
        "fk_key_access_log_device_id", "key_access_log", type_="foreignkey"
    )
    op.drop_column("key_access_log", "correlation_id")
    op.drop_column("key_access_log", "justification")
    op.drop_column("key_access_log", "device_id")
    op.drop_column("tenants", "openbao_key_name")
    op.drop_column("notification_channels", "smtp_password_transit")
    op.drop_column("device_certificates", "encrypted_private_key_transit")
    op.drop_column("certificate_authorities", "encrypted_private_key_transit")
    op.drop_column("devices", "encrypted_credentials_transit")

View File

@@ -0,0 +1,62 @@
"""Data encryption columns for config backups and audit logs.
Revision ID: 018
Revises: 017
Create Date: 2026-03-03
Adds encryption metadata columns to config_backup_runs (encryption_tier,
encryption_nonce) and encrypted_details TEXT column to audit_logs for
Transit-encrypted audit detail storage.
"""
revision = "018"
down_revision = "017"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
    """Add encryption metadata to config_backup_runs and audit_logs."""
    # --- config_backup_runs: encryption metadata ---
    # NULL = plaintext, 1 = client-side AES-GCM, 2 = OpenBao Transit
    op.add_column(
        "config_backup_runs",
        sa.Column(
            "encryption_tier", sa.SmallInteger(), nullable=True,
            comment="NULL=plaintext, 1=client-side AES-GCM, 2=OpenBao Transit",
        ),
    )
    # 12-byte AES-GCM nonce for Tier 1 (client-side) backups
    op.add_column(
        "config_backup_runs",
        sa.Column(
            "encryption_nonce", sa.LargeBinary(), nullable=True,
            comment="12-byte AES-GCM nonce for Tier 1 backups",
        ),
    )
    # --- audit_logs: Transit-encrypted details ---
    op.add_column(
        "audit_logs",
        sa.Column(
            "encrypted_details", sa.Text(), nullable=True,
            comment="Transit-encrypted details JSON (vault:v1:...)",
        ),
    )
def downgrade() -> None:
    """Drop the encryption metadata columns added by this revision."""
    op.drop_column("audit_logs", "encrypted_details")
    op.drop_column("config_backup_runs", "encryption_nonce")
    op.drop_column("config_backup_runs", "encryption_tier")

View File

@@ -0,0 +1,52 @@
"""Deprecate bcrypt: add must_upgrade_auth flag and make hashed_password nullable.
Revision ID: 019
Revises: 018
Create Date: 2026-03-03
Conservative migration that flags legacy bcrypt users for SRP upgrade
rather than dropping data. hashed_password is made nullable so SRP-only
users no longer need a dummy value. A future migration (post-v6.0) can
drop hashed_password once all users have upgraded.
"""
import sqlalchemy as sa
from alembic import op
revision = "019"
down_revision = "018"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add must_upgrade_auth, flag bcrypt-only users, relax hashed_password."""
    # Add must_upgrade_auth flag
    op.add_column(
        "users",
        sa.Column(
            "must_upgrade_auth",
            sa.Boolean(),
            server_default="false",
            nullable=False,
        ),
    )
    # Flag all bcrypt-only users for upgrade (auth_version=1 and no SRP verifier)
    op.execute(
        "UPDATE users SET must_upgrade_auth = true "
        "WHERE auth_version = 1 AND srp_verifier IS NULL"
    )
    # Make hashed_password nullable (SRP users don't need it)
    op.alter_column("users", "hashed_password", nullable=True)
def downgrade() -> None:
    """Restore NOT NULL on hashed_password and drop the upgrade flag."""
    # Restore NOT NULL (set a dummy value for any NULLs first)
    # NOTE(review): the placeholder is not a valid bcrypt digest, so affected
    # users cannot log in until a real password is set — confirm acceptable.
    op.execute(
        "UPDATE users SET hashed_password = '$2b$12$placeholder' "
        "WHERE hashed_password IS NULL"
    )
    op.alter_column("users", "hashed_password", nullable=False)
    op.drop_column("users", "must_upgrade_auth")

View File

@@ -0,0 +1,51 @@
"""Add opt-in plain-text TLS mode and change default from insecure to auto.
Revision ID: 020
Revises: 019
Create Date: 2026-03-04
Reclassifies tls_mode values:
- 'auto': CA-verified -> InsecureSkipVerify (NO plain-text fallback)
- 'insecure': Skip directly to InsecureSkipVerify
- 'plain': Explicit opt-in for plain-text API (dangerous)
- 'portal_ca': Existing CA-verified mode (unchanged)
Existing 'insecure' devices become 'auto' since the old behavior was
an implicit auto-fallback. portal_ca devices keep their mode.
"""
import sqlalchemy as sa
from alembic import op
revision = "020"
down_revision = "019"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Reclassify existing 'insecure' devices as 'auto' and flip the default."""
    # Migrate existing 'insecure' devices to 'auto' (the new default).
    # 'portal_ca' devices keep their mode (they already have CA verification).
    op.execute("UPDATE devices SET tls_mode = 'auto' WHERE tls_mode = 'insecure'")
    # Change the server default from 'insecure' to 'auto'
    op.alter_column(
        "devices",
        "tls_mode",
        server_default="auto",
    )
def downgrade() -> None:
    """Map 'auto' and 'plain' devices back to 'insecure' and restore the default."""
    # Revert 'auto' devices back to 'insecure'
    op.execute("UPDATE devices SET tls_mode = 'insecure' WHERE tls_mode = 'auto'")
    # Revert 'plain' devices to 'insecure' (plain didn't exist before)
    op.execute("UPDATE devices SET tls_mode = 'insecure' WHERE tls_mode = 'plain'")
    # Restore old server default
    op.alter_column(
        "devices",
        "tls_mode",
        server_default="insecure",
    )

View File

@@ -0,0 +1,44 @@
"""Add system tenant for super_admin audit log entries.
Revision ID: 021
Revises: 020
Create Date: 2026-03-04
The super_admin has NULL tenant_id, but audit_logs.tenant_id has a FK
to tenants and is NOT NULL. Code was using uuid.UUID(int=0) as a
substitute, but that row didn't exist — causing FK violations that
silently dropped every super_admin audit entry.
This migration inserts a sentinel 'System (Internal)' tenant so
audit_logs can reference it.
"""
from alembic import op
revision = "021"
down_revision = "020"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Insert the all-zeros sentinel tenant used for super_admin audit rows."""
    # Idempotent: ON CONFLICT DO NOTHING makes re-runs safe.
    op.execute(
        """
        INSERT INTO tenants (id, name, description)
        VALUES (
            '00000000-0000-0000-0000-000000000000',
            'System (Internal)',
            'Internal tenant for super_admin audit entries'
        )
        ON CONFLICT (id) DO NOTHING
        """
    )
def downgrade() -> None:
    """Remove the sentinel system tenant.

    NOTE(review): this DELETE fails if audit_logs rows still reference the
    sentinel tenant (FK) — confirm that downgrade is only run on clean data.
    """
    op.execute(
        """
        DELETE FROM tenants
        WHERE id = '00000000-0000-0000-0000-000000000000'
        """
    )

View File

@@ -0,0 +1,49 @@
"""Add super_admin bypass to devices, device_groups, device_tags RLS policies.
Previously these tables only matched tenant_id, so super_admin context
('super_admin') returned zero rows. Users/tenants tables already had
the bypass — this brings device tables in line.
Revision ID: 022
Revises: 021
Create Date: 2026-03-07
"""
import sqlalchemy as sa
from alembic import op
revision = "022"
down_revision = "021"
branch_labels = None
depends_on = None
# Tables that need super_admin bypass added to their RLS policy
_TABLES = ["devices", "device_groups", "device_tags"]
def upgrade() -> None:
    """Recreate device-table RLS policies with the super_admin bypass."""
    conn = op.get_bind()
    for table in _TABLES:
        # DROP + CREATE: CREATE POLICY has no OR REPLACE form.
        conn.execute(sa.text(f"DROP POLICY IF EXISTS tenant_isolation ON {table}"))
        conn.execute(sa.text(f"""
            CREATE POLICY tenant_isolation ON {table}
            USING (
                tenant_id::text = current_setting('app.current_tenant', true)
                OR current_setting('app.current_tenant', true) = 'super_admin'
            )
            WITH CHECK (
                tenant_id::text = current_setting('app.current_tenant', true)
                OR current_setting('app.current_tenant', true) = 'super_admin'
            )
        """))
def downgrade() -> None:
    """Restore the original tenant-only RLS policies (no super_admin bypass)."""
    conn = op.get_bind()
    for table in _TABLES:
        conn.execute(sa.text(f"DROP POLICY IF EXISTS tenant_isolation ON {table}"))
        conn.execute(sa.text(f"""
            CREATE POLICY tenant_isolation ON {table}
            USING (tenant_id::text = current_setting('app.current_tenant', true))
            WITH CHECK (tenant_id::text = current_setting('app.current_tenant', true))
        """))

View File

@@ -0,0 +1,21 @@
"""Add Slack notification channel support.
Revision ID: 023
Revises: 022
"""
from alembic import op
import sqlalchemy as sa
revision = "023"
down_revision = "022"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add slack_webhook_url so notification channels can post to Slack."""
    op.add_column("notification_channels", sa.Column("slack_webhook_url", sa.Text(), nullable=True))
def downgrade() -> None:
    """Drop the Slack webhook column."""
    op.drop_column("notification_channels", "slack_webhook_url")

View File

@@ -0,0 +1,41 @@
"""Add contact_email to tenants and seed device_offline default alert rule.
Revision ID: 024
Revises: 023
"""
from alembic import op
import sqlalchemy as sa
revision = "024"
down_revision = "023"
def upgrade() -> None:
    """Add tenants.contact_email and seed the default device_offline alert rule.

    NOTE(review): this revision module omits branch_labels/depends_on —
    Alembic treats missing attributes as None; confirm against project style.
    """
    conn = op.get_bind()
    # 1. Add contact_email column to tenants
    op.add_column("tenants", sa.Column("contact_email", sa.String(255), nullable=True))
    # 2. Seed device_offline default alert rule for all existing tenants
    #    (skips the all-zeros system tenant and tenants that already have one).
    conn.execute(sa.text("""
        INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
        SELECT gen_random_uuid(), t.id, 'Device Offline', 'device_offline', 'eq', 1, 1, 'critical', TRUE, TRUE
        FROM tenants t
        WHERE t.id != '00000000-0000-0000-0000-000000000000'
        AND NOT EXISTS (
            SELECT 1 FROM alert_rules ar
            WHERE ar.tenant_id = t.id AND ar.metric = 'device_offline' AND ar.is_default = TRUE
        )
    """))
def downgrade() -> None:
    """Remove seeded default device_offline rules and the contact_email column.

    NOTE(review): deletes ALL default device_offline rules, including any
    created after this migration ran — confirm that is acceptable.
    """
    conn = op.get_bind()
    conn.execute(sa.text("""
        DELETE FROM alert_rules WHERE metric = 'device_offline' AND is_default = TRUE
    """))
    op.drop_column("tenants", "contact_email")

View File

@@ -0,0 +1,37 @@
"""Fix key_access_log device_id FK to SET NULL on delete.
Revision ID: 025
Revises: 024
"""
from alembic import op
revision = "025"
down_revision = "024"
def upgrade() -> None:
    """Recreate key_access_log.device_id FK with ON DELETE SET NULL."""
    # An FK's ON DELETE action cannot be altered in place: drop + recreate.
    op.drop_constraint(
        "fk_key_access_log_device_id", "key_access_log", type_="foreignkey"
    )
    op.create_foreign_key(
        "fk_key_access_log_device_id",
        "key_access_log",
        "devices",
        ["device_id"],
        ["id"],
        ondelete="SET NULL",
    )
def downgrade() -> None:
    """Restore the FK without ON DELETE SET NULL (migration 017 behavior)."""
    op.drop_constraint(
        "fk_key_access_log_device_id", "key_access_log", type_="foreignkey"
    )
    op.create_foreign_key(
        "fk_key_access_log_device_id",
        "key_access_log",
        "devices",
        ["device_id"],
        ["id"],
    )

View File

@@ -0,0 +1,41 @@
"""Add system_settings table for instance-wide configuration.
Revision ID: 026
Revises: 025
Create Date: 2026-03-08
"""
revision = "026"
down_revision = "025"
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
def upgrade() -> None:
    """Create system_settings: a key/value store for instance-wide config."""
    op.create_table(
        "system_settings",
        # Setting name is the primary key — one row per setting.
        sa.Column("key", sa.String(255), primary_key=True),
        # Plain and encrypted storage variants for a setting's value.
        sa.Column("value", sa.Text, nullable=True),
        sa.Column("encrypted_value", sa.LargeBinary, nullable=True),
        sa.Column("encrypted_value_transit", sa.Text, nullable=True),
        sa.Column(
            "updated_at", sa.DateTime(timezone=True),
            server_default=sa.func.now(), nullable=False,
        ),
        # Audit pointer; survives user deletion via SET NULL.
        sa.Column(
            "updated_by", UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="SET NULL"), nullable=True,
        ),
    )
def downgrade() -> None:
    """Drop the system_settings table."""
    op.drop_table("system_settings")

1
backend/app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# TOD Backend

177
backend/app/config.py Normal file
View File

@@ -0,0 +1,177 @@
"""Application configuration using Pydantic Settings."""
import base64
import sys
from functools import lru_cache
from typing import Optional
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
# Known insecure default values that MUST NOT be used in non-dev environments.
# If any of these are detected in production/staging, the app refuses to start.
KNOWN_INSECURE_DEFAULTS: dict[str, list[str]] = {
    "JWT_SECRET_KEY": [
        "change-this-in-production-use-a-long-random-string",
        "dev-jwt-secret-change-in-production",
        "CHANGE_ME_IN_PRODUCTION",
    ],
    "CREDENTIAL_ENCRYPTION_KEY": [
        "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w=",
        "CHANGE_ME_IN_PRODUCTION",
    ],
    "OPENBAO_TOKEN": [
        "dev-openbao-token",
        "CHANGE_ME_IN_PRODUCTION",
    ],
}


def validate_production_settings(settings: "Settings") -> None:
    """Reject known-insecure defaults in non-dev environments.

    Called during app startup. Exits with code 1 and a clear error message
    if production/staging is running with dev secrets.

    Fix over the original: ALL offending fields are reported before exiting
    (the original exited on the first hit, hiding any remaining problems),
    so the operator can correct everything in a single pass.
    """
    if settings.ENVIRONMENT == "dev":
        return
    # Collect every violation first so the operator sees the full list.
    offending = [
        field
        for field, insecure_values in KNOWN_INSECURE_DEFAULTS.items()
        if getattr(settings, field, None) in insecure_values
    ]
    if not offending:
        return
    for field in offending:
        print(
            f"FATAL: {field} uses a known insecure default in '{settings.ENVIRONMENT}' environment.\n"
            f"Generate a secure value and set it in your .env.prod file.\n"
            f"For JWT_SECRET_KEY: python -c \"import secrets; print(secrets.token_urlsafe(64))\"\n"
            f"For CREDENTIAL_ENCRYPTION_KEY: python -c \"import secrets, base64; print(base64.b64encode(secrets.token_bytes(32)).decode())\"",
            file=sys.stderr,
        )
    sys.exit(1)
class Settings(BaseSettings):
    """Application settings, loaded from the environment and an optional .env file.

    Field names are matched case-insensitively against environment variables;
    unknown variables are ignored (extra="ignore"). The defaults below are
    DEV values — validate_production_settings() rejects the known-insecure
    ones outside the dev environment.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Environment (dev | staging | production)
    ENVIRONMENT: str = "dev"

    # Database
    DATABASE_URL: str = "postgresql+asyncpg://postgres:postgres@localhost:5432/mikrotik"
    # Sync URL used by Alembic only
    SYNC_DATABASE_URL: str = "postgresql+psycopg2://postgres:postgres@localhost:5432/mikrotik"
    # App user for RLS enforcement (cannot bypass RLS)
    APP_USER_DATABASE_URL: str = "postgresql+asyncpg://app_user:app_password@localhost:5432/mikrotik"

    # Database connection pool
    DB_POOL_SIZE: int = 20
    DB_MAX_OVERFLOW: int = 40
    DB_ADMIN_POOL_SIZE: int = 10
    DB_ADMIN_MAX_OVERFLOW: int = 20

    # Redis
    REDIS_URL: str = "redis://localhost:6379/0"
    # NATS JetStream
    NATS_URL: str = "nats://localhost:4222"

    # JWT configuration
    JWT_SECRET_KEY: str = "change-this-in-production-use-a-long-random-string"
    JWT_ALGORITHM: str = "HS256"
    JWT_ACCESS_TOKEN_EXPIRE_MINUTES: int = 15
    JWT_REFRESH_TOKEN_EXPIRE_DAYS: int = 7

    # Credential encryption key — must be 32 bytes, base64-encoded in env
    # Generate with: python -c "import secrets, base64; print(base64.b64encode(secrets.token_bytes(32)).decode())"
    CREDENTIAL_ENCRYPTION_KEY: str = "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w="

    # OpenBao Transit (KMS for per-tenant credential encryption)
    OPENBAO_ADDR: str = "http://localhost:8200"
    OPENBAO_TOKEN: str = "dev-openbao-token"

    # First admin bootstrap
    FIRST_ADMIN_EMAIL: Optional[str] = None
    FIRST_ADMIN_PASSWORD: Optional[str] = None

    # CORS origins (comma-separated)
    CORS_ORIGINS: str = "http://localhost:3000,http://localhost:5173,http://localhost:8080"

    # Git store — PVC mount for bare git repos (one per tenant).
    # In production: /data/git-store (Kubernetes PVC ReadWriteMany).
    # In local dev: ./git-store (relative to cwd, created on first use).
    GIT_STORE_PATH: str = "./git-store"
    # WireGuard config path — shared volume with the WireGuard container
    WIREGUARD_CONFIG_PATH: str = "/data/wireguard"

    # Firmware cache
    FIRMWARE_CACHE_DIR: str = "/data/firmware-cache"  # PVC mount path
    FIRMWARE_CHECK_INTERVAL_HOURS: int = 24  # How often to check for new versions

    # SMTP settings for transactional email (password reset, etc.)
    SMTP_HOST: str = "localhost"
    SMTP_PORT: int = 587
    SMTP_USER: Optional[str] = None
    SMTP_PASSWORD: Optional[str] = None
    SMTP_USE_TLS: bool = False
    SMTP_FROM_ADDRESS: str = "noreply@mikrotik-portal.local"

    # Password reset
    PASSWORD_RESET_TOKEN_EXPIRE_MINUTES: int = 30
    APP_BASE_URL: str = "http://localhost:5173"

    # App settings
    APP_NAME: str = "TOD - The Other Dude"
    APP_VERSION: str = "0.1.0"
    DEBUG: bool = False

    @field_validator("CREDENTIAL_ENCRYPTION_KEY")
    @classmethod
    def validate_encryption_key(cls, v: str) -> str:
        """Ensure the key decodes to exactly 32 bytes.

        Note: CHANGE_ME_IN_PRODUCTION is allowed through this validator
        because it fails the base64 length check. The production safety
        check in validate_production_settings() catches it separately.
        """
        if v == "CHANGE_ME_IN_PRODUCTION":
            # Allow the placeholder through field validation -- the production
            # safety check will reject it in non-dev environments.
            return v
        try:
            key_bytes = base64.b64decode(v)
            if len(key_bytes) != 32:
                # Re-raised below wrapped with context by the except clause.
                raise ValueError(
                    f"CREDENTIAL_ENCRYPTION_KEY must decode to exactly 32 bytes, got {len(key_bytes)}"
                )
        except Exception as e:
            raise ValueError(f"Invalid CREDENTIAL_ENCRYPTION_KEY: {e}") from e
        return v

    def get_encryption_key_bytes(self) -> bytes:
        """Return the encryption key as raw bytes."""
        return base64.b64decode(self.CREDENTIAL_ENCRYPTION_KEY)

    def get_cors_origins(self) -> list[str]:
        """Return CORS origins as a list (empty entries stripped)."""
        return [origin.strip() for origin in self.CORS_ORIGINS.split(",") if origin.strip()]
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide, cached Settings instance.

    Rejects insecure defaults in non-dev environments before returning.
    Runs exactly once per process (lru_cache), at import/startup time,
    before the app accepts requests.
    """
    cfg = Settings()
    validate_production_settings(cfg)
    return cfg


settings = get_settings()

114
backend/app/database.py Normal file
View File

@@ -0,0 +1,114 @@
"""Database engine, session factory, and dependency injection."""
import uuid
from collections.abc import AsyncGenerator
from typing import Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import (
AsyncSession,
async_sessionmaker,
create_async_engine,
)
from sqlalchemy.orm import DeclarativeBase
from app.config import settings
class Base(DeclarativeBase):
    """Base class for all SQLAlchemy ORM models.

    All model classes in the app inherit from this so they share a single
    metadata registry.
    """

    pass
# Primary engine using postgres superuser (for migrations/admin)
engine = create_async_engine(
    settings.DATABASE_URL,
    echo=settings.DEBUG,  # echo SQL statements when DEBUG is on
    pool_pre_ping=True,  # probe connections before use to drop stale ones
    pool_size=settings.DB_ADMIN_POOL_SIZE,
    max_overflow=settings.DB_ADMIN_MAX_OVERFLOW,
)

# App user engine (enforces RLS — no superuser bypass)
app_engine = create_async_engine(
    settings.APP_USER_DATABASE_URL,
    echo=settings.DEBUG,
    pool_pre_ping=True,
    pool_size=settings.DB_POOL_SIZE,
    max_overflow=settings.DB_MAX_OVERFLOW,
)

# Session factory for the app_user connection (RLS enforced)
AsyncSessionLocal = async_sessionmaker(
    app_engine,
    class_=AsyncSession,
    expire_on_commit=False,  # keep ORM objects readable after commit
    autocommit=False,
    autoflush=False,
)

# Admin session factory (for bootstrap/migrations only)
AdminAsyncSessionLocal = async_sessionmaker(
    engine,
    class_=AsyncSession,
    expire_on_commit=False,
    autocommit=False,
    autoflush=False,
)
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """
    Dependency that yields an async database session using app_user (RLS enforced).

    The tenant context (SET LOCAL app.current_tenant) must be set by
    tenant_context middleware before any tenant-scoped queries.

    Commits after the request body completes; rolls back and re-raises on
    any exception.
    """
    async with AsyncSessionLocal() as session:
        try:
            yield session
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        finally:
            # Redundant with the async context manager's own close — harmless.
            await session.close()
async def get_admin_db() -> AsyncGenerator[AsyncSession, None]:
    """
    Dependency that yields an admin database session (bypasses RLS).

    USE ONLY for bootstrap operations and internal system tasks.
    Same commit/rollback lifecycle as get_db().
    """
    async with AdminAsyncSessionLocal() as session:
        try:
            yield session
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        finally:
            # Redundant with the async context manager's own close — harmless.
            await session.close()
async def set_tenant_context(session: AsyncSession, tenant_id: Optional[str]) -> None:
    """
    Set the PostgreSQL session variable for RLS enforcement.

    This MUST be called before any tenant-scoped query to activate RLS
    policies. Uses set_config(..., is_local => true) — equivalent to
    SET LOCAL (resets at transaction end) but, unlike SET LOCAL, accepts
    bind parameters, so no SQL string interpolation is needed.

    Args:
        session: active async session; the setting is transaction-local.
        tenant_id: tenant UUID string, the special value 'super_admin',
            or None/empty for super_admin users (empty string matches
            no tenant policy).

    Raises:
        ValueError: if tenant_id is neither 'super_admin' nor a valid UUID.
    """
    if tenant_id:
        # 'super_admin' is a special RLS context value for cross-tenant access.
        # Any other value must be a valid UUID — kept as defense in depth even
        # though set_config() binds the value and cannot be injected.
        if tenant_id != "super_admin":
            try:
                uuid.UUID(tenant_id)
            except ValueError:
                raise ValueError(f"Invalid tenant_id format: {tenant_id!r}")
        await session.execute(
            text("SELECT set_config('app.current_tenant', :tid, true)"),
            {"tid": tenant_id},
        )
    else:
        # For super_admin users: empty string will not match any tenant.
        # (Super_admin normally uses the admin engine, which bypasses RLS.)
        await session.execute(
            text("SELECT set_config('app.current_tenant', '', true)")
        )

View File

@@ -0,0 +1,81 @@
"""Structured logging configuration for the FastAPI backend.
Uses structlog with two rendering modes:
- Dev mode (ENVIRONMENT=dev or DEBUG=true): colored console output
- Prod mode: machine-parseable JSON output
Must be called once during app startup (in lifespan), NOT at module import time,
so tests can override the configuration.
"""
import logging
import os
import structlog
def configure_logging() -> None:
    """Configure structlog for the FastAPI application.

    Dev mode: colored console output with human-readable formatting.
    Prod mode: JSON output with machine-parseable fields.

    Must be called once during app startup (in lifespan), NOT at module import time,
    so tests can override the configuration.
    """
    # Mode and level come straight from env vars, not from Settings, so this
    # module has no dependency on app.config.
    is_dev = os.getenv("ENVIRONMENT", "dev") == "dev"
    log_level_name = os.getenv("LOG_LEVEL", "debug" if is_dev else "info").upper()
    # Unknown level names fall back to INFO.
    log_level = getattr(logging, log_level_name, logging.INFO)
    shared_processors: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]
    if is_dev:
        renderer = structlog.dev.ConsoleRenderer()
    else:
        renderer = structlog.processors.JSONRenderer()
    structlog.configure(
        processors=[
            *shared_processors,
            # Hand the event dict off to the stdlib formatter configured below.
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
    # Capture stdlib loggers (uvicorn, SQLAlchemy, alembic) into structlog pipeline
    formatter = structlog.stdlib.ProcessorFormatter(
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            renderer,
        ],
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    root_logger = logging.getLogger()
    # Replace any pre-existing handlers so output is not duplicated.
    root_logger.handlers.clear()
    root_logger.addHandler(handler)
    root_logger.setLevel(log_level)
    # Quiet down noisy libraries in dev
    if is_dev:
        logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
    """Get a structlog bound logger.

    Use this instead of logging.getLogger() throughout the application so
    every log line flows through the configured structlog pipeline.
    """
    return structlog.get_logger(name)

330
backend/app/main.py Normal file
View File

@@ -0,0 +1,330 @@
"""FastAPI application entry point."""
from contextlib import asynccontextmanager
from typing import AsyncGenerator
import structlog
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse
from app.config import settings
from app.logging_config import configure_logging
from app.middleware.rate_limit import setup_rate_limiting
from app.middleware.request_id import RequestIDMiddleware
from app.middleware.security_headers import SecurityHeadersMiddleware
from app.observability import check_health_ready, setup_instrumentator
logger = structlog.get_logger(__name__)
async def run_migrations() -> None:
    """Apply Alembic migrations (``upgrade head``) before serving traffic.

    Runs alembic in a subprocess so the API process image stays clean.
    NOTE: ``subprocess.run`` blocks the event loop, which is acceptable
    here because nothing else is running during startup.

    Raises:
        RuntimeError: if the alembic subprocess exits non-zero.
    """
    import os
    import subprocess
    import sys

    # Backend project root: two directories above this module file.
    backend_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    proc = subprocess.run(
        [sys.executable, "-m", "alembic", "upgrade", "head"],
        capture_output=True,
        text=True,
        cwd=backend_root,
    )
    if proc.returncode != 0:
        logger.error("migration failed", stderr=proc.stderr)
        raise RuntimeError(f"Database migration failed: {proc.stderr}")
    logger.info("migrations applied successfully")
async def bootstrap_first_admin() -> None:
    """Seed the very first super_admin account when the user table is empty.

    No-op unless both FIRST_ADMIN_EMAIL and FIRST_ADMIN_PASSWORD are
    configured, and skipped entirely once any user row exists.
    """
    if not settings.FIRST_ADMIN_EMAIL or not settings.FIRST_ADMIN_PASSWORD:
        logger.info("FIRST_ADMIN_EMAIL/PASSWORD not set, skipping bootstrap")
        return
    from sqlalchemy import select

    from app.database import AdminAsyncSessionLocal
    from app.models.user import User, UserRole
    from app.services.auth import hash_password

    async with AdminAsyncSessionLocal() as session:
        # Admin session bypasses RLS, so this sees users across all tenants.
        probe = await session.execute(select(User).limit(1))
        if probe.scalar_one_or_none() is not None:
            logger.info("users already exist, skipping first admin bootstrap")
            return
        # Seed the first super_admin with a bcrypt password.
        # must_upgrade_auth=True triggers the SRP registration flow on first login.
        bootstrap_user = User(
            email=settings.FIRST_ADMIN_EMAIL,
            hashed_password=hash_password(settings.FIRST_ADMIN_PASSWORD),
            name="Super Admin",
            role=UserRole.SUPER_ADMIN.value,
            tenant_id=None,  # super_admin has no tenant
            is_active=True,
            must_upgrade_auth=True,
        )
        session.add(bootstrap_user)
        await session.commit()
        logger.info("created first super_admin", email=settings.FIRST_ADMIN_EMAIL)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan: run migrations and bootstrap on startup.

    Startup order is deliberate: logging first, then migrations and the
    first-admin bootstrap (both fatal on failure), then a series of
    best-effort services (NATS subscribers, schedulers, OpenBao
    provisioning, stale-operation recovery) that log a warning but never
    prevent the API from serving requests. Shutdown tears the same
    services down and disposes the DB engines.
    """
    from app.services.backup_scheduler import start_backup_scheduler, stop_backup_scheduler
    from app.services.firmware_subscriber import start_firmware_subscriber, stop_firmware_subscriber
    from app.services.metrics_subscriber import start_metrics_subscriber, stop_metrics_subscriber
    from app.services.nats_subscriber import start_nats_subscriber, stop_nats_subscriber
    from app.services.sse_manager import ensure_sse_streams
    # Configure structured logging FIRST -- before any other startup work
    configure_logging()
    logger.info("starting TOD API")
    # Run database migrations (fatal on failure -- raises RuntimeError)
    await run_migrations()
    # Bootstrap first admin user (no-op when users already exist)
    await bootstrap_first_admin()
    # Start NATS subscriber for device status events.
    # Wrapped in try/except so NATS failure doesn't prevent API startup --
    # allows running the API locally without NATS during frontend development.
    nats_connection = None
    try:
        nats_connection = await start_nats_subscriber()
    except Exception as exc:
        logger.warning(
            "NATS status subscriber could not start (API will run without it)",
            error=str(exc),
        )
    # Start NATS subscriber for device metrics events (separate NATS connection).
    # Same pattern -- failure is non-fatal so the API starts without full NATS stack.
    metrics_nc = None
    try:
        metrics_nc = await start_metrics_subscriber()
    except Exception as exc:
        logger.warning(
            "NATS metrics subscriber could not start (API will run without it)",
            error=str(exc),
        )
    # Start NATS subscriber for device firmware events (separate NATS connection).
    firmware_nc = None
    try:
        firmware_nc = await start_firmware_subscriber()
    except Exception as exc:
        logger.warning(
            "NATS firmware subscriber could not start (API will run without it)",
            error=str(exc),
        )
    # Ensure NATS streams for SSE event delivery exist (ALERT_EVENTS, OPERATION_EVENTS).
    # Non-fatal -- API starts without SSE streams; they'll be created on first SSE connection.
    try:
        await ensure_sse_streams()
    except Exception as exc:
        logger.warning(
            "SSE NATS streams could not be created (SSE will retry on connection)",
            error=str(exc),
        )
    # Start APScheduler for automated nightly config backups.
    # Non-fatal -- API starts and serves requests even without the scheduler.
    try:
        await start_backup_scheduler()
    except Exception as exc:
        logger.warning("backup scheduler could not start", error=str(exc))
    # Register daily firmware version check (3am UTC) on the same scheduler.
    try:
        from app.services.firmware_service import schedule_firmware_checks
        schedule_firmware_checks()
    except Exception as exc:
        logger.warning("firmware check scheduler could not start", error=str(exc))
    # Provision OpenBao Transit keys for existing tenants and migrate legacy credentials.
    # Non-blocking: if OpenBao is unavailable, the dual-read path handles fallback.
    if settings.OPENBAO_ADDR:
        try:
            from app.database import AdminAsyncSessionLocal
            from app.services.key_service import provision_existing_tenants
            async with AdminAsyncSessionLocal() as openbao_session:
                counts = await provision_existing_tenants(openbao_session)
                logger.info(
                    "openbao tenant provisioning complete",
                    **{k: v for k, v in counts.items()},
                )
        except Exception as exc:
            logger.warning(
                "openbao tenant provisioning failed (will retry on next restart)",
                error=str(exc),
            )
    # Recover stale push operations from previous API instance
    try:
        from app.services.restore_service import recover_stale_push_operations
        from app.database import AdminAsyncSessionLocal as _AdminSession
        async with _AdminSession() as session:
            await recover_stale_push_operations(session)
        logger.info("push operation recovery check complete")
    except Exception as e:
        # NOTE(review): uses %s-style formatting and error level, unlike the
        # keyword-arg warning style used above -- consider unifying.
        logger.error("push operation recovery failed (non-fatal): %s", e)
    # Config change subscriber (event-driven backups)
    config_change_nc = None
    try:
        from app.services.config_change_subscriber import (
            start_config_change_subscriber,
            stop_config_change_subscriber,
        )
        config_change_nc = await start_config_change_subscriber()
    except Exception as e:
        logger.error("Config change subscriber failed to start (non-fatal): %s", e)
    # Push rollback/alert subscriber
    push_rollback_nc = None
    try:
        from app.services.push_rollback_subscriber import (
            start_push_rollback_subscriber,
            stop_push_rollback_subscriber,
        )
        push_rollback_nc = await start_push_rollback_subscriber()
    except Exception as e:
        logger.error("Push rollback subscriber failed to start (non-fatal): %s", e)
    logger.info("startup complete, ready to serve requests")
    yield
    # Shutdown
    logger.info("shutting down TOD API")
    await stop_backup_scheduler()
    await stop_nats_subscriber(nats_connection)
    await stop_metrics_subscriber(metrics_nc)
    await stop_firmware_subscriber(firmware_nc)
    # stop_* names below were imported inside the try blocks above; they are
    # only referenced when the matching start succeeded (nc is truthy), so an
    # import failure cannot raise NameError here.
    if config_change_nc:
        await stop_config_change_subscriber()
    if push_rollback_nc:
        await stop_push_rollback_subscriber()
    # Dispose database engine connections to release all pooled connections cleanly.
    from app.database import app_engine, engine
    await app_engine.dispose()
    await engine.dispose()
    logger.info("database connections closed")
def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Assembly order: app construction (docs disabled outside dev), middleware
    (CORS innermost, then security headers, then request-ID outermost),
    router registration under /api, inline health endpoints, and finally
    Prometheus instrumentation (which must see all registered routes).

    Returns:
        The fully configured FastAPI instance.
    """
    app = FastAPI(
        title=settings.APP_NAME,
        version=settings.APP_VERSION,
        description="The Other Dude — Fleet Management API",
        # Interactive API docs are exposed only in the dev environment.
        docs_url="/docs" if settings.ENVIRONMENT == "dev" else None,
        redoc_url="/redoc" if settings.ENVIRONMENT == "dev" else None,
        lifespan=lifespan,
    )
    # Starlette processes middleware in LIFO order (last added = first to run).
    # We want: Request -> RequestID -> CORS -> Route handler
    # So add CORS first, then RequestID (it will wrap CORS).
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.get_cors_origins(),
        allow_credentials=True,
        allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
        allow_headers=["Authorization", "Content-Type", "X-Request-ID"],
    )
    app.add_middleware(SecurityHeadersMiddleware, environment=settings.ENVIRONMENT)
    setup_rate_limiting(app)  # Register 429 exception handler (no middleware added)
    app.add_middleware(RequestIDMiddleware)
    # Include routers (imported here, not at module top, so router modules
    # only load once the app is actually constructed)
    from app.routers.alerts import router as alerts_router
    from app.routers.auth import router as auth_router
    from app.routers.sse import router as sse_router
    from app.routers.config_backups import router as config_router
    from app.routers.config_editor import router as config_editor_router
    from app.routers.device_groups import router as device_groups_router
    from app.routers.device_tags import router as device_tags_router
    from app.routers.devices import router as devices_router
    from app.routers.firmware import router as firmware_router
    from app.routers.metrics import router as metrics_router
    from app.routers.events import router as events_router
    from app.routers.clients import router as clients_router
    from app.routers.device_logs import router as device_logs_router
    from app.routers.templates import router as templates_router
    from app.routers.tenants import router as tenants_router
    from app.routers.reports import router as reports_router
    from app.routers.topology import router as topology_router
    from app.routers.users import router as users_router
    from app.routers.audit_logs import router as audit_logs_router
    from app.routers.api_keys import router as api_keys_router
    from app.routers.maintenance_windows import router as maintenance_windows_router
    from app.routers.vpn import router as vpn_router
    from app.routers.certificates import router as certificates_router
    from app.routers.transparency import router as transparency_router
    from app.routers.settings import router as settings_router
    app.include_router(auth_router, prefix="/api")
    app.include_router(tenants_router, prefix="/api")
    app.include_router(users_router, prefix="/api")
    app.include_router(devices_router, prefix="/api")
    app.include_router(device_groups_router, prefix="/api")
    app.include_router(device_tags_router, prefix="/api")
    app.include_router(metrics_router, prefix="/api")
    app.include_router(config_router, prefix="/api")
    app.include_router(firmware_router, prefix="/api")
    app.include_router(alerts_router, prefix="/api")
    app.include_router(config_editor_router, prefix="/api")
    app.include_router(events_router, prefix="/api")
    app.include_router(device_logs_router, prefix="/api")
    app.include_router(templates_router, prefix="/api")
    app.include_router(clients_router, prefix="/api")
    app.include_router(topology_router, prefix="/api")
    app.include_router(sse_router, prefix="/api")
    app.include_router(audit_logs_router, prefix="/api")
    app.include_router(reports_router, prefix="/api")
    app.include_router(api_keys_router, prefix="/api")
    app.include_router(maintenance_windows_router, prefix="/api")
    app.include_router(vpn_router, prefix="/api")
    # NOTE(review): certificates is the only router mounted with its own
    # sub-prefix and tags here -- presumably its module defines no prefix of
    # its own, unlike the others; confirm against app/routers/certificates.py.
    app.include_router(certificates_router, prefix="/api/certificates", tags=["certificates"])
    app.include_router(transparency_router, prefix="/api")
    app.include_router(settings_router, prefix="/api")
    # Health check endpoints
    @app.get("/health", tags=["health"])
    async def health_check() -> dict:
        """Liveness probe -- returns 200 if the process is alive."""
        return {"status": "ok", "version": settings.APP_VERSION}
    @app.get("/health/ready", tags=["health"])
    async def health_ready() -> JSONResponse:
        """Readiness probe -- returns 200 only when PostgreSQL, Redis, and NATS are healthy."""
        result = await check_health_ready()
        # Any non-healthy aggregate status maps to 503 so load balancers stop routing.
        status_code = 200 if result["status"] == "healthy" else 503
        return JSONResponse(content=result, status_code=status_code)
    @app.get("/api/health", tags=["health"])
    async def api_health_check() -> dict:
        """Backward-compatible health endpoint under /api prefix."""
        return {"status": "ok", "version": settings.APP_VERSION}
    # Prometheus metrics instrumentation -- MUST be after routers so all routes are captured
    setup_instrumentator(app)
    return app
# Module-level ASGI application object (e.g. served as "app.main:app").
app = create_app()

View File

@@ -0,0 +1 @@
"""FastAPI middleware and dependencies for auth, tenant context, and RBAC."""

View File

@@ -0,0 +1,48 @@
"""Rate limiting middleware using slowapi with Redis backend.
Per-route rate limits only -- no global limits to avoid blocking the
Go poller, NATS subscribers, and health check endpoints.
Rate limit data uses Redis DB 1 (separate from app data in DB 0).
"""
from fastapi import FastAPI
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
from app.config import settings
def _get_redis_url() -> str:
"""Return Redis URL pointing to DB 1 for rate limit storage.
Keeps rate limit counters separate from application data in DB 0.
"""
url = settings.REDIS_URL
if url.endswith("/0"):
return url[:-2] + "/1"
# If no DB specified or different DB, append /1
if url.rstrip("/").split("/")[-1].isdigit():
# Replace existing DB number
parts = url.rsplit("/", 1)
return parts[0] + "/1"
return url.rstrip("/") + "/1"
# Shared slowapi limiter: keyed by client IP, counters stored in Redis DB 1.
# default_limits is empty on purpose -- limits are opt-in per route via
# @limiter.limit() so health checks and machine clients are never throttled.
limiter = Limiter(
    key_func=get_remote_address,
    storage_uri=_get_redis_url(),
    default_limits=[],  # No global limits -- per-route only
)
def setup_rate_limiting(app: FastAPI) -> None:
    """Attach the shared limiter to a FastAPI application.

    slowapi requires the limiter instance on ``app.state``; we also
    register the handler that turns RateLimitExceeded into a 429
    response. No middleware is installed -- throttling only happens on
    routes decorated with ``@limiter.limit()``.
    """
    app.state.limiter = limiter
    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

View File

@@ -0,0 +1,186 @@
"""
Role-Based Access Control (RBAC) middleware.
Provides dependency factories for enforcing role-based access control
on FastAPI routes. Roles are hierarchical:
super_admin > tenant_admin > operator > viewer
Role permissions per plan TENANT-04/05/06:
- viewer: GET endpoints only (read-only)
- operator: GET + device/config management endpoints
- tenant_admin: full access within their tenant
- super_admin: full access across all tenants
"""
from typing import Callable
from fastapi import Depends, HTTPException, Request, status
from fastapi.params import Depends as DependsClass
from app.middleware.tenant_context import CurrentUser, get_current_user
# Role hierarchy (higher index = more privilege)
# api_key is at operator level for RBAC checks; fine-grained access controlled by scopes.
ROLE_HIERARCHY = {
"viewer": 0,
"api_key": 1,
"operator": 1,
"tenant_admin": 2,
"super_admin": 3,
}
def _get_role_level(role: str) -> int:
"""Return numeric privilege level for a role string."""
return ROLE_HIERARCHY.get(role, -1)
def require_role(*allowed_roles: str) -> Callable:
    """
    Build a dependency that admits only the explicitly listed roles.

    Usage:
        @router.post("/items", dependencies=[Depends(require_role("tenant_admin", "super_admin"))])

    Args:
        *allowed_roles: Role strings permitted on the endpoint.

    Returns:
        An async dependency that raises 403 for any other role.
    """
    permitted = frozenset(allowed_roles)

    async def dependency(
        current_user: CurrentUser = Depends(get_current_user),
    ) -> CurrentUser:
        if current_user.role in permitted:
            return current_user
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Access denied. Required roles: {', '.join(allowed_roles)}. "
            f"Your role: {current_user.role}",
        )

    return dependency
def require_min_role(min_role: str) -> Callable:
    """
    Build a dependency admitting ``min_role`` and every role above it.

    Usage:
        @router.get("/items", dependencies=[Depends(require_min_role("operator"))])
        # Allows: operator, tenant_admin, super_admin
        # Denies: viewer
    """
    required_level = _get_role_level(min_role)

    async def dependency(
        current_user: CurrentUser = Depends(get_current_user),
    ) -> CurrentUser:
        # Unknown roles map to -1 and are always rejected.
        if _get_role_level(current_user.role) >= required_level:
            return current_user
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Access denied. Minimum required role: {min_role}. "
            f"Your role: {current_user.role}",
        )

    return dependency
def require_write_access() -> Callable:
    """
    Build a dependency that blocks viewers from mutating requests.

    Viewers keep read access; on POST/PUT/PATCH/DELETE they receive 403.
    Attach this to any mutating endpoint.
    """
    mutating_methods = {"POST", "PUT", "PATCH", "DELETE"}

    async def dependency(
        request: Request,
        current_user: CurrentUser = Depends(get_current_user),
    ) -> CurrentUser:
        if current_user.role == "viewer" and request.method in mutating_methods:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Viewers have read-only access. "
                "Contact your administrator to request elevated permissions.",
            )
        return current_user

    return dependency
def require_scope(scope: str) -> DependsClass:
    """Dependency enforcing API-key scopes; a no-op for JWT users.

    Returns a ready ``Depends()`` instance so it can sit directly in a
    dependency list:
        @router.get("/items", dependencies=[require_scope("devices:read")])

    Args:
        scope: Scope string the API key must carry (e.g. "devices:read").

    Raises:
        HTTPException: 403 when an API key lacks the required scope.
    """
    async def _check_scope(
        current_user: CurrentUser = Depends(get_current_user),
    ) -> CurrentUser:
        # Scopes apply only to API-key principals; regular users pass through.
        if current_user.role == "api_key" and scope not in (current_user.scopes or ()):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"API key missing required scope: {scope}",
            )
        return current_user

    return Depends(_check_scope)
# Pre-built convenience dependencies
async def require_super_admin(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """Admit only the portal-wide super_admin role; 403 otherwise."""
    if current_user.role == "super_admin":
        return current_user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied. Super admin role required.",
    )
async def require_tenant_admin_or_above(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """Admit tenant_admin and super_admin; reject everyone else with 403."""
    if current_user.role in ("tenant_admin", "super_admin"):
        return current_user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied. Tenant admin or higher role required.",
    )
async def require_operator_or_above(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """Admit operator, tenant_admin, and super_admin; 403 for the rest."""
    if current_user.role in ("operator", "tenant_admin", "super_admin"):
        return current_user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied. Operator or higher role required.",
    )
async def require_authenticated(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """Admit any authenticated principal, viewer included.

    Authentication itself is enforced by ``get_current_user``; this
    dependency exists for routes that have no role restriction.
    """
    return current_user

View File

@@ -0,0 +1,67 @@
"""Request ID middleware for structured logging context.
Generates or extracts a request ID for every incoming request and binds it
(along with tenant_id from JWT) to structlog's contextvars so that all log
lines emitted during the request include these correlation fields.
"""
import uuid
import structlog
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Bind request_id and tenant_id into structlog context for each request."""

    async def dispatch(self, request: Request, call_next):
        # Wipe context left over from the previous request handled by this
        # worker; stale bindings would leak correlation fields across requests.
        structlog.contextvars.clear_contextvars()
        # Honor a caller-supplied X-Request-ID, otherwise mint a fresh UUID.
        correlation_id = request.headers.get("X-Request-ID", str(uuid.uuid4()))
        structlog.contextvars.bind_contextvars(
            request_id=correlation_id,
            tenant_id=self._extract_tenant_id(request),
        )
        response: Response = await call_next(request)
        # Echo the correlation id back so clients can report it.
        response.headers["X-Request-ID"] = correlation_id
        return response

    def _extract_tenant_id(self, request: Request) -> str | None:
        """Best-effort tenant_id from the request's JWT.

        Checks the access_token cookie first, then a Bearer Authorization
        header. Returns None for missing or invalid tokens -- fine for
        unauthenticated endpoints like /login.
        """
        auth_header = request.headers.get("Authorization", "")
        token = request.cookies.get("access_token") or (
            auth_header[7:] if auth_header.startswith("Bearer ") else None
        )
        if not token:
            return None
        try:
            from jose import jwt as jose_jwt

            from app.config import settings

            claims = jose_jwt.decode(
                token,
                settings.JWT_SECRET_KEY,
                algorithms=[settings.JWT_ALGORITHM],
            )
            return claims.get("tenant_id")
        except Exception:
            # Any decode/signature problem means "no tenant context".
            return None

View File

@@ -0,0 +1,79 @@
"""Security response headers middleware.
Adds standard security headers to all API responses:
- X-Content-Type-Options: nosniff (prevent MIME sniffing)
- X-Frame-Options: DENY (prevent clickjacking)
- Referrer-Policy: strict-origin-when-cross-origin
- Cache-Control: no-store (prevent browser caching of API responses)
- Strict-Transport-Security (HSTS, production only -- breaks plain HTTP dev)
- Content-Security-Policy (strict in production, relaxed for dev HMR)
CSP directives:
- script-src 'self' (production) blocks inline scripts -- XSS mitigation
- style-src 'unsafe-inline' required for Tailwind, Framer Motion, Radix, Sonner
- connect-src includes wss:/ws: for SSE and WebSocket connections
- Dev mode adds 'unsafe-inline' and 'unsafe-eval' for Vite HMR
"""
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
# Production CSP: strict -- no inline scripts allowed
_CSP_PRODUCTION = "; ".join([
"default-src 'self'",
"script-src 'self'",
"style-src 'self' 'unsafe-inline'",
"img-src 'self' data: blob:",
"font-src 'self'",
"connect-src 'self' wss: ws:",
"worker-src 'self'",
"frame-ancestors 'none'",
"base-uri 'self'",
"form-action 'self'",
])
# Development CSP: relaxed for Vite HMR (hot module replacement)
_CSP_DEV = "; ".join([
"default-src 'self'",
"script-src 'self' 'unsafe-inline' 'unsafe-eval'",
"style-src 'self' 'unsafe-inline'",
"img-src 'self' data: blob:",
"font-src 'self'",
"connect-src 'self' http://localhost:* ws://localhost:* wss:",
"worker-src 'self' blob:",
"frame-ancestors 'none'",
"base-uri 'self'",
"form-action 'self'",
])
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Stamp every API response with standard security headers."""

    def __init__(self, app, environment: str = "dev"):
        super().__init__(app)
        # Anything other than the literal "dev" environment is treated as
        # production for header purposes.
        self.is_production = environment != "dev"

    async def dispatch(self, request: Request, call_next) -> Response:
        response = await call_next(request)
        headers = response.headers
        # Unconditional hardening headers.
        headers["X-Content-Type-Options"] = "nosniff"
        headers["X-Frame-Options"] = "DENY"
        headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
        headers["Cache-Control"] = "no-store"
        if self.is_production:
            headers["Content-Security-Policy"] = _CSP_PRODUCTION
            # HSTS only where TLS is expected; it would break plain-HTTP dev.
            headers["Strict-Transport-Security"] = (
                "max-age=31536000; includeSubDomains"
            )
        else:
            headers["Content-Security-Policy"] = _CSP_DEV
        return response

View File

@@ -0,0 +1,177 @@
"""
Tenant context middleware and current user dependency.
Extracts JWT from Authorization header (Bearer token) or httpOnly cookie,
validates it, and provides current user context for request handlers.
For tenant-scoped users: sets SET LOCAL app.current_tenant on the DB session.
For super_admin: uses special 'super_admin' context that grants cross-tenant access.
"""
import uuid
from typing import Annotated, Optional
from fastapi import Cookie, Depends, HTTPException, Request, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.services.auth import verify_token
# Optional HTTP Bearer scheme (won't raise 403 automatically — we handle auth ourselves)
# auto_error=False lets requests without an Authorization header reach our own
# 401 handling (and the cookie fallback) instead of FastAPI's built-in error.
bearer_scheme = HTTPBearer(auto_error=False)
class CurrentUser:
    """Authenticated principal resolved from a JWT or an API key."""

    def __init__(
        self,
        user_id: uuid.UUID,
        tenant_id: Optional[uuid.UUID],
        role: str,
        scopes: Optional[list[str]] = None,
    ) -> None:
        # tenant_id is None for super_admin; scopes are set only for API keys.
        self.user_id = user_id
        self.tenant_id = tenant_id
        self.role = role
        self.scopes = scopes

    @property
    def is_super_admin(self) -> bool:
        """True for the cross-tenant portal administrator role."""
        return self.role == "super_admin"

    @property
    def is_api_key(self) -> bool:
        """True when this principal authenticated via an API key."""
        return self.role == "api_key"

    def __repr__(self) -> str:
        return f"<CurrentUser user_id={self.user_id} role={self.role} tenant_id={self.tenant_id}>"
def _extract_token(
    request: Request,
    credentials: Optional[HTTPAuthorizationCredentials],
    access_token: Optional[str],
) -> Optional[str]:
    """
    Pick the JWT out of the request.

    The Authorization: Bearer header wins over the httpOnly cookie;
    returns None when neither is present.
    """
    if credentials is not None and credentials.scheme.lower() == "bearer":
        return credentials.credentials
    return access_token if access_token else None
async def get_current_user(
    request: Request,
    credentials: Annotated[Optional[HTTPAuthorizationCredentials], Depends(bearer_scheme)] = None,
    access_token: Annotated[Optional[str], Cookie()] = None,
    db: AsyncSession = Depends(get_db),
) -> CurrentUser:
    """
    FastAPI dependency that extracts and validates the current user from JWT.
    Supports both Bearer token (Authorization header) and httpOnly cookie.
    Sets the tenant context on the database session for RLS enforcement.

    Resolution order: API keys (token prefixed "mktp_") are validated via
    the api_key service and returned with role "api_key"; any other token
    is decoded as a JWT of type "access".

    Raises:
        HTTPException 401: If no token provided or token is invalid
    """
    token = _extract_token(request, credentials, access_token)
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Not authenticated",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # API key authentication: detect mktp_ prefix and validate via api_key_service
    if token.startswith("mktp_"):
        from app.services.api_key_service import validate_api_key
        key_data = await validate_api_key(token)
        if not key_data:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid, expired, or revoked API key",
                headers={"WWW-Authenticate": "Bearer"},
            )
        tenant_id = key_data["tenant_id"]
        # Set tenant context on the request-scoped DB session for RLS
        await set_tenant_context(db, str(tenant_id))
        return CurrentUser(
            user_id=key_data["user_id"],
            tenant_id=tenant_id,
            role="api_key",
            scopes=key_data["scopes"],
        )
    # Decode and validate the JWT (verify_token raises on bad signature/expiry)
    payload = verify_token(token, expected_type="access")
    user_id_str = payload.get("sub")
    tenant_id_str = payload.get("tenant_id")
    role = payload.get("role")
    if not user_id_str or not role:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token payload",
            headers={"WWW-Authenticate": "Bearer"},
        )
    try:
        user_id = uuid.UUID(user_id_str)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token payload",
        )
    tenant_id: Optional[uuid.UUID] = None
    if tenant_id_str:
        try:
            tenant_id = uuid.UUID(tenant_id_str)
        except ValueError:
            # Malformed tenant_id is tolerated here; a non-super_admin with
            # no usable tenant is rejected by the branch below anyway.
            pass
    # Set the tenant context on the database session for RLS enforcement
    if role == "super_admin":
        # super_admin uses special context that grants cross-tenant access
        await set_tenant_context(db, "super_admin")
    elif tenant_id:
        await set_tenant_context(db, str(tenant_id))
    else:
        # Non-super_admin without tenant — deny access
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token: no tenant context",
        )
    return CurrentUser(
        user_id=user_id,
        tenant_id=tenant_id,
        role=role,
    )
async def get_optional_current_user(
    request: Request,
    credentials: Annotated[Optional[HTTPAuthorizationCredentials], Depends(bearer_scheme)] = None,
    access_token: Annotated[Optional[str], Cookie()] = None,
    db: AsyncSession = Depends(get_db),
) -> Optional[CurrentUser]:
    """Variant of get_current_user that yields None instead of raising 401.

    Only HTTPException from the auth path is swallowed; anything
    unexpected still propagates.
    """
    try:
        return await get_current_user(request, credentials, access_token, db)
    except HTTPException:
        return None

View File

@@ -0,0 +1,35 @@
"""SQLAlchemy ORM models."""
from app.models.tenant import Tenant
from app.models.user import User, UserRole
from app.models.device import Device, DeviceGroup, DeviceTag, DeviceGroupMembership, DeviceTagAssignment, DeviceStatus
from app.models.alert import AlertRule, NotificationChannel, AlertRuleChannel, AlertEvent
from app.models.firmware import FirmwareVersion, FirmwareUpgradeJob
from app.models.config_template import ConfigTemplate, ConfigTemplateTag, TemplatePushJob
from app.models.audit_log import AuditLog
from app.models.maintenance_window import MaintenanceWindow
from app.models.api_key import ApiKey
# Public re-export surface of app.models: keeps `from app.models import X`
# stable regardless of which submodule defines each mapped class.
__all__ = [
    "Tenant",
    "User",
    "UserRole",
    "Device",
    "DeviceGroup",
    "DeviceTag",
    "DeviceGroupMembership",
    "DeviceTagAssignment",
    "DeviceStatus",
    "AlertRule",
    "NotificationChannel",
    "AlertRuleChannel",
    "AlertEvent",
    "FirmwareVersion",
    "FirmwareUpgradeJob",
    "ConfigTemplate",
    "ConfigTemplateTag",
    "TemplatePushJob",
    "AuditLog",
    "MaintenanceWindow",
    "ApiKey",
]

177
backend/app/models/alert.py Normal file
View File

@@ -0,0 +1,177 @@
"""Alert system ORM models: rules, notification channels, and alert events."""
import uuid
from datetime import datetime
from sqlalchemy import (
Boolean,
DateTime,
ForeignKey,
Integer,
LargeBinary,
Numeric,
Text,
func,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class AlertRule(Base):
    """Configurable alert threshold rule.
    Rules can be tenant-wide (device_id=NULL), device-specific, or group-scoped.
    When a metric breaches the threshold for duration_polls consecutive polls,
    an alert fires.
    """
    __tablename__ = "alert_rules"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Owning tenant; rows are removed with the tenant (ON DELETE CASCADE).
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # NULL = rule applies tenant-wide rather than to one device.
    device_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=True,
    )
    # Optional group scope; SET NULL keeps the rule if the group is deleted.
    group_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("device_groups.id", ondelete="SET NULL"),
        nullable=True,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    # Metric identifier the rule watches; valid values are defined by the
    # alert evaluator, not visible in this module.
    metric: Mapped[str] = mapped_column(Text, nullable=False)
    # Comparison operator stored as free text -- presumably ">"/"<"-style;
    # TODO confirm against the evaluator.
    operator: Mapped[str] = mapped_column(Text, nullable=False)
    threshold: Mapped[float] = mapped_column(Numeric, nullable=False)
    # Consecutive breaching polls required before the alert fires (default 1).
    duration_polls: Mapped[int] = mapped_column(Integer, nullable=False, default=1, server_default="1")
    severity: Mapped[str] = mapped_column(Text, nullable=False)
    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True, server_default="true")
    # Marks rules seeded by the system as opposed to user-created ones.
    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return f"<AlertRule id={self.id} name={self.name!r} metric={self.metric}>"
class NotificationChannel(Base):
    """Email, webhook, or Slack notification destination.

    Exactly one group of type-specific columns is expected to be populated,
    selected by ``channel_type``; the others remain NULL.
    """
    __tablename__ = "notification_channels"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Owning tenant; channels are removed with the tenant (ON DELETE CASCADE).
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    channel_type: Mapped[str] = mapped_column(Text, nullable=False)  # "email", "webhook", or "slack"
    # SMTP fields (email channels)
    smtp_host: Mapped[str | None] = mapped_column(Text, nullable=True)
    smtp_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
    smtp_user: Mapped[str | None] = mapped_column(Text, nullable=True)
    smtp_password: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)  # AES-256-GCM encrypted
    smtp_use_tls: Mapped[bool] = mapped_column(Boolean, default=False, server_default="false")
    from_address: Mapped[str | None] = mapped_column(Text, nullable=True)
    to_address: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Webhook fields
    webhook_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Slack fields
    slack_webhook_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    # OpenBao Transit ciphertext (dual-write migration); coexists with the
    # legacy smtp_password column while credentials migrate.
    smtp_password_transit: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return f"<NotificationChannel id={self.id} name={self.name!r} type={self.channel_type}>"
class AlertRuleChannel(Base):
    """Many-to-many association between alert rules and notification channels.

    Pure join table: composite primary key (rule_id, channel_id) and no
    extra payload columns. Rows disappear when either side is deleted.
    """
    __tablename__ = "alert_rule_channels"
    rule_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("alert_rules.id", ondelete="CASCADE"),
        primary_key=True,
    )
    channel_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("notification_channels.id", ondelete="CASCADE"),
        primary_key=True,
    )
class AlertEvent(Base):
    """Record of an alert firing, resolving, or flapping.

    rule_id is NULL for system-level alerts (e.g., device offline), and is
    also set to NULL if the originating rule is later deleted (SET NULL FK).
    Events are retained after acknowledgement/resolution for history.
    """
    __tablename__ = "alert_events"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Originating rule, if any; survives rule deletion as NULL.
    rule_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("alert_rules.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Device the event concerns; events are deleted with the device.
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    status: Mapped[str] = mapped_column(Text, nullable=False)  # "firing", "resolved", "flapping"
    severity: Mapped[str] = mapped_column(Text, nullable=False)
    # Metric/value/threshold are NULL for events not tied to a numeric rule.
    metric: Mapped[str | None] = mapped_column(Text, nullable=True)
    value: Mapped[float | None] = mapped_column(Numeric, nullable=True)
    threshold: Mapped[float | None] = mapped_column(Numeric, nullable=True)
    message: Mapped[str | None] = mapped_column(Text, nullable=True)
    is_flapping: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false")
    # Acknowledgement bookkeeping; acknowledged_by survives user deletion as NULL.
    acknowledged_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    acknowledged_by: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Notifications suppressed until this timestamp (NULL = not silenced).
    silenced_until: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    fired_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    resolved_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    def __repr__(self) -> str:
        return f"<AlertEvent id={self.id} status={self.status} severity={self.severity}>"

View File

@@ -0,0 +1,60 @@
"""API key ORM model for tenant-scoped programmatic access."""
import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy import DateTime, ForeignKey, Text, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class ApiKey(Base):
    """Tracks API keys for programmatic access to the portal.

    Keys are stored as SHA-256 hashes (never plaintext); key_prefix keeps a
    short displayable fragment for identification in the UI.
    Scoped permissions limit what each key can do.
    Revocation is soft-delete (sets revoked_at, row preserved for audit).
    """
    __tablename__ = "api_keys"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Owning tenant; keys are deleted with the tenant.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # User who created/owns the key; keys are deleted with the user.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    key_prefix: Mapped[str] = mapped_column(Text, nullable=False)
    # SHA-256 hash of the full key; unique so lookups by hash are exact.
    key_hash: Mapped[str] = mapped_column(Text, nullable=False, unique=True)
    # NOTE: a plain-string server_default is rendered by SQLAlchemy as a
    # quoted SQL string literal, so "'[]'::jsonb" would emit a broken
    # default. "[]" renders as DEFAULT '[]', which Postgres casts to jsonb
    # (same pattern as AuditLog.details' server_default="{}").
    scopes: Mapped[list] = mapped_column(JSONB, nullable=False, server_default="[]")
    # NULL = key never expires.
    expires_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    last_used_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Soft-delete marker: NULL = active, non-NULL = revoked at that time.
    revoked_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    def __repr__(self) -> str:
        return f"<ApiKey id={self.id} name={self.name} prefix={self.key_prefix}>"

View File

@@ -0,0 +1,59 @@
"""Audit log model for centralized audit trail."""
import uuid
from datetime import datetime
from typing import Any
from sqlalchemy import DateTime, ForeignKey, String, Text, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class AuditLog(Base):
    """Records all auditable actions in the system (config changes, CRUD, auth events).

    user_id is NULL for system-originated actions or after the actor's user
    row is deleted (SET NULL FK). details carries structured context as JSONB;
    encrypted_details holds a Transit-encrypted form when the payload is
    sensitive.
    """
    __tablename__ = "audit_logs"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Indexed for per-tenant audit timeline queries.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    user_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Short action verb, e.g. "device.create" — exact vocabulary defined by callers.
    action: Mapped[str] = mapped_column(String(100), nullable=False)
    resource_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    resource_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    device_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Free-form structured context; defaults to an empty JSON object.
    details: Mapped[dict[str, Any]] = mapped_column(
        JSONB,
        nullable=False,
        server_default="{}",
    )
    # Transit-encrypted details JSON (vault:v1:...) — set when details are encrypted
    encrypted_details: Mapped[str | None] = mapped_column(Text, nullable=True)
    # String(45) fits the longest IPv6 textual form (incl. IPv4-mapped).
    ip_address: Mapped[str | None] = mapped_column(String(45), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return f"<AuditLog id={self.id} action={self.action!r} tenant_id={self.tenant_id}>"

View File

@@ -0,0 +1,140 @@
"""Certificate Authority and Device Certificate ORM models.
Supports the Internal Certificate Authority feature:
- CertificateAuthority: one per tenant, stores encrypted CA private key + public cert
- DeviceCertificate: per-device signed certificate with lifecycle status tracking
"""
import uuid
from datetime import datetime
from sqlalchemy import DateTime, ForeignKey, LargeBinary, String, Text, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class CertificateAuthority(Base):
    """Per-tenant root Certificate Authority.

    Each tenant has at most one CA (UNIQUE on tenant_id). The CA private key
    is encrypted with AES-256-GCM before storage (using the same pattern as
    device credentials). The public cert_pem is not sensitive and can be
    distributed freely.
    """
    __tablename__ = "certificate_authorities"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # One CA per tenant, enforced by the unique constraint.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    common_name: Mapped[str] = mapped_column(String(255), nullable=False)
    # PEM-encoded public certificate (not sensitive).
    cert_pem: Mapped[str] = mapped_column(Text, nullable=False)
    # AES-256-GCM ciphertext of the CA private key.
    encrypted_private_key: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    serial_number: Mapped[str] = mapped_column(String(64), nullable=False)
    # String(95) fits a colon-separated SHA-256 fingerprint (32 bytes -> 95 chars).
    fingerprint_sha256: Mapped[str] = mapped_column(String(95), nullable=False)
    not_valid_before: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    not_valid_after: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    # OpenBao Transit ciphertext (dual-write migration)
    encrypted_private_key_transit: Mapped[str | None] = mapped_column(
        Text, nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return (
            f"<CertificateAuthority id={self.id} "
            f"cn={self.common_name!r} tenant={self.tenant_id}>"
        )
class DeviceCertificate(Base):
    """Per-device TLS certificate signed by the tenant's CA.

    Status lifecycle:
        issued -> deploying -> deployed -> expiring -> expired
                           \\-> revoked
                           \\-> superseded (when rotated)
    """
    __tablename__ = "device_certificates"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Certificates are deleted along with the device they belong to.
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Issuing CA; deleting the CA removes its issued certificates.
    ca_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("certificate_authorities.id", ondelete="CASCADE"),
        nullable=False,
    )
    common_name: Mapped[str] = mapped_column(String(255), nullable=False)
    serial_number: Mapped[str] = mapped_column(String(64), nullable=False)
    # String(95) fits a colon-separated SHA-256 fingerprint.
    fingerprint_sha256: Mapped[str] = mapped_column(String(95), nullable=False)
    cert_pem: Mapped[str] = mapped_column(Text, nullable=False)
    # AES-256-GCM ciphertext of the device certificate's private key.
    encrypted_private_key: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    not_valid_before: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    not_valid_after: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    # OpenBao Transit ciphertext (dual-write migration)
    encrypted_private_key_transit: Mapped[str | None] = mapped_column(
        Text, nullable=True
    )
    # One of the lifecycle states documented in the class docstring.
    status: Mapped[str] = mapped_column(
        String(20), nullable=False, server_default="issued"
    )
    # Set when the cert has been pushed to the device (status "deployed").
    deployed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # NOTE(review): no onupdate here — updated_at only changes if callers
    # set it explicitly; confirm whether that is intentional.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return (
            f"<DeviceCertificate id={self.id} "
            f"cn={self.common_name!r} status={self.status}>"
        )

View File

@@ -0,0 +1,178 @@
"""SQLAlchemy models for config backup tables."""
import uuid
from datetime import datetime
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, LargeBinary, SmallInteger, String, Text, UniqueConstraint, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class ConfigBackupRun(Base):
    """Metadata for a single config backup run.

    The actual config content (export.rsc and backup.bin) lives in the tenant's
    bare git repository at GIT_STORE_PATH/{tenant_id}.git. This table provides
    the timeline view and per-run metadata without duplicating file content in
    PostgreSQL.
    """
    __tablename__ = "config_backup_runs"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Indexed for the per-device backup timeline.
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Git commit hash in the tenant's bare repo where this backup is stored.
    commit_sha: Mapped[str] = mapped_column(Text, nullable=False)
    # Trigger type: 'scheduled' | 'manual' | 'pre-restore' | 'checkpoint' | 'config-change'
    trigger_type: Mapped[str] = mapped_column(String(20), nullable=False)
    # Lines added/removed vs the prior export.rsc for this device.
    # NULL for the first backup (no prior version to diff against).
    lines_added: Mapped[int | None] = mapped_column(Integer, nullable=True)
    lines_removed: Mapped[int | None] = mapped_column(Integer, nullable=True)
    # Encryption metadata: NULL=plaintext, 1=client-side AES-GCM, 2=OpenBao Transit
    encryption_tier: Mapped[int | None] = mapped_column(SmallInteger, nullable=True)
    # 12-byte AES-GCM nonce for Tier 1 (client-side) backups; NULL for plaintext/Transit
    encryption_nonce: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return (
            f"<ConfigBackupRun id={self.id} device_id={self.device_id} "
            f"trigger={self.trigger_type!r} sha={self.commit_sha[:8]!r}>"
        )
class ConfigBackupSchedule(Base):
    """Per-tenant default and per-device override backup schedule config.

    A row with device_id=NULL is the tenant-level default (daily at 2am).
    A row with a specific device_id overrides the tenant default for that device.
    At most one row per (tenant_id, device_id) pair, enforced below.
    """
    __tablename__ = "config_backup_schedules"
    __table_args__ = (
        UniqueConstraint("tenant_id", "device_id", name="uq_backup_schedule_tenant_device"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # NULL = tenant-level default schedule; non-NULL = device-specific override.
    device_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=True,
    )
    # Standard cron expression (5 fields). Default: daily at 2am UTC.
    cron_expression: Mapped[str] = mapped_column(
        String(100),
        nullable=False,
        default="0 2 * * *",
        server_default="0 2 * * *",
    )
    # Lowercase "true" for consistency with every other boolean
    # server_default in these models (e.g. AlertRule.enabled).
    enabled: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        server_default="true",
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        scope = f"device={self.device_id}" if self.device_id else f"tenant={self.tenant_id}"
        return f"<ConfigBackupSchedule {scope} cron={self.cron_expression!r} enabled={self.enabled}>"
class ConfigPushOperation(Base):
    """Tracks pending two-phase config push operations for panic-revert recovery.

    Before pushing a config, a row is inserted with status='pending_verification'.
    If the API pod restarts during the 60-second verification window, the startup
    handler checks this table and either commits (deletes the RouterOS scheduler
    job) or marks the operation as 'failed'. This prevents the panic-revert
    scheduler from firing and reverting a successful push after an API restart.
    See Pitfall 6 in 04-RESEARCH.md for the full failure scenario.
    """
    __tablename__ = "config_push_operations"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Indexed so the startup handler can find pending ops per device quickly.
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Git commit SHA we'd revert to if the push fails.
    pre_push_commit_sha: Mapped[str] = mapped_column(Text, nullable=False)
    # RouterOS scheduler job name created on the device for panic-revert.
    scheduler_name: Mapped[str] = mapped_column(String(255), nullable=False)
    # 'pending_verification' | 'committed' | 'reverted' | 'failed'
    status: Mapped[str] = mapped_column(
        String(30),
        nullable=False,
        default="pending_verification",
        server_default="pending_verification",
    )
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Set when the operation leaves 'pending_verification'.
    completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    def __repr__(self) -> str:
        return (
            f"<ConfigPushOperation id={self.id} device_id={self.device_id} "
            f"status={self.status!r}>"
        )

View File

@@ -0,0 +1,153 @@
"""Config template, template tag, and template push job models."""
import uuid
from datetime import datetime
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
String,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import JSON, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class ConfigTemplate(Base):
    """Reusable RouterOS config template, unique by name within a tenant.

    content holds the template text; variables describes the substitutable
    parameters as a JSON list (exact element schema defined by the consumer).
    """
    __tablename__ = "config_templates"
    __table_args__ = (
        UniqueConstraint("tenant_id", "name", name="uq_config_templates_tenant_name"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Raw template body (pre-render).
    content: Mapped[str] = mapped_column(Text, nullable=False)
    variables: Mapped[list] = mapped_column(JSON, nullable=False, default=list, server_default="[]")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Touched automatically on ORM update via onupdate.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # Relationships
    tenant: Mapped["Tenant"] = relationship("Tenant")  # type: ignore[name-defined]
    tags: Mapped[list["ConfigTemplateTag"]] = relationship(
        "ConfigTemplateTag", back_populates="template", cascade="all, delete-orphan"
    )
    def __repr__(self) -> str:
        return f"<ConfigTemplate id={self.id} name={self.name!r} tenant_id={self.tenant_id}>"
class ConfigTemplateTag(Base):
    """Label attached to a config template; unique per (template_id, name)."""
    __tablename__ = "config_template_tags"
    __table_args__ = (
        UniqueConstraint("template_id", "name", name="uq_config_template_tags_template_name"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Denormalized tenant scope (the template also carries tenant_id).
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String(100), nullable=False)
    template_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("config_templates.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Relationships
    template: Mapped["ConfigTemplate"] = relationship(
        "ConfigTemplate", back_populates="tags"
    )
    def __repr__(self) -> str:
        return f"<ConfigTemplateTag id={self.id} name={self.name!r} template_id={self.template_id}>"
class TemplatePushJob(Base):
    """One template push to one device, optionally grouped into a rollout.

    rendered_content is the template after variable substitution — preserved
    even if the source template is later deleted (template_id SET NULL).
    """
    __tablename__ = "template_push_jobs"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Source template; survives template deletion as NULL for audit/history.
    template_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("config_templates.id", ondelete="SET NULL"),
        nullable=True,
    )
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Groups jobs belonging to one multi-device rollout; no FK (plain UUID).
    rollout_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )
    # Template content after variable substitution, as pushed to the device.
    rendered_content: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="pending",
        server_default="pending",
    )
    # Backup commit taken before the push, for rollback.
    pre_push_backup_sha: Mapped[str | None] = mapped_column(Text, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    started_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Relationships
    template: Mapped["ConfigTemplate | None"] = relationship("ConfigTemplate")
    device: Mapped["Device"] = relationship("Device")  # type: ignore[name-defined]
    def __repr__(self) -> str:
        return f"<TemplatePushJob id={self.id} status={self.status!r} device_id={self.device_id}>"

View File

@@ -0,0 +1,214 @@
"""Device, DeviceGroup, DeviceTag, and membership models."""
import uuid
from datetime import datetime
from enum import Enum
from sqlalchemy import (
Boolean,
DateTime,
Float,
ForeignKey,
Integer,
LargeBinary,
String,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class DeviceStatus(str, Enum):
    """Device connection status.

    Subclasses str so members compare equal to their string values and can
    be stored directly in the String(20) Device.status column.
    """
    UNKNOWN = "unknown"
    ONLINE = "online"
    OFFLINE = "offline"
class Device(Base):
    """A managed MikroTik RouterOS device, unique by hostname within a tenant.

    Credentials are stored only in encrypted form; polling results
    (uptime, CPU, memory, versions) are cached on the row.
    """
    __tablename__ = "devices"
    __table_args__ = (
        UniqueConstraint("tenant_id", "hostname", name="uq_devices_tenant_hostname"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    hostname: Mapped[str] = mapped_column(String(255), nullable=False)
    ip_address: Mapped[str] = mapped_column(String(45), nullable=False)  # IPv4 or IPv6
    # RouterOS API ports (defaults are RouterOS's standard 8728/8729).
    api_port: Mapped[int] = mapped_column(Integer, default=8728, nullable=False)
    api_ssl_port: Mapped[int] = mapped_column(Integer, default=8729, nullable=False)
    # Hardware/firmware facts discovered by polling; NULL until first poll.
    model: Mapped[str | None] = mapped_column(String(255), nullable=True)
    serial_number: Mapped[str | None] = mapped_column(String(255), nullable=True)
    firmware_version: Mapped[str | None] = mapped_column(String(100), nullable=True)
    routeros_version: Mapped[str | None] = mapped_column(String(100), nullable=True)
    routeros_major_version: Mapped[int | None] = mapped_column(Integer, nullable=True)
    uptime_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True)
    last_cpu_load: Mapped[int | None] = mapped_column(Integer, nullable=True)
    last_memory_used_pct: Mapped[int | None] = mapped_column(Integer, nullable=True)
    architecture: Mapped[str | None] = mapped_column(Text, nullable=True)  # CPU arch (arm, arm64, mipsbe, etc.)
    preferred_channel: Mapped[str] = mapped_column(
        Text, default="stable", server_default="stable", nullable=False
    )  # Firmware release channel
    last_seen: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # AES-256-GCM encrypted credentials (username + password JSON)
    encrypted_credentials: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    # OpenBao Transit ciphertext (dual-write migration)
    encrypted_credentials_transit: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Optional map coordinates for the device's physical location.
    latitude: Mapped[float | None] = mapped_column(Float, nullable=True)
    longitude: Mapped[float | None] = mapped_column(Float, nullable=True)
    # Stores DeviceStatus values ("unknown" | "online" | "offline").
    status: Mapped[str] = mapped_column(
        String(20),
        default=DeviceStatus.UNKNOWN.value,
        nullable=False,
    )
    tls_mode: Mapped[str] = mapped_column(
        String(20),
        default="auto",
        server_default="auto",
        nullable=False,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # Relationships
    tenant: Mapped["Tenant"] = relationship("Tenant", back_populates="devices")  # type: ignore[name-defined]
    group_memberships: Mapped[list["DeviceGroupMembership"]] = relationship(
        "DeviceGroupMembership", back_populates="device", cascade="all, delete-orphan"
    )
    tag_assignments: Mapped[list["DeviceTagAssignment"]] = relationship(
        "DeviceTagAssignment", back_populates="device", cascade="all, delete-orphan"
    )
    def __repr__(self) -> str:
        return f"<Device id={self.id} hostname={self.hostname!r} tenant_id={self.tenant_id}>"
class DeviceGroup(Base):
    """Named grouping of devices, unique by name within a tenant."""
    __tablename__ = "device_groups"
    __table_args__ = (
        UniqueConstraint("tenant_id", "name", name="uq_device_groups_tenant_name"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    preferred_channel: Mapped[str] = mapped_column(
        Text, default="stable", server_default="stable", nullable=False
    )  # Firmware release channel for the group
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Relationships
    tenant: Mapped["Tenant"] = relationship("Tenant", back_populates="device_groups")  # type: ignore[name-defined]
    memberships: Mapped[list["DeviceGroupMembership"]] = relationship(
        "DeviceGroupMembership", back_populates="group", cascade="all, delete-orphan"
    )
    def __repr__(self) -> str:
        return f"<DeviceGroup id={self.id} name={self.name!r} tenant_id={self.tenant_id}>"
class DeviceTag(Base):
    """Free-form label for devices, unique by name within a tenant."""
    __tablename__ = "device_tags"
    __table_args__ = (
        UniqueConstraint("tenant_id", "name", name="uq_device_tags_tenant_name"),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    name: Mapped[str] = mapped_column(String(100), nullable=False)
    color: Mapped[str | None] = mapped_column(String(7), nullable=True)  # hex color e.g. #FF5733
    # Relationships
    tenant: Mapped["Tenant"] = relationship("Tenant", back_populates="device_tags")  # type: ignore[name-defined]
    assignments: Mapped[list["DeviceTagAssignment"]] = relationship(
        "DeviceTagAssignment", back_populates="tag", cascade="all, delete-orphan"
    )
    def __repr__(self) -> str:
        return f"<DeviceTag id={self.id} name={self.name!r} tenant_id={self.tenant_id}>"
class DeviceGroupMembership(Base):
    """Association row placing one device in one group (composite PK)."""
    __tablename__ = "device_group_memberships"
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        primary_key=True,
    )
    group_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("device_groups.id", ondelete="CASCADE"),
        primary_key=True,
    )
    # Relationships
    device: Mapped["Device"] = relationship("Device", back_populates="group_memberships")
    group: Mapped["DeviceGroup"] = relationship("DeviceGroup", back_populates="memberships")
class DeviceTagAssignment(Base):
    """Association row attaching one tag to one device (composite PK)."""
    __tablename__ = "device_tag_assignments"
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        primary_key=True,
    )
    tag_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("device_tags.id", ondelete="CASCADE"),
        primary_key=True,
    )
    # Relationships
    device: Mapped["Device"] = relationship("Device", back_populates="tag_assignments")
    tag: Mapped["DeviceTag"] = relationship("DeviceTag", back_populates="assignments")

View File

@@ -0,0 +1,102 @@
"""Firmware version tracking and upgrade job ORM models."""
import uuid
from datetime import datetime
from sqlalchemy import (
BigInteger,
Boolean,
DateTime,
Integer,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey
from app.database import Base
class FirmwareVersion(Base):
    """Cached firmware version from MikroTik download server or poller discovery.

    Not tenant-scoped — firmware versions are global data shared across all tenants.
    """
    __tablename__ = "firmware_versions"
    __table_args__ = (
        # Named explicitly, matching the file-wide uq_<table>_<cols>
        # convention; an unnamed constraint gets a DB-generated name that
        # is awkward to target in later migrations.
        UniqueConstraint(
            "architecture",
            "channel",
            "version",
            name="uq_firmware_versions_arch_channel_version",
        ),
    )
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    architecture: Mapped[str] = mapped_column(Text, nullable=False)
    channel: Mapped[str] = mapped_column(Text, nullable=False)  # "stable", "long-term", "testing"
    version: Mapped[str] = mapped_column(Text, nullable=False)
    # Upstream download URL for the .npk package.
    npk_url: Mapped[str] = mapped_column(Text, nullable=False)
    # Local cache path once downloaded; NULL while not yet cached.
    npk_local_path: Mapped[str | None] = mapped_column(Text, nullable=True)
    npk_size_bytes: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    # When this version entry was last refreshed from upstream.
    checked_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return f"<FirmwareVersion arch={self.architecture} ch={self.channel} ver={self.version}>"
class FirmwareUpgradeJob(Base):
    """Tracks a firmware upgrade operation for a single device.

    Multiple jobs can share a rollout_group_id for mass upgrades.
    """
    __tablename__ = "firmware_upgrade_jobs"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Shared by all jobs in one mass-upgrade rollout; plain UUID, no FK.
    rollout_group_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )
    target_version: Mapped[str] = mapped_column(Text, nullable=False)
    architecture: Mapped[str] = mapped_column(Text, nullable=False)
    channel: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(
        Text, nullable=False, default="pending", server_default="pending"
    )
    # Backup commit taken before the upgrade, for rollback.
    pre_upgrade_backup_sha: Mapped[str | None] = mapped_column(Text, nullable=True)
    scheduled_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Operator's explicit confirmation for a major-version jump.
    confirmed_major_upgrade: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False, server_default="false"
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    def __repr__(self) -> str:
        return f"<FirmwareUpgradeJob id={self.id} status={self.status} target={self.target_version}>"

View File

@@ -0,0 +1,134 @@
"""Key set and key access log models for zero-knowledge architecture."""
import uuid
from datetime import datetime
from sqlalchemy import DateTime, ForeignKey, Integer, LargeBinary, Text, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class UserKeySet(Base):
    """Encrypted key bundle for a user.

    Stores the RSA private key (wrapped by AUK), tenant vault key
    (wrapped by AUK), RSA public key, and key derivation salts.
    One key set per user (UNIQUE on user_id).
    """
    __tablename__ = "user_key_sets"
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    tenant_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=True,  # NULL for super_admin
    )
    # RSA private key wrapped by the AUK, plus its AES-GCM nonce.
    encrypted_private_key: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    private_key_nonce: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    # Tenant vault key wrapped by the AUK, plus its AES-GCM nonce.
    encrypted_vault_key: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    vault_key_nonce: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    public_key: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    # BUG FIX: the original used server_default=func.literal_column("650000"),
    # which renders the DDL as a call to a (nonexistent) SQL function named
    # literal_column. A plain string renders a literal default instead.
    pbkdf2_iterations: Mapped[int] = mapped_column(
        Integer,
        server_default="650000",
        nullable=False,
    )
    pbkdf2_salt: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    hkdf_salt: Mapped[bytes] = mapped_column(
        LargeBinary, nullable=False
    )
    # Same fix as pbkdf2_iterations: literal default, not a function call.
    key_version: Mapped[int] = mapped_column(
        Integer,
        server_default="1",
        nullable=False,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # onupdate added for consistency with Device/ConfigTemplate so the
    # timestamp tracks ORM updates automatically.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # Relationships
    user: Mapped["User"] = relationship("User")  # type: ignore[name-defined]
    tenant: Mapped["Tenant | None"] = relationship("Tenant")  # type: ignore[name-defined]
    def __repr__(self) -> str:
        return f"<UserKeySet id={self.id} user_id={self.user_id} version={self.key_version}>"
class KeyAccessLog(Base):
    """Immutable audit trail for key operations.

    Append-only: INSERT+SELECT only, no UPDATE/DELETE via RLS.
    There is deliberately no updated_at column — rows are never modified.
    """

    __tablename__ = "key_access_log"

    # Surrogate key: uuid4 client-side via the ORM, gen_random_uuid() when
    # rows are inserted outside the ORM.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # SET NULL preserves the audit row when the acting user is deleted.
    user_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Free-form action name plus optional resource identifiers.
    action: Mapped[str] = mapped_column(Text, nullable=False)
    resource_type: Mapped[str | None] = mapped_column(Text, nullable=True)
    resource_id: Mapped[str | None] = mapped_column(Text, nullable=True)
    key_version: Mapped[int | None] = mapped_column(Integer, nullable=True)
    ip_address: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Phase 29 extensions for device credential access tracking
    # NOTE(review): no ondelete on devices.id — deleting a device with log
    # rows will raise an FK violation; confirm this is intended.
    device_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id"),
        nullable=True,
    )
    justification: Mapped[str | None] = mapped_column(Text, nullable=True)
    correlation_id: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    def __repr__(self) -> str:
        return f"<KeyAccessLog id={self.id} action={self.action!r}>"

View File

@@ -0,0 +1,74 @@
"""Maintenance window ORM model for scheduled maintenance periods.
Maintenance windows allow operators to define time periods during which
alerts are suppressed for specific devices (or all devices in a tenant).
"""
import uuid
from datetime import datetime
from sqlalchemy import Boolean, DateTime, ForeignKey, Text, VARCHAR, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class MaintenanceWindow(Base):
    """Scheduled maintenance window with optional alert suppression.

    device_ids is a JSONB array of device UUID strings.
    An empty array means "all devices in tenant".
    """

    __tablename__ = "maintenance_windows"

    # Surrogate key: uuid4 client-side via the ORM, gen_random_uuid() when
    # rows are inserted outside the ORM.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(VARCHAR(200), nullable=False)
    # FIX: a plain-string server_default is rendered as a quoted string
    # literal by SQLAlchemy's DDL compiler. "[]" emits DEFAULT '[]', which
    # Postgres casts to jsonb; the previous "'[]'::jsonb" would have been
    # re-quoted into an invalid literal.
    device_ids: Mapped[list] = mapped_column(
        JSONB,
        nullable=False,
        server_default="[]",
    )
    # Window boundaries (timezone-aware).
    start_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    end_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    suppress_alerts: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        server_default="true",
    )
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # SET NULL keeps the window if its creator is deleted.
    created_by: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # onupdate added for consistency with Tenant/User so edits refresh
    # the timestamp (it previously never changed after insert).
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    def __repr__(self) -> str:
        return f"<MaintenanceWindow id={self.id} name={self.name!r}>"

View File

@@ -0,0 +1,49 @@
"""Tenant model — represents an MSP client organization."""
import uuid
from datetime import datetime
from sqlalchemy import DateTime, LargeBinary, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class Tenant(Base):
    """An MSP client organization; the root of all tenant-scoped data."""

    __tablename__ = "tenants"

    # Surrogate key: uuid4 client-side via the ORM, gen_random_uuid() when
    # rows are inserted outside the ORM.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # Tenant names are globally unique and indexed for lookup.
    name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False, index=True)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    contact_email: Mapped[str | None] = mapped_column(String(255), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # onupdate refreshes the timestamp on every ORM update.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # Zero-knowledge key management (Phase 28+29)
    # Nullable: populated only once the tenant's vault key is provisioned.
    encrypted_vault_key: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    vault_key_version: Mapped[int | None] = mapped_column(Integer, nullable=True)
    openbao_key_name: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Relationships — passive_deletes=True lets the DB ON DELETE CASCADE handle cleanup
    users: Mapped[list["User"]] = relationship("User", back_populates="tenant", passive_deletes=True)  # type: ignore[name-defined]
    devices: Mapped[list["Device"]] = relationship("Device", back_populates="tenant", passive_deletes=True)  # type: ignore[name-defined]
    device_groups: Mapped[list["DeviceGroup"]] = relationship("DeviceGroup", back_populates="tenant", passive_deletes=True)  # type: ignore[name-defined]
    device_tags: Mapped[list["DeviceTag"]] = relationship("DeviceTag", back_populates="tenant", passive_deletes=True)  # type: ignore[name-defined]

    def __repr__(self) -> str:
        return f"<Tenant id={self.id} name={self.name!r}>"

View File

@@ -0,0 +1,74 @@
"""User model with role-based access control."""
import uuid
from datetime import datetime
from enum import Enum
from sqlalchemy import Boolean, DateTime, ForeignKey, LargeBinary, SmallInteger, String, func, text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class UserRole(str, Enum):
    """User roles with increasing privilege levels.

    str mixin lets role values compare/serialize as plain strings
    (e.g. in JWT claims and the users.role column).
    """

    SUPER_ADMIN = "super_admin"  # portal-wide; not bound to a tenant
    TENANT_ADMIN = "tenant_admin"  # full control within one tenant
    OPERATOR = "operator"  # day-to-day operations
    VIEWER = "viewer"  # read-only (default for new users)
class User(Base):
    """Portal user with role-based access control and SRP auth columns."""

    __tablename__ = "users"

    # Surrogate key: uuid4 client-side via the ORM, gen_random_uuid() when
    # rows are inserted outside the ORM.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    email: Mapped[str] = mapped_column(String(255), unique=True, nullable=False, index=True)
    # Nullable: SRP-only users (auth_version 2) have no bcrypt hash.
    hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    # Stored as a plain string matching UserRole values; least privilege
    # (viewer) by default.
    role: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        default=UserRole.VIEWER.value,
    )
    # tenant_id is nullable for super_admin users (portal-wide role)
    tenant_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    # SRP zero-knowledge authentication columns (nullable during migration period)
    srp_salt: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    srp_verifier: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    auth_version: Mapped[int] = mapped_column(
        SmallInteger, server_default=text("1"), nullable=False
    )  # 1=bcrypt legacy, 2=SRP
    must_upgrade_auth: Mapped[bool] = mapped_column(
        Boolean, server_default=text("false"), nullable=False
    )  # True for bcrypt users who need SRP upgrade
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
    last_login: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # onupdate refreshes the timestamp on every ORM update.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    tenant: Mapped["Tenant | None"] = relationship("Tenant", back_populates="users")  # type: ignore[name-defined]

    def __repr__(self) -> str:
        return f"<User id={self.id} email={self.email!r} role={self.role!r}>"

85
backend/app/models/vpn.py Normal file
View File

@@ -0,0 +1,85 @@
"""VPN configuration and peer models for WireGuard management."""
import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, LargeBinary, String, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class VpnConfig(Base):
    """Per-tenant WireGuard server configuration (one row per tenant)."""

    __tablename__ = "vpn_config"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    # unique=True enforces at most one VPN config per tenant.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    # Private key stored as bytes (presumably encrypted at rest — confirm
    # against the service layer); public key is clear base64 text.
    server_private_key: Mapped[bytes] = mapped_column(LargeBinary, nullable=False)
    server_public_key: Mapped[str] = mapped_column(String(64), nullable=False)
    # String server_defaults render as quoted literals in DDL, which is
    # correct for these text columns.
    subnet: Mapped[str] = mapped_column(String(32), nullable=False, server_default="10.10.0.0/24")
    server_port: Mapped[int] = mapped_column(Integer, nullable=False, server_default="51820")
    server_address: Mapped[str] = mapped_column(String(32), nullable=False, server_default="10.10.0.1/24")
    # Public endpoint (host[:port]) peers dial; optional until configured.
    endpoint: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    is_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False, onupdate=func.now()
    )
    # Peers are queried separately via tenant_id — no ORM relationship needed
class VpnPeer(Base):
    """WireGuard peer representing a device's VPN connection."""

    __tablename__ = "vpn_peers"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # unique=True enforces at most one peer per device.
    device_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("devices.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    # Private/preshared keys stored as bytes (presumably encrypted at rest
    # — confirm against the service layer); public key is clear text.
    peer_private_key: Mapped[bytes] = mapped_column(LargeBinary, nullable=False)
    peer_public_key: Mapped[str] = mapped_column(String(64), nullable=False)
    preshared_key: Mapped[Optional[bytes]] = mapped_column(LargeBinary, nullable=True)
    # IP assigned to the peer inside the tenant's VPN subnet.
    assigned_ip: Mapped[str] = mapped_column(String(32), nullable=False)
    additional_allowed_ips: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
    is_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="true")
    last_handshake: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False, onupdate=func.now()
    )
    # Config is queried separately via tenant_id — no ORM relationship needed

View File

@@ -0,0 +1,140 @@
"""Prometheus metrics and health check infrastructure.
Provides:
- setup_instrumentator(): Configures Prometheus auto-instrumentation for FastAPI
- check_health_ready(): Verifies PostgreSQL, Redis, and NATS connectivity for readiness probes
"""
import asyncio
import time
import structlog
from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator
logger = structlog.get_logger(__name__)
def setup_instrumentator(app: FastAPI) -> Instrumentator:
    """Mount Prometheus auto-instrumentation on the FastAPI app.

    Records per-request counters, duration histograms, and an in-progress
    gauge, labelled by method, templated handler path, and status code so
    label cardinality stays bounded. The /metrics endpoint is exposed at
    root level (outside the /api prefix) and hidden from the OpenAPI
    schema. Health and metrics endpoints are excluded from measurement.

    Must be called AFTER all routers are included so every route is
    captured.
    """
    metrics = Instrumentator(
        should_group_status_codes=False,
        should_ignore_untemplated=True,
        excluded_handlers=["/health", "/health/ready", "/metrics", "/api/health"],
        should_respect_env_var=False,
    )
    metrics.instrument(app)
    metrics.expose(app, include_in_schema=False, should_gzip=True)
    logger.info("prometheus instrumentation enabled", endpoint="/metrics")
    return metrics
async def check_health_ready() -> dict:
    """Check readiness by verifying all critical dependencies.

    Checks PostgreSQL, Redis, and NATS connectivity, each with its own
    5-second timeout. The three probes run concurrently via
    asyncio.gather, so worst-case latency is bounded by the slowest
    probe (~5s) instead of the sum of all three (~15s).

    Returns:
        dict with "status" ("healthy"|"unhealthy"), "version", and "checks"
        containing per-dependency results.
    """
    from app.config import settings

    # Probes are independent of each other, so run them in parallel.
    # Each probe catches its own exceptions and returns a status dict,
    # so gather never raises here.
    pg_result, redis_result, nats_result = await asyncio.gather(
        _check_postgres(),
        _check_redis(settings.REDIS_URL),
        _check_nats(settings.NATS_URL),
    )
    checks: dict[str, dict] = {
        "postgres": pg_result,
        "redis": redis_result,
        "nats": nats_result,
    }
    all_healthy = all(c["status"] == "up" for c in checks.values())
    return {
        "status": "healthy" if all_healthy else "unhealthy",
        "version": settings.APP_VERSION,
        "checks": checks,
    }
async def _check_postgres() -> dict:
    """Verify PostgreSQL connectivity via the admin engine.

    The entire acquire-connection + SELECT 1 sequence is wrapped in a
    single 5-second timeout; previously only the query was bounded, so a
    stalled connection pool or unreachable host could hang the probe at
    the connect step indefinitely.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms", and "error".
    """
    start = time.monotonic()
    try:
        from sqlalchemy import text
        from app.database import engine

        async def _probe() -> None:
            # Acquire and release a pooled connection and run a trivial query.
            async with engine.connect() as conn:
                await conn.execute(text("SELECT 1"))

        await asyncio.wait_for(_probe(), timeout=5.0)
        latency_ms = round((time.monotonic() - start) * 1000)
        return {"status": "up", "latency_ms": latency_ms, "error": None}
    except Exception as exc:
        latency_ms = round((time.monotonic() - start) * 1000)
        logger.warning("health check: postgres failed", error=str(exc))
        return {"status": "down", "latency_ms": latency_ms, "error": str(exc)}
async def _check_redis(redis_url: str) -> dict:
    """Verify Redis connectivity with a bounded PING.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms", and "error".
    """
    started = time.monotonic()
    try:
        import redis.asyncio as aioredis

        client = aioredis.from_url(redis_url, socket_connect_timeout=5)
        try:
            await asyncio.wait_for(client.ping(), timeout=5.0)
        finally:
            # Always release the connection, even when the ping fails.
            await client.aclose()
        elapsed_ms = round((time.monotonic() - started) * 1000)
        return {"status": "up", "latency_ms": elapsed_ms, "error": None}
    except Exception as exc:
        elapsed_ms = round((time.monotonic() - started) * 1000)
        logger.warning("health check: redis failed", error=str(exc))
        return {"status": "down", "latency_ms": elapsed_ms, "error": str(exc)}
async def _check_nats(nats_url: str) -> dict:
    """Verify NATS connectivity by connecting and draining.

    Returns:
        dict with "status" ("up"|"down"), "latency_ms", and "error".
    """
    started = time.monotonic()
    try:
        import nats

        nc = await asyncio.wait_for(nats.connect(nats_url), timeout=5.0)
        try:
            await nc.drain()
        except Exception:
            # Best-effort shutdown; a failed drain does not mean NATS is down.
            pass
        elapsed_ms = round((time.monotonic() - started) * 1000)
        return {"status": "up", "latency_ms": elapsed_ms, "error": None}
    except Exception as exc:
        elapsed_ms = round((time.monotonic() - started) * 1000)
        logger.warning("health check: nats failed", error=str(exc))
        return {"status": "down", "latency_ms": elapsed_ms, "error": str(exc)}

View File

@@ -0,0 +1 @@
"""FastAPI routers for all API endpoints."""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,172 @@
"""API key management endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/api-keys:
- List all keys (active + revoked)
- Create new key (returns plaintext once)
- Revoke key (soft delete)
RBAC: tenant_admin or above for all operations.
RLS enforced via get_db() (app_user engine with tenant context).
"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.api_key_service import (
ALLOWED_SCOPES,
create_api_key,
list_api_keys,
revoke_api_key,
)
router = APIRouter(tags=["api-keys"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Super admins may act on any tenant; their RLS context is pinned to the
    requested tenant here. All other users must belong to the tenant
    (their RLS context is presumably set by get_db — confirm), otherwise
    403 is raised.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
# ---------------------------------------------------------------------------
# Request/response schemas
# ---------------------------------------------------------------------------
class ApiKeyCreate(BaseModel):
    """Request body for creating an API key."""

    model_config = ConfigDict(extra="forbid")  # reject unknown fields

    name: str  # human-readable label
    scopes: list[str]  # validated against ALLOWED_SCOPES in the endpoint
    expires_at: Optional[datetime] = None  # None = no expiry


class ApiKeyResponse(BaseModel):
    """API key metadata returned to clients (never includes the secret)."""

    model_config = ConfigDict(from_attributes=True)

    id: str
    name: str
    key_prefix: str  # short prefix used to identify the key in the UI
    scopes: list[str]
    expires_at: Optional[str] = None  # ISO-8601 string or None
    last_used_at: Optional[str] = None
    created_at: str
    revoked_at: Optional[str] = None  # set once the key is revoked


class ApiKeyCreateResponse(ApiKeyResponse):
    """Extended response that includes the plaintext key (shown once)."""

    key: str  # plaintext secret; returned only at creation time
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/tenants/{tenant_id}/api-keys", response_model=list[ApiKeyResponse])
async def list_keys(
    tenant_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> list[dict]:
    """List all API keys for a tenant (active and revoked).

    Requires tenant_admin or above. Super admins get RLS context pinned
    to the tenant via _check_tenant_access; other users must belong to it.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    keys = await list_api_keys(db, tenant_id)
    # Convert UUID ids to strings for response
    for k in keys:
        k["id"] = str(k["id"])
    return keys
@router.post(
    "/tenants/{tenant_id}/api-keys",
    response_model=ApiKeyCreateResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_key(
    tenant_id: uuid.UUID,
    body: ApiKeyCreate,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> dict:
    """Create a new API key. The plaintext key is returned only once."""
    await _check_tenant_access(current_user, tenant_id, db)

    # Reject any scope outside the allowed list before touching the DB.
    unknown_scopes = set(body.scopes) - ALLOWED_SCOPES
    if unknown_scopes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid scopes: {', '.join(sorted(unknown_scopes))}. "
            f"Allowed: {', '.join(sorted(ALLOWED_SCOPES))}",
        )
    if not body.scopes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one scope is required.",
        )

    created = await create_api_key(
        db=db,
        tenant_id=tenant_id,
        user_id=current_user.user_id,
        name=body.name,
        scopes=body.scopes,
        expires_at=body.expires_at,
    )

    # Shape the response for ApiKeyCreateResponse; timestamps as ISO strings.
    expires = created["expires_at"]
    issued = created["created_at"]
    return {
        "id": str(created["id"]),
        "name": created["name"],
        "key_prefix": created["key_prefix"],
        "key": created["key"],
        "scopes": created["scopes"],
        "expires_at": expires.isoformat() if expires else None,
        "last_used_at": None,
        "created_at": issued.isoformat() if issued else None,
        "revoked_at": None,
    }
@router.delete("/tenants/{tenant_id}/api-keys/{key_id}", status_code=status.HTTP_200_OK)
async def revoke_key(
    tenant_id: uuid.UUID,
    key_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
) -> dict:
    """Revoke an API key (soft delete -- sets revoked_at timestamp)."""
    await _check_tenant_access(current_user, tenant_id, db)
    revoked = await revoke_api_key(db, tenant_id, key_id)
    if revoked:
        return {"status": "revoked", "key_id": str(key_id)}
    # The service returns falsy when the key does not exist or was already
    # revoked; both cases surface as 404.
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="API key not found or already revoked.",
    )

View File

@@ -0,0 +1,294 @@
"""Audit log API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Paginated, filterable audit log listing
- CSV export of audit logs
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above can view audit logs.
Phase 30: Audit log details are encrypted at rest via Transit (Tier 2).
When encrypted_details is set, the router decrypts via Transit on-demand
and returns the plaintext details in the response. Structural fields
(action, resource_type, timestamp, ip_address) are always plaintext.
"""
import asyncio
import csv
import io
import json
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy import and_, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["audit-logs"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Super admins may act on any tenant; their RLS context is pinned to the
    requested tenant here. All other users must belong to the tenant
    (their RLS context is presumably set by get_db — confirm), otherwise
    403 is raised.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
def _require_operator(current_user: CurrentUser) -> None:
"""Raise 403 if user does not have at least operator role."""
allowed = {"super_admin", "admin", "operator"}
if current_user.role not in allowed:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="At least operator role required to view audit logs.",
)
async def _decrypt_audit_details(
    encrypted_details: str | None,
    plaintext_details: dict[str, Any] | None,
    tenant_id: str,
) -> dict[str, Any]:
    """Decrypt encrypted audit log details via Transit, falling back to plaintext.

    Priority:
    1. If encrypted_details is set, decrypt via Transit and parse as JSON.
    2. If decryption fails, return plaintext details as fallback.
    3. If neither available, return empty dict.
    """
    if encrypted_details:
        try:
            # Imported lazily inside the try so an import failure of the
            # crypto stack degrades to the plaintext fallback below.
            from app.services.crypto import decrypt_data_transit

            decrypted_json = await decrypt_data_transit(encrypted_details, tenant_id)
            return json.loads(decrypted_json)
        except Exception:
            # Decryption or JSON-parse failures are logged (with traceback)
            # but never fail the request; the plaintext column is served.
            logger.warning(
                "Failed to decrypt audit details for tenant %s, using plaintext fallback",
                tenant_id,
                exc_info=True,
            )
            # Fall through to plaintext
    return plaintext_details if plaintext_details else {}
async def _decrypt_details_batch(
    rows: list[Any],
    tenant_id: str,
) -> list[dict[str, Any]]:
    """Decrypt encrypted_details for a batch of audit log rows concurrently.

    A semaphore caps in-flight Transit decryptions at 10 so a large page
    or CSV export cannot overwhelm OpenBao. Rows without
    encrypted_details resolve to their plaintext details directly. The
    returned list preserves the order of the input rows.
    """
    gate = asyncio.Semaphore(10)  # bound concurrent Transit calls

    async def _one(entry: Any) -> dict[str, Any]:
        async with gate:
            return await _decrypt_audit_details(
                entry.get("encrypted_details"),
                entry.get("details"),
                tenant_id,
            )

    pending = [_one(entry) for entry in rows]
    return list(await asyncio.gather(*pending))
# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------
class AuditLogItem(BaseModel):
    """Single audit log row returned by the list endpoint (details decrypted)."""

    id: str
    user_email: Optional[str] = None  # NULL when the user was deleted
    action: str
    resource_type: Optional[str] = None
    resource_id: Optional[str] = None
    device_name: Optional[str] = None  # hostname from the devices join
    # Mutable default is safe here: pydantic deep-copies field defaults
    # per instance.
    details: dict[str, Any] = {}
    ip_address: Optional[str] = None
    created_at: str  # ISO-8601 timestamp (empty string if missing)


class AuditLogResponse(BaseModel):
    """Paginated audit log listing."""

    items: list[AuditLogItem]
    total: int  # total rows matching the filters, across all pages
    page: int
    per_page: int
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/audit-logs",
    response_model=AuditLogResponse,
    summary="List audit logs with pagination and filters",
)
async def list_audit_logs(
    tenant_id: uuid.UUID,
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=100),
    action: Optional[str] = Query(default=None),
    user_id: Optional[uuid.UUID] = Query(default=None),
    device_id: Optional[uuid.UUID] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    format: Optional[str] = Query(default=None, description="Set to 'csv' for CSV export"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """List a tenant's audit logs, paginated, filtered, or as a CSV export.

    Requires at least operator role; super admins get RLS context pinned
    via _check_tenant_access. Encrypted details are decrypted via Transit
    before serialization. When format == "csv", ALL matching rows are
    exported (no pagination) as a streamed attachment.
    """
    _require_operator(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions using parameterized text fragments
    # (values always travel via bound params, never string interpolation).
    conditions = [text("a.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if action:
        conditions.append(text("a.action = :action"))
        params["action"] = action
    if user_id:
        conditions.append(text("a.user_id = :user_id"))
        params["user_id"] = str(user_id)
    if device_id:
        conditions.append(text("a.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if date_from:
        conditions.append(text("a.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("a.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    # Shared SELECT columns for data queries
    _data_columns = text(
        "a.id, u.email AS user_email, a.action, a.resource_type, "
        "a.resource_id, d.hostname AS device_name, a.details, "
        "a.encrypted_details, a.ip_address, a.created_at"
    )
    # LEFT JOINs so rows survive deleted users/devices (columns become NULL).
    _data_from = text(
        "audit_logs a "
        "LEFT JOIN users u ON a.user_id = u.id "
        "LEFT JOIN devices d ON a.device_id = d.id"
    )
    # Count total
    count_result = await db.execute(
        select(func.count()).select_from(text("audit_logs a")).where(where_clause),
        params,
    )
    total = count_result.scalar() or 0
    # CSV export -- no pagination limit
    # NOTE(review): the full result set is buffered in memory before
    # streaming; confirm acceptable for the largest expected tenants.
    if format == "csv":
        result = await db.execute(
            select(_data_columns)
            .select_from(_data_from)
            .where(where_clause)
            .order_by(text("a.created_at DESC")),
            params,
        )
        all_rows = result.mappings().all()
        # Decrypt encrypted details concurrently
        decrypted_details = await _decrypt_details_batch(
            all_rows, str(tenant_id)
        )
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow([
            "ID", "User Email", "Action", "Resource Type",
            "Resource ID", "Device", "Details", "IP Address", "Timestamp",
        ])
        # zip pairs each row with its decrypted details (same order).
        for row, details in zip(all_rows, decrypted_details):
            details_str = json.dumps(details) if details else "{}"
            writer.writerow([
                str(row["id"]),
                row["user_email"] or "",
                row["action"],
                row["resource_type"] or "",
                row["resource_id"] or "",
                row["device_name"] or "",
                details_str,
                row["ip_address"] or "",
                str(row["created_at"]),
            ])
        output.seek(0)
        return StreamingResponse(
            iter([output.getvalue()]),
            media_type="text/csv",
            headers={"Content-Disposition": "attachment; filename=audit-logs.csv"},
        )
    # Paginated query
    offset = (page - 1) * per_page
    params["limit"] = per_page
    params["offset"] = offset
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("a.created_at DESC"))
        .limit(per_page)
        .offset(offset),
        params,
    )
    rows = result.mappings().all()
    # Decrypt encrypted details concurrently (skips rows without encrypted_details)
    decrypted_details = await _decrypt_details_batch(rows, str(tenant_id))
    items = [
        AuditLogItem(
            id=str(row["id"]),
            user_email=row["user_email"],
            action=row["action"],
            resource_type=row["resource_type"],
            resource_id=row["resource_id"],
            device_name=row["device_name"],
            details=details,
            ip_address=row["ip_address"],
            created_at=row["created_at"].isoformat() if row["created_at"] else "",
        )
        for row, details in zip(rows, decrypted_details)
    ]
    return AuditLogResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
    )

1052
backend/app/routers/auth.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,763 @@
"""Certificate Authority management API endpoints.
Provides the full certificate lifecycle for tenant CAs:
- CA initialization and info retrieval
- Per-device certificate signing
- Certificate deployment via NATS to Go poller (SFTP + RouterOS import)
- Bulk deployment across multiple devices
- Certificate rotation and revocation
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET); tenant_admin and above = mutating actions.
"""
import json
import logging
import uuid
from datetime import datetime, timezone
import nats
import nats.aio.client
import nats.errors
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import PlainTextResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.certificate import CertificateAuthority, DeviceCertificate
from app.models.device import Device
from app.schemas.certificate import (
BulkCertDeployRequest,
CACreateRequest,
CAResponse,
CertDeployResponse,
CertSignRequest,
DeviceCertResponse,
)
from app.services.audit_service import log_action
from app.services.ca_service import (
generate_ca,
get_ca_for_tenant,
get_cert_for_deploy,
get_device_certs,
sign_device_cert,
update_cert_status,
)
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["certificates"])
# Module-level NATS connection for cert deployment (lazy initialized)
_nc: nats.aio.client.Client | None = None
async def _get_nats() -> nats.aio.client.Client:
    """Get or create a NATS connection for certificate deployment requests.

    Lazily (re)connects when the cached module-level connection is missing
    or closed. NOTE(review): the check-then-connect is not guarded against
    concurrent first calls — two coroutines may race and open two
    connections (the loser's leaks); confirm this is acceptable.
    """
    global _nc
    if _nc is None or _nc.is_closed:
        _nc = await nats.connect(settings.NATS_URL)
        logger.info("Certificate NATS connection established")
    return _nc
async def _deploy_cert_via_nats(
    device_id: str,
    cert_pem: str,
    key_pem: str,
    cert_name: str,
    ssh_port: int = 22,
) -> dict:
    """Ask the Go poller (via NATS request/reply) to install a certificate.

    Args:
        device_id: Target device UUID string.
        cert_pem: PEM-encoded device certificate.
        key_pem: PEM-encoded device private key (decrypted).
        cert_name: Name for the cert on the device (e.g., "portal-device-cert").
        ssh_port: SSH port for SFTP upload (default 22).

    Returns:
        Dict with success, cert_name_on_device, and error fields.
    """
    connection = await _get_nats()
    request_body = {
        "device_id": device_id,
        "cert_pem": cert_pem,
        "key_pem": key_pem,
        "cert_name": cert_name,
        "ssh_port": ssh_port,
    }
    try:
        # 60s budget: the poller must SFTP the files and run the RouterOS
        # import before replying.
        reply = await connection.request(
            f"cert.deploy.{device_id}",
            json.dumps(request_body).encode(),
            timeout=60.0,
        )
        return json.loads(reply.data)
    except nats.errors.TimeoutError:
        return {
            "success": False,
            "error": "Certificate deployment timed out -- device may be offline or unreachable",
        }
    except Exception as exc:
        logger.error("NATS cert deploy request failed", device_id=device_id, error=str(exc))
        return {"success": False, "error": str(exc)}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _get_device_for_tenant(
    db: AsyncSession, device_id: uuid.UUID, current_user: CurrentUser
) -> Device:
    """Fetch a device by id, relying on row-level security for tenant scoping.

    Note: despite the name, no explicit tenant comparison is performed here.
    Tenant isolation comes from the RLS context already set on ``db`` (see
    get_db / set_tenant_context), so a device belonging to another tenant is
    simply not visible and results in 404. The ``current_user`` parameter is
    currently unused and kept for interface stability.

    Raises:
        HTTPException: 404 when the device is not visible to this tenant.
    """
    result = await db.execute(
        select(Device).where(Device.id == device_id)
    )
    device = result.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    return device
async def _get_tenant_id(
    current_user: CurrentUser,
    db: AsyncSession,
    tenant_id_override: uuid.UUID | None = None,
) -> uuid.UUID:
    """Resolve the effective tenant_id for the current request.

    Super admins must supply ``tenant_id_override`` (taken from a query
    parameter); the RLS context on ``db`` is then switched to that tenant.
    All other users operate on their own tenant.

    Raises:
        HTTPException: 400 when a super admin omits the override, or when a
            regular user has no tenant associated with their account.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No tenant context available.",
            )
        return current_user.tenant_id
    if tenant_id_override is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Super admin must provide tenant_id query parameter.",
        )
    # Switch the row-level-security context to the selected tenant.
    await set_tenant_context(db, str(tenant_id_override))
    return tenant_id_override
async def _get_cert_with_tenant_check(
    db: AsyncSession, cert_id: uuid.UUID, tenant_id: uuid.UUID
) -> DeviceCertificate:
    """Load a device certificate, returning 404 unless it belongs to the tenant.

    A cross-tenant certificate is reported as "not found" (not "forbidden")
    so the endpoint does not leak certificate existence. RLS should already
    hide foreign rows; the explicit tenant comparison is defense in depth.
    """
    row = await db.execute(
        select(DeviceCertificate).where(DeviceCertificate.id == cert_id)
    )
    cert = row.scalar_one_or_none()
    if cert is None or cert.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Certificate {cert_id} not found",
        )
    return cert
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.post(
    "/ca",
    response_model=CAResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Initialize a Certificate Authority for the tenant",
)
@limiter.limit("5/minute")
async def create_ca(
    request: Request,
    body: CACreateRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CAResponse:
    """Generate a self-signed root CA for the tenant.

    Each tenant may have at most one CA; a second creation attempt is
    rejected with 409 until the existing CA is deleted.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # One CA per tenant -- refuse to overwrite silently.
    if await get_ca_for_tenant(db, tenant_id) is not None:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Tenant already has a Certificate Authority. Delete it before creating a new one.",
        )
    ca = await generate_ca(
        db,
        tenant_id,
        body.common_name,
        body.validity_years,
        settings.get_encryption_key_bytes(),
    )
    # Audit logging is best-effort; a logging failure must not undo CA creation.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "ca_create",
            resource_type="certificate_authority", resource_id=str(ca.id),
            details={"common_name": body.common_name, "validity_years": body.validity_years},
        )
    except Exception:
        pass
    logger.info("CA created", tenant_id=str(tenant_id), ca_id=str(ca.id))
    return CAResponse.model_validate(ca)
@router.get(
    "/ca",
    response_model=CAResponse,
    summary="Get tenant CA information",
)
async def get_ca(
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> CAResponse:
    """Return the tenant's CA public information (no private key)."""
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is not None:
        return CAResponse.model_validate(ca)
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="No Certificate Authority configured for this tenant.",
    )
@router.get(
    "/ca/pem",
    response_class=PlainTextResponse,
    summary="Download the CA public certificate (PEM)",
)
async def get_ca_pem(
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> PlainTextResponse:
    """Return the CA's public certificate in PEM format.

    Users can import this into their trust store to validate device
    connections.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured for this tenant.",
        )
    # Serve the public cert as a downloadable .pem attachment.
    download_headers = {"Content-Disposition": "attachment; filename=portal-ca.pem"}
    return PlainTextResponse(
        content=ca.cert_pem,
        media_type="application/x-pem-file",
        headers=download_headers,
    )
@router.post(
    "/sign",
    response_model=DeviceCertResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Sign a certificate for a device",
)
@limiter.limit("20/minute")
async def sign_cert(
    request: Request,
    body: CertSignRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> DeviceCertResponse:
    """Sign a per-device TLS certificate using the tenant's CA.

    The device must belong to the tenant. The cert uses CN=hostname,
    SAN=IP+DNS.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # Resolve the device first (RLS scopes visibility to the tenant); its
    # hostname and IP feed the certificate subject/SAN.
    device = await _get_device_for_tenant(db, body.device_id, current_user)
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured. Initialize a CA first.",
        )
    cert = await sign_device_cert(
        db,
        ca,
        body.device_id,
        device.hostname,
        device.ip_address,
        body.validity_days,
        settings.get_encryption_key_bytes(),
    )
    # Best-effort audit trail; a logging failure must not undo the signing.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_sign",
            resource_type="device_certificate", resource_id=str(cert.id),
            device_id=body.device_id,
            details={"hostname": device.hostname, "validity_days": body.validity_days},
        )
    except Exception:
        pass
    logger.info("Device cert signed", device_id=str(body.device_id), cert_id=str(cert.id))
    return DeviceCertResponse.model_validate(cert)
@router.post(
    "/{cert_id}/deploy",
    response_model=CertDeployResponse,
    summary="Deploy a signed certificate to a device",
)
@limiter.limit("20/minute")
async def deploy_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CertDeployResponse:
    """Deploy a signed certificate to a device via NATS/SFTP.

    The Go poller receives the cert, uploads it via SFTP, imports it,
    and assigns it to the api-ssl service on the RouterOS device.

    Status flow: issued -> deploying -> deployed on success; rolled back
    to "issued" when preparation or the remote deployment fails.

    Raises:
        HTTPException: 404 (cert not visible to tenant), 409 (invalid
            status transition), 500 (cert could not be prepared/decrypted).
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    # Move to "deploying"; update_cert_status raises ValueError on an
    # illegal transition, surfaced to the caller as 409.
    try:
        await update_cert_status(db, cert_id, "deploying")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        )
    # Get decrypted cert data for deployment.
    try:
        cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
            db, cert_id, settings.get_encryption_key_bytes()
        )
    except ValueError as e:
        # Roll status back so the deployment can be retried later.
        await update_cert_status(db, cert_id, "issued")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to prepare cert for deployment: {e}",
        )
    # Flush DB changes before NATS call so the "deploying" status is persisted.
    await db.flush()
    # Send the deployment command via NATS; request-level failures come back
    # in the result dict (success=False) rather than as exceptions.
    result = await _deploy_cert_via_nats(
        device_id=str(cert.device_id),
        cert_pem=cert_pem,
        key_pem=key_pem,
        cert_name="portal-device-cert",
    )
    if result.get("success"):
        # Success path: mark deployed and switch the device's TLS mode.
        await update_cert_status(db, cert_id, "deployed")
        device_result = await db.execute(
            select(Device).where(Device.id == cert.device_id)
        )
        device = device_result.scalar_one_or_none()
        if device is not None:
            device.tls_mode = "portal_ca"
        # Best-effort audit logging; never fail the deployment over it.
        try:
            await log_action(
                db, tenant_id, current_user.user_id, "cert_deploy",
                resource_type="device_certificate", resource_id=str(cert_id),
                device_id=cert.device_id,
                details={"cert_name_on_device": result.get("cert_name_on_device")},
            )
        except Exception:
            pass
        logger.info(
            "Certificate deployed successfully",
            cert_id=str(cert_id),
            device_id=str(cert.device_id),
            cert_name_on_device=result.get("cert_name_on_device"),
        )
        return CertDeployResponse(
            success=True,
            device_id=cert.device_id,
            cert_name_on_device=result.get("cert_name_on_device"),
        )
    else:
        # Failure path: roll back to "issued" so the cert stays deployable.
        await update_cert_status(db, cert_id, "issued")
        logger.warning(
            "Certificate deployment failed",
            cert_id=str(cert_id),
            device_id=str(cert.device_id),
            error=result.get("error"),
        )
        return CertDeployResponse(
            success=False,
            device_id=cert.device_id,
            error=result.get("error"),
        )
@router.post(
    "/deploy/bulk",
    response_model=list[CertDeployResponse],
    summary="Bulk deploy certificates to multiple devices",
)
@limiter.limit("5/minute")
async def bulk_deploy(
    request: Request,
    body: BulkCertDeployRequest,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> list[CertDeployResponse]:
    """Deploy certificates to multiple devices sequentially.

    For each device: signs a cert if none exists in "issued" state, then
    deploys it. Sequential deployment per project patterns (no concurrent
    NATS calls). Per-device failures are recorded in the result list; the
    endpoint itself only fails outright when the tenant has no CA.

    Returns:
        One CertDeployResponse per requested device, in request order.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    # The CA is required for signing any missing certs.
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured. Initialize a CA first.",
        )
    results: list[CertDeployResponse] = []
    for device_id in body.device_ids:
        try:
            # Device lookup (RLS-scoped); hostname/IP feed the cert subject.
            device = await _get_device_for_tenant(db, device_id, current_user)
            # Reuse an existing "issued" cert if the device already has one.
            existing_certs = await get_device_certs(db, tenant_id, device_id)
            issued_cert = None
            for c in existing_certs:
                if c.status == "issued":
                    issued_cert = c
                    break
            # Sign a new cert if none exists in issued state.
            if issued_cert is None:
                issued_cert = await sign_device_cert(
                    db,
                    ca,
                    device_id,
                    device.hostname,
                    device.ip_address,
                    730,  # Default 2 years
                    settings.get_encryption_key_bytes(),
                )
                await db.flush()
            # Deploy the cert: mark deploying, decrypt, persist, send via NATS.
            await update_cert_status(db, issued_cert.id, "deploying")
            cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
                db, issued_cert.id, settings.get_encryption_key_bytes()
            )
            # Persist the "deploying" status before the (slow) NATS round trip.
            await db.flush()
            result = await _deploy_cert_via_nats(
                device_id=str(device_id),
                cert_pem=cert_pem,
                key_pem=key_pem,
                cert_name="portal-device-cert",
            )
            if result.get("success"):
                await update_cert_status(db, issued_cert.id, "deployed")
                device.tls_mode = "portal_ca"
                results.append(CertDeployResponse(
                    success=True,
                    device_id=device_id,
                    cert_name_on_device=result.get("cert_name_on_device"),
                ))
            else:
                # Roll the cert back to "issued" so it can be retried.
                await update_cert_status(db, issued_cert.id, "issued")
                results.append(CertDeployResponse(
                    success=False,
                    device_id=device_id,
                    error=result.get("error"),
                ))
        except HTTPException as e:
            # e.g. device not found -- record the failure and keep going.
            results.append(CertDeployResponse(
                success=False,
                device_id=device_id,
                error=e.detail,
            ))
        except Exception as e:
            # Unexpected per-device error: log it and continue the batch.
            logger.error("Bulk deploy error", device_id=str(device_id), error=str(e))
            results.append(CertDeployResponse(
                success=False,
                device_id=device_id,
                error=str(e),
            ))
    # Best-effort audit record summarizing the whole batch.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_bulk_deploy",
            resource_type="device_certificate",
            details={
                "device_count": len(body.device_ids),
                "successful": sum(1 for r in results if r.success),
                "failed": sum(1 for r in results if not r.success),
            },
        )
    except Exception:
        pass
    return results
@router.get(
    "/devices",
    response_model=list[DeviceCertResponse],
    summary="List device certificates",
)
async def list_device_certs(
    device_id: uuid.UUID | None = Query(None, description="Filter by device ID"),
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceCertResponse]:
    """List device certificates for the tenant.

    Optionally filter by device_id. Excludes superseded certs.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    return [
        DeviceCertResponse.model_validate(cert)
        for cert in await get_device_certs(db, tenant_id, device_id)
    ]
@router.post(
    "/{cert_id}/revoke",
    response_model=DeviceCertResponse,
    summary="Revoke a device certificate",
)
@limiter.limit("5/minute")
async def revoke_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> DeviceCertResponse:
    """Revoke a device certificate and reset the device TLS mode to insecure.

    Raises:
        HTTPException: 404 if the cert is not visible to the tenant; 409 if
            the cert's current status does not permit the transition to
            "revoked".
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    try:
        updated_cert = await update_cert_status(db, cert_id, "revoked")
    except ValueError as e:
        # Chain the original error (`from e`) so the invalid-transition cause
        # is preserved in logs/tracebacks, matching the other endpoints.
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        ) from e
    # Reset device tls_mode to insecure since its portal cert is revoked.
    device_result = await db.execute(
        select(Device).where(Device.id == cert.device_id)
    )
    device = device_result.scalar_one_or_none()
    if device is not None:
        device.tls_mode = "insecure"
    # Best-effort audit logging; never fail the revocation over it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "cert_revoke",
            resource_type="device_certificate", resource_id=str(cert_id),
            device_id=cert.device_id,
        )
    except Exception:
        pass
    logger.info("Certificate revoked", cert_id=str(cert_id), device_id=str(cert.device_id))
    return DeviceCertResponse.model_validate(updated_cert)
@router.post(
    "/{cert_id}/rotate",
    response_model=CertDeployResponse,
    summary="Rotate a device certificate",
)
@limiter.limit("5/minute")
async def rotate_cert(
    request: Request,
    cert_id: uuid.UUID,
    tenant_id: uuid.UUID | None = Query(None, description="Tenant ID (required for super_admin)"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("tenant_admin")),
    db: AsyncSession = Depends(get_db),
) -> CertDeployResponse:
    """Rotate a device certificate: supersede the old cert, sign a new one, and deploy it.

    This is equivalent to: mark old cert as superseded, sign new cert,
    deploy new cert.

    NOTE(review): if the deploy step fails, the old cert has already been
    marked "superseded" and the new one is rolled back to "issued" -- the
    device keeps whatever cert is currently installed until the new cert is
    deployed. Confirm this is the intended failure semantics.
    """
    tenant_id = await _get_tenant_id(current_user, db, tenant_id)
    old_cert = await _get_cert_with_tenant_check(db, cert_id, tenant_id)
    # Get the device for hostname/IP (feeds the new cert's subject/SAN).
    device_result = await db.execute(
        select(Device).where(Device.id == old_cert.device_id)
    )
    device = device_result.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {old_cert.device_id} not found",
        )
    # Get tenant CA (required for signing the replacement).
    ca = await get_ca_for_tenant(db, tenant_id)
    if ca is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Certificate Authority configured.",
        )
    # Mark old cert as superseded; ValueError means an illegal transition.
    try:
        await update_cert_status(db, cert_id, "superseded")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=str(e),
        )
    # Sign the replacement cert.
    new_cert = await sign_device_cert(
        db,
        ca,
        old_cert.device_id,
        device.hostname,
        device.ip_address,
        730,  # Default 2 years
        settings.get_encryption_key_bytes(),
    )
    await db.flush()
    # Deploy the new cert: mark deploying, decrypt, persist, send via NATS.
    await update_cert_status(db, new_cert.id, "deploying")
    cert_pem, key_pem, _ca_cert_pem = await get_cert_for_deploy(
        db, new_cert.id, settings.get_encryption_key_bytes()
    )
    # Persist the "deploying" status before the (slow) NATS round trip.
    await db.flush()
    result = await _deploy_cert_via_nats(
        device_id=str(old_cert.device_id),
        cert_pem=cert_pem,
        key_pem=key_pem,
        cert_name="portal-device-cert",
    )
    if result.get("success"):
        await update_cert_status(db, new_cert.id, "deployed")
        device.tls_mode = "portal_ca"
        # Best-effort audit logging; never fail the rotation over it.
        try:
            await log_action(
                db, tenant_id, current_user.user_id, "cert_rotate",
                resource_type="device_certificate", resource_id=str(new_cert.id),
                device_id=old_cert.device_id,
                details={
                    "old_cert_id": str(cert_id),
                    "cert_name_on_device": result.get("cert_name_on_device"),
                },
            )
        except Exception:
            pass
        logger.info(
            "Certificate rotated successfully",
            old_cert_id=str(cert_id),
            new_cert_id=str(new_cert.id),
            device_id=str(old_cert.device_id),
        )
        return CertDeployResponse(
            success=True,
            device_id=old_cert.device_id,
            cert_name_on_device=result.get("cert_name_on_device"),
        )
    else:
        # Rollback: mark new cert as issued (deploy failed).
        await update_cert_status(db, new_cert.id, "issued")
        logger.warning(
            "Certificate rotation deploy failed",
            new_cert_id=str(new_cert.id),
            device_id=str(old_cert.device_id),
            error=result.get("error"),
        )
        return CertDeployResponse(
            success=False,
            device_id=old_cert.device_id,
            error=result.get("error"),
        )

View File

@@ -0,0 +1,297 @@
"""
Client device discovery API endpoint.
Fetches ARP, DHCP lease, and wireless registration data from a RouterOS device
via the NATS command proxy, merges by MAC address, and returns a unified client list.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/clients
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer and above (read-only operation).
"""
import asyncio
import uuid
from datetime import datetime, timezone
from typing import Any
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.services import routeros_proxy
# Structured logger and API router for the client-discovery endpoints.
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["clients"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure ``current_user`` may operate on ``tenant_id``.

    Super admins may access any tenant; the RLS context on ``db`` is
    switched to the target tenant. All other users must belong to it.

    Raises:
        HTTPException: 403 when a non-admin user targets a foreign tenant.
    """
    if current_user.is_super_admin:
        from app.database import set_tenant_context

        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
async def _check_device_online(
    db: AsyncSession, device_id: uuid.UUID
) -> Device:
    """Load a device and require it to be online.

    Returns:
        The Device object.

    Raises:
        HTTPException: 404 when the device is unknown, 409 when it is not
            currently online (live discovery needs a reachable device).
    """
    row = await db.execute(
        select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
    )
    device = row.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    if device.status != "online":
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Device is offline -- client discovery requires a live connection.",
        )
    return device
# ---------------------------------------------------------------------------
# MAC-address merge logic
# ---------------------------------------------------------------------------
def _normalize_mac(mac: str) -> str:
"""Normalize a MAC address to uppercase colon-separated format."""
return mac.strip().upper().replace("-", ":")
def _merge_client_data(
    arp_data: list[dict[str, Any]],
    dhcp_data: list[dict[str, Any]],
    wireless_data: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Merge ARP, DHCP lease, and wireless registration data by MAC address.

    ARP entries are the base. DHCP enriches with hostname (and, when
    present, a "dhcp_status" key). Wireless enriches with signal/tx/rx/
    uptime and marks the client as wireless. DHCP leases with no matching
    ARP entry are appended afterwards with status "stale".

    Returns:
        One dict per unique MAC; ARP-derived clients first (ARP order),
        then DHCP-only clients (DHCP index order).
    """
    # Index DHCP leases by MAC (prefers "mac-address", falls back to
    # "active-mac-address"); entries without a MAC are skipped.
    dhcp_by_mac: dict[str, dict[str, Any]] = {}
    for lease in dhcp_data:
        mac_raw = lease.get("mac-address") or lease.get("active-mac-address", "")
        if mac_raw:
            dhcp_by_mac[_normalize_mac(mac_raw)] = lease
    # Index wireless registrations by MAC.
    wireless_by_mac: dict[str, dict[str, Any]] = {}
    for reg in wireless_data:
        mac_raw = reg.get("mac-address", "")
        if mac_raw:
            wireless_by_mac[_normalize_mac(mac_raw)] = reg
    # Track which MACs we've already processed (from ARP) so DHCP-only
    # handling below does not duplicate them.
    seen_macs: set[str] = set()
    clients: list[dict[str, Any]] = []
    # Start with ARP entries as base.
    for entry in arp_data:
        mac_raw = entry.get("mac-address", "")
        if not mac_raw:
            continue
        mac = _normalize_mac(mac_raw)
        if mac in seen_macs:
            continue
        seen_macs.add(mac)
        # Determine status from the ARP "complete" flag; a missing flag
        # defaults to reachable. Assumes the flag arrives as the string
        # "true"/"false" -- TODO confirm the proxy never returns a bool here.
        is_complete = entry.get("complete", "true").lower() == "true"
        arp_status = "reachable" if is_complete else "stale"
        client: dict[str, Any] = {
            "mac": mac,
            "ip": entry.get("address", ""),
            "interface": entry.get("interface", ""),
            "hostname": None,
            "status": arp_status,
            "signal_strength": None,
            "tx_rate": None,
            "rx_rate": None,
            "uptime": None,
            "is_wireless": False,
        }
        # Enrich with DHCP data ("dhcp_status" is only added when non-empty).
        dhcp = dhcp_by_mac.get(mac)
        if dhcp:
            client["hostname"] = dhcp.get("host-name") or None
            dhcp_status = dhcp.get("status", "")
            if dhcp_status:
                client["dhcp_status"] = dhcp_status
        # Enrich with wireless data and flag the client as wireless.
        wireless = wireless_by_mac.get(mac)
        if wireless:
            client["is_wireless"] = True
            client["signal_strength"] = wireless.get("signal-strength") or None
            client["tx_rate"] = wireless.get("tx-rate") or None
            client["rx_rate"] = wireless.get("rx-rate") or None
            client["uptime"] = wireless.get("uptime") or None
        clients.append(client)
    # Also include DHCP-only entries (no ARP match -- e.g. expired leases).
    for mac, lease in dhcp_by_mac.items():
        if mac in seen_macs:
            continue
        seen_macs.add(mac)
        client = {
            "mac": mac,
            "ip": lease.get("active-address") or lease.get("address", ""),
            "interface": lease.get("active-server") or "",
            "hostname": lease.get("host-name") or None,
            "status": "stale",  # No ARP entry = not actively reachable
            "signal_strength": None,
            "tx_rate": None,
            "rx_rate": None,
            "uptime": None,
            "is_wireless": mac in wireless_by_mac,
        }
        wireless = wireless_by_mac.get(mac)
        if wireless:
            client["signal_strength"] = wireless.get("signal-strength") or None
            client["tx_rate"] = wireless.get("tx-rate") or None
            client["rx_rate"] = wireless.get("rx-rate") or None
            client["uptime"] = wireless.get("uptime") or None
        clients.append(client)
    return clients
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/clients",
    summary="List connected client devices (ARP + DHCP + wireless)",
)
async def list_clients(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Discover all client devices connected to a MikroTik device.

    Fetches ARP table, DHCP server leases, and wireless registration table
    in parallel, then merges by MAC address into a unified client list.

    Failure policy:
    - ARP fetch failure is fatal (core data source) -> 502.
    - DHCP fetch failure is non-fatal (device may not run a DHCP server).
    - Wireless fetch failure is non-fatal (device may have no radios).

    Returns:
        Dict with "clients" (merged list), "device_id", and a UTC
        ISO-8601 "timestamp".
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    device_id_str = str(device_id)
    # Fetch all three sources in parallel; return_exceptions=True so one
    # failed source does not cancel the others.
    arp_result, dhcp_result, wireless_result = await asyncio.gather(
        routeros_proxy.execute_command(device_id_str, "/ip/arp/print"),
        routeros_proxy.execute_command(device_id_str, "/ip/dhcp-server/lease/print"),
        routeros_proxy.execute_command(
            device_id_str, "/interface/wireless/registration-table/print"
        ),
        return_exceptions=True,
    )
    # ARP is required -- if it failed, return 502.
    if isinstance(arp_result, Exception):
        logger.error("ARP fetch exception", device_id=device_id_str, error=str(arp_result))
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch ARP table: {arp_result}",
        )
    if not arp_result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=arp_result.get("error", "Failed to fetch ARP table"),
        )
    arp_data: list[dict[str, Any]] = arp_result.get("data", [])
    # DHCP is optional -- log warning and continue with empty data.
    dhcp_data: list[dict[str, Any]] = []
    if isinstance(dhcp_result, Exception):
        logger.warning(
            "DHCP fetch exception (continuing without DHCP data)",
            device_id=device_id_str,
            error=str(dhcp_result),
        )
    elif not dhcp_result.get("success"):
        logger.warning(
            "DHCP fetch failed (continuing without DHCP data)",
            device_id=device_id_str,
            error=dhcp_result.get("error"),
        )
    else:
        dhcp_data = dhcp_result.get("data", [])
    # Wireless is optional -- many devices have no wireless interfaces.
    wireless_data: list[dict[str, Any]] = []
    if isinstance(wireless_result, Exception):
        logger.warning(
            "Wireless fetch exception (device may not have wireless interfaces)",
            device_id=device_id_str,
            error=str(wireless_result),
        )
    elif not wireless_result.get("success"):
        logger.warning(
            "Wireless fetch failed (device may not have wireless interfaces)",
            device_id=device_id_str,
            error=wireless_result.get("error"),
        )
    else:
        wireless_data = wireless_result.get("data", [])
    # Merge the three sources by MAC address.
    clients = _merge_client_data(arp_data, dhcp_data, wireless_data)
    logger.info(
        "client_discovery_complete",
        device_id=device_id_str,
        tenant_id=str(tenant_id),
        arp_count=len(arp_data),
        dhcp_count=len(dhcp_data),
        wireless_count=len(wireless_data),
        merged_count=len(clients),
    )
    return {
        "clients": clients,
        "device_id": device_id_str,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

View File

@@ -0,0 +1,745 @@
"""
Config backup API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/config/
Provides:
- GET /backups — list backup timeline
- POST /backups — trigger manual backup
- POST /checkpoint — create a checkpoint (restore point)
- GET /backups/{sha}/export — retrieve export.rsc text
- GET /backups/{sha}/binary — download backup.bin
- POST /preview-restore — preview impact analysis before restore
- POST /restore — restore a config version (two-phase panic-revert)
- POST /emergency-rollback — rollback to most recent pre-push backup
- GET /schedules — view effective backup schedule
- PUT /schedules — create/update device-specific schedule override
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET); operator and above = write (POST/PUT).
"""
import asyncio
import logging
import uuid
from datetime import timezone, datetime
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, Request, status
from fastapi.responses import Response
from pydantic import BaseModel, ConfigDict
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.config_backup import ConfigBackupRun, ConfigBackupSchedule
from app.config import settings
from app.models.device import Device
from app.services import backup_service, git_store
from app.services import restore_service
from app.services.crypto import decrypt_credentials_hybrid
from app.services.rsc_parser import parse_rsc, validate_rsc, compute_impact
# Stdlib logger and API router for the config-backup endpoints.
logger = logging.getLogger(__name__)
router = APIRouter(tags=["config-backups"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """
    Verify the current user is allowed to access the given tenant.

    - super_admin may target any tenant — the DB tenant (RLS) context is
      switched to the target tenant.
    - All other roles must match their own tenant_id (403 otherwise).
    """
    if current_user.is_super_admin:
        from app.database import set_tenant_context

        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
# ---------------------------------------------------------------------------
# Request/Response schemas
# ---------------------------------------------------------------------------
class RestoreRequest(BaseModel):
    """Request body for config restore: selects which backup version to use."""

    # Reject unknown fields so typos in client payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
    # Git commit SHA identifying the backup version (from the backup timeline).
    commit_sha: str
class ScheduleUpdate(BaseModel):
    """Request body for creating/updating a device backup schedule override."""

    # Reject unknown fields so typos in client payloads fail loudly.
    model_config = ConfigDict(extra="forbid")
    # Cron expression controlling when scheduled backups run.
    cron_expression: str
    # Whether the schedule is active.
    enabled: bool
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups",
    summary="List backup timeline for a device",
    dependencies=[require_scope("config:read")],
)
async def list_backups(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return backup timeline for a device, newest first.

    Each entry includes: id, commit_sha, trigger_type, lines_added,
    lines_removed, encryption_tier, and created_at.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    query = (
        select(ConfigBackupRun)
        .where(
            ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
            ConfigBackupRun.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        .order_by(ConfigBackupRun.created_at.desc())
    )
    result = await db.execute(query)
    timeline: list[dict[str, Any]] = []
    for run in result.scalars().all():
        timeline.append(
            {
                "id": str(run.id),
                "commit_sha": run.commit_sha,
                "trigger_type": run.trigger_type,
                "lines_added": run.lines_added,
                "lines_removed": run.lines_removed,
                "encryption_tier": run.encryption_tier,
                "created_at": run.created_at.isoformat(),
            }
        )
    return timeline
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups",
    summary="Trigger a manual config backup",
    status_code=status.HTTP_201_CREATED,
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def trigger_backup(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Trigger an immediate manual backup for a device.

    Captures export.rsc and backup.bin via SSH, commits to the tenant's
    git store, and records a ConfigBackupRun with trigger_type='manual'.

    Returns:
        The backup metadata dict produced by the backup service.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        return await backup_service.run_backup(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            trigger_type="manual",
            db_session=db,
        )
    except ValueError as exc:
        # ValueError from the backup service is mapped to 404.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        # Anything else means the capture/commit itself failed -> 502.
        logger.error(
            "Manual backup failed for device %s tenant %s: %s",
            device_id,
            tenant_id,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Backup failed: {exc}",
        ) from exc
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/checkpoint",
    summary="Create a checkpoint (restore point) of the current config",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def create_checkpoint(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a checkpoint (named restore point) of the current config.

    Behaves exactly like a manual backup except the resulting run is
    tagged trigger_type='checkpoint'. Operators create checkpoints
    before risky changes so they can roll back easily.

    Raises 404 for unknown device/tenant, 502 for capture failures.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        checkpoint_meta = await backup_service.run_backup(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            trigger_type="checkpoint",
            db_session=db,
        )
    except ValueError as exc:
        # Unknown device/tenant -> 404.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)
        ) from exc
    except Exception as exc:
        logger.error(
            "Checkpoint backup failed for device %s tenant %s: %s",
            device_id, tenant_id, exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Checkpoint failed: {exc}",
        ) from exc
    return checkpoint_meta
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups/{commit_sha}/export",
    summary="Get export.rsc text for a specific backup",
    response_class=Response,
    dependencies=[require_scope("config:read")],
)
async def get_export(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    commit_sha: str,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Return the raw /export compact text for a specific backup version.

    For encrypted backups (encryption_tier != NULL), the Transit ciphertext
    stored in git is decrypted on-demand before returning plaintext.
    Legacy plaintext backups (encryption_tier = NULL) are returned as-is.

    Raises:
        HTTPException 404: commit/file not present in the git store.
        HTTPException 500: decryption of an encrypted backup failed.

    Content-Type: text/plain
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the non-deprecated form inside a coroutine.
    loop = asyncio.get_running_loop()
    try:
        # git_store.read_file is blocking I/O — run it in a worker thread.
        content_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            commit_sha,
            str(device_id),
            "export.rsc",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup version not found: {exc}",
        ) from exc
    # Check if this backup is encrypted — decrypt via Transit if so.
    # Filter by tenant_id as well, consistent with the other backup queries.
    result = await db.execute(
        select(ConfigBackupRun).where(
            ConfigBackupRun.commit_sha == commit_sha,
            ConfigBackupRun.device_id == device_id,
            ConfigBackupRun.tenant_id == tenant_id,
        )
    )
    backup_run = result.scalar_one_or_none()
    if backup_run and backup_run.encryption_tier:
        try:
            from app.services.crypto import decrypt_data_transit

            plaintext = await decrypt_data_transit(
                content_bytes.decode("utf-8"), str(tenant_id)
            )
            content_bytes = plaintext.encode("utf-8")
        except Exception as dec_err:
            logger.error(
                "Failed to decrypt export for device %s sha %s: %s",
                device_id, commit_sha, dec_err,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt backup content",
            ) from dec_err
    return Response(content=content_bytes, media_type="text/plain")
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/backups/{commit_sha}/binary",
    summary="Download backup.bin for a specific backup",
    response_class=Response,
    dependencies=[require_scope("config:read")],
)
async def get_binary(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    commit_sha: str,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Download the RouterOS binary backup file for a backup version.

    For encrypted backups, the Transit ciphertext is decrypted and the
    base64-encoded binary is decoded back to raw bytes before returning.
    Legacy plaintext backups are returned as-is.

    Raises:
        HTTPException 404: commit/file not present in the git store.
        HTTPException 500: decryption of an encrypted backup failed.

    Content-Type: application/octet-stream (attachment download).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the non-deprecated form inside a coroutine.
    loop = asyncio.get_running_loop()
    try:
        # git_store.read_file is blocking I/O — run it in a worker thread.
        content_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            commit_sha,
            str(device_id),
            "backup.bin",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup version not found: {exc}",
        ) from exc
    # Check if this backup is encrypted — decrypt via Transit if so.
    # Filter by tenant_id as well, consistent with the other backup queries.
    result = await db.execute(
        select(ConfigBackupRun).where(
            ConfigBackupRun.commit_sha == commit_sha,
            ConfigBackupRun.device_id == device_id,
            ConfigBackupRun.tenant_id == tenant_id,
        )
    )
    backup_run = result.scalar_one_or_none()
    if backup_run and backup_run.encryption_tier:
        try:
            import base64 as b64
            from app.services.crypto import decrypt_data_transit

            # Transit ciphertext -> base64-encoded binary -> raw bytes
            b64_plaintext = await decrypt_data_transit(
                content_bytes.decode("utf-8"), str(tenant_id)
            )
            content_bytes = b64.b64decode(b64_plaintext)
        except Exception as dec_err:
            logger.error(
                "Failed to decrypt binary backup for device %s sha %s: %s",
                device_id, commit_sha, dec_err,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt backup content",
            ) from dec_err
    return Response(
        content=content_bytes,
        media_type="application/octet-stream",
        headers={
            "Content-Disposition": f'attachment; filename="backup-{commit_sha[:8]}.bin"'
        },
    )
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/preview-restore",
    summary="Preview the impact of restoring a config backup",
    dependencies=[require_scope("config:read")],
)
@limiter.limit("20/minute")
async def preview_restore(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RestoreRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Preview the impact of restoring a config backup before executing.

    Reads the target config from the git backup, fetches the current config
    from the live device (falling back to the latest backup if unreachable),
    and returns a diff with categories, risk levels, warnings, and validation.

    NOTE(review): the target export is read straight from git; for runs with
    encryption_tier set this could be Transit ciphertext rather than .rsc
    plaintext — confirm whether decryption (as in get_export) is needed here.

    Raises:
        HTTPException 404: target backup export not found in the git store.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # get_running_loop() is the non-deprecated form inside a coroutine.
    loop = asyncio.get_running_loop()
    # 1. Read target export from git (blocking I/O — executor thread)
    try:
        target_bytes = await loop.run_in_executor(
            None,
            git_store.read_file,
            str(tenant_id),
            body.commit_sha,
            str(device_id),
            "export.rsc",
        )
    except KeyError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Backup export not found: {exc}",
        ) from exc
    target_text = target_bytes.decode("utf-8", errors="replace")
    # 2. Get current export from device (live) or fallback to latest backup
    current_text = ""
    try:
        result = await db.execute(
            select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
        )
        device = result.scalar_one_or_none()
        if device and (device.encrypted_credentials_transit or device.encrypted_credentials):
            key = settings.get_encryption_key_bytes()
            creds_json = await decrypt_credentials_hybrid(
                device.encrypted_credentials_transit,
                device.encrypted_credentials,
                str(tenant_id),
                key,
            )
            import json
            creds = json.loads(creds_json)
            current_text = await backup_service.capture_export(
                device.ip_address,
                username=creds.get("username", "admin"),
                password=creds.get("password", ""),
            )
    except Exception:
        # Device unreachable or credentials unusable — fall back to the
        # newest backup in git for this tenant's device.
        logger.debug(
            "Live export failed for device %s, falling back to latest backup",
            device_id,
        )
        latest = await db.execute(
            select(ConfigBackupRun)
            .where(
                ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
                ConfigBackupRun.tenant_id == tenant_id,  # type: ignore[arg-type]
            )
            .order_by(ConfigBackupRun.created_at.desc())
            .limit(1)
        )
        latest_run = latest.scalar_one_or_none()
        if latest_run:
            try:
                current_bytes = await loop.run_in_executor(
                    None,
                    git_store.read_file,
                    str(tenant_id),
                    latest_run.commit_sha,
                    str(device_id),
                    "export.rsc",
                )
                current_text = current_bytes.decode("utf-8", errors="replace")
            except Exception:
                # No readable baseline — diff against an empty config.
                current_text = ""
    # 3. Parse and analyze
    current_parsed = parse_rsc(current_text)
    target_parsed = parse_rsc(target_text)
    validation = validate_rsc(target_text)
    impact = compute_impact(current_parsed, target_parsed)
    return {
        "diff": impact["diff"],
        "categories": impact["categories"],
        "warnings": impact["warnings"],
        "validation": validation,
    }
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/restore",
    summary="Restore a config version (two-phase push with panic-revert)",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def restore_config_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RestoreRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Restore a device config to a specific backup version.

    Delegates to restore_service.restore_config, which implements the
    two-phase push with panic-revert:
      1. mandatory pre-backup is taken on the device,
      2. a RouterOS scheduler is installed as a safety net,
      3. the config is pushed via /import,
      4. a 60s settle wait,
      5. reachability check — scheduler removed when reachable,
      6. committed/reverted/failed status returned.

    Returns: {"status": str, "message": str, "pre_backup_sha": str}
    Raises 404 for unknown device/commit, 502 for push failures.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    try:
        outcome = await restore_service.restore_config(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            commit_sha=body.commit_sha,
            db_session=db,
        )
    except ValueError as exc:
        # restore_service signals "not found" via ValueError.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)
        ) from exc
    except Exception as exc:
        logger.error(
            "Restore failed for device %s tenant %s commit %s: %s",
            device_id, tenant_id, body.commit_sha, exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Restore failed: {exc}",
        ) from exc
    return outcome
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config/emergency-rollback",
    summary="Emergency rollback to most recent pre-push backup",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def emergency_rollback(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Emergency rollback: restore the most recent pre-push backup.
    Used when a device goes offline after a config push.
    Finds the latest 'pre-restore', 'checkpoint', or 'pre-template-push'
    backup and restores it via the two-phase panic-revert process.

    Returns the restore_service result dict augmented with
    'rolled_back_to' (commit sha) and 'rolled_back_to_date'.
    Raises 404 when no pre-push backup exists or the restore target is
    missing, 502 when the rollback push itself fails.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Newest safety backup taken before a push — any of the three
    # pre-push trigger types — scoped to this tenant and device.
    result = await db.execute(
        select(ConfigBackupRun)
        .where(
            ConfigBackupRun.device_id == device_id,  # type: ignore[arg-type]
            ConfigBackupRun.tenant_id == tenant_id,  # type: ignore[arg-type]
            ConfigBackupRun.trigger_type.in_(
                ["pre-restore", "checkpoint", "pre-template-push"]
            ),
        )
        .order_by(ConfigBackupRun.created_at.desc())
        .limit(1)
    )
    backup = result.scalar_one_or_none()
    if backup is None:
        # Nothing to roll back to — the caller never took a pre-push backup.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No pre-push backup found for rollback",
        )
    try:
        # Same two-phase push with panic-revert used by the restore endpoint.
        restore_result = await restore_service.restore_config(
            device_id=str(device_id),
            tenant_id=str(tenant_id),
            commit_sha=backup.commit_sha,
            db_session=db,
        )
    except ValueError as exc:
        # restore_service signals "not found" via ValueError.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(
            "Emergency rollback failed for device %s tenant %s: %s",
            device_id,
            tenant_id,
            exc,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Emergency rollback failed: {exc}",
        ) from exc
    # Annotate the service result with which backup we rolled back to.
    return {
        **restore_result,
        "rolled_back_to": backup.commit_sha,
        "rolled_back_to_date": backup.created_at.isoformat(),
    }
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config/schedules",
    summary="Get effective backup schedule for a device",
    dependencies=[require_scope("config:read")],
)
async def get_schedule(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Return the effective backup schedule for a device.

    Resolution order: device-specific override, then tenant-level default
    (device_id IS NULL). When neither exists a synthetic default is
    returned (2am UTC daily, enabled=True, id=None, is_default=True).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    schedule = None
    # Try the device-specific override first, then the tenant-wide default.
    for device_clause in (
        ConfigBackupSchedule.device_id == device_id,  # type: ignore[arg-type]
        ConfigBackupSchedule.device_id.is_(None),  # type: ignore[union-attr]
    ):
        lookup = await db.execute(
            select(ConfigBackupSchedule).where(
                ConfigBackupSchedule.tenant_id == tenant_id,  # type: ignore[arg-type]
                device_clause,
            )
        )
        schedule = lookup.scalar_one_or_none()
        if schedule is not None:
            break
    if schedule is None:
        # No schedule configured anywhere — synthesize the default.
        return {
            "id": None,
            "tenant_id": str(tenant_id),
            "device_id": str(device_id),
            "cron_expression": "0 2 * * *",
            "enabled": True,
            "is_default": True,
        }
    return {
        "id": str(schedule.id),
        "tenant_id": str(schedule.tenant_id),
        "device_id": str(schedule.device_id) if schedule.device_id else None,
        "cron_expression": schedule.cron_expression,
        "enabled": schedule.enabled,
        # A tenant-level row (device_id NULL) is the default schedule.
        "is_default": schedule.device_id is None,
    }
@router.put(
    "/tenants/{tenant_id}/devices/{device_id}/config/schedules",
    summary="Create or update the device-specific backup schedule",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def update_schedule(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: ScheduleUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create or update the device-specific backup schedule override.
    If no device-specific schedule exists, creates one. If one exists, updates
    its cron_expression and enabled fields.
    After persisting, the backup scheduler is hot-reloaded so the change
    takes effect immediately.
    Returns the updated schedule.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Look for existing device-specific schedule
    result = await db.execute(
        select(ConfigBackupSchedule).where(
            ConfigBackupSchedule.tenant_id == tenant_id,  # type: ignore[arg-type]
            ConfigBackupSchedule.device_id == device_id,  # type: ignore[arg-type]
        )
    )
    schedule = result.scalar_one_or_none()
    if schedule is None:
        # Create new device-specific schedule
        schedule = ConfigBackupSchedule(
            tenant_id=tenant_id,
            device_id=device_id,
            cron_expression=body.cron_expression,
            enabled=body.enabled,
        )
        db.add(schedule)
    else:
        # Update existing schedule
        schedule.cron_expression = body.cron_expression
        schedule.enabled = body.enabled
    # Flush first so a newly created row gets its id and the change is
    # visible before the scheduler re-reads it.
    await db.flush()
    # Hot-reload the scheduler so changes take effect immediately
    from app.services.backup_scheduler import on_schedule_change
    await on_schedule_change(tenant_id, device_id)
    return {
        "id": str(schedule.id),
        "tenant_id": str(schedule.tenant_id),
        "device_id": str(schedule.device_id),
        "cron_expression": schedule.cron_expression,
        "enabled": schedule.enabled,
        "is_default": False,
    }

View File

@@ -0,0 +1,371 @@
"""
Dynamic RouterOS config editor API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/devices/{device_id}/config-editor/
Proxies commands to the Go poller's CmdResponder via the RouterOS proxy service.
Provides:
- GET /browse -- browse a RouterOS menu path
- POST /add -- add a new entry
- POST /set -- edit an existing entry
- POST /remove -- delete an entry
- POST /execute -- execute an arbitrary CLI command
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read-only (GET browse); operator and above = write (POST).
"""
import uuid
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.security.command_blocklist import check_command_safety, check_path_safety
from app.services import routeros_proxy
from app.services.audit_service import log_action
logger = structlog.get_logger(__name__)
audit_logger = structlog.get_logger("audit")
router = APIRouter(tags=["config-editor"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Super admins may act on any tenant; regular users only on their own
    (403 otherwise). In both allowed cases the RLS tenant context is set
    on the session so tenant-scoped queries work.
    """
    from app.database import set_tenant_context

    if not current_user.is_super_admin and current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
    # Super admins and tenant members alike get the RLS context applied.
    await set_tenant_context(db, str(tenant_id))
async def _check_device_online(
    db: AsyncSession, device_id: uuid.UUID
) -> Device:
    """Load the device and require it to be online.

    Returns the Device row. Raises 404 when the device does not exist
    and 409 when its status is anything other than 'online'.
    """
    lookup = await db.execute(
        select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
    )
    device = lookup.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
    if device.status != "online":
        # Editing config requires a live RouterOS connection.
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Device is offline \u2014 config editor requires a live connection.",
        )
    return device
# ---------------------------------------------------------------------------
# Request schemas
# ---------------------------------------------------------------------------
class AddEntryRequest(BaseModel):
    """Payload for adding a new entry at a RouterOS menu path."""
    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    path: str  # RouterOS menu path, e.g. "/interface"
    properties: dict[str, str]  # property name -> value for the new entry
class SetEntryRequest(BaseModel):
    """Payload for editing an existing entry at a RouterOS menu path."""
    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    path: str  # RouterOS menu path, e.g. "/interface"
    entry_id: str | None = None  # Optional for singleton paths (e.g. /ip/dns)
    properties: dict[str, str]  # property name -> new value to apply
class RemoveEntryRequest(BaseModel):
    """Payload for deleting an entry from a RouterOS menu path."""
    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    path: str  # RouterOS menu path, e.g. "/interface"
    entry_id: str  # id of the entry to remove (required — no singleton form)
class ExecuteRequest(BaseModel):
    """Payload for executing an arbitrary RouterOS CLI command."""
    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    command: str  # full CLI command; screened by check_command_safety()
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/browse",
    summary="Browse a RouterOS menu path",
    dependencies=[require_scope("config:read")],
)
async def browse_menu(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    path: str = Query("/interface", description="RouterOS menu path to browse"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Browse a RouterOS menu path and return all entries at that path.

    Requires the device to be online; the path is screened by
    check_path_safety before the request is proxied to the poller.
    Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(path)
    proxy_result = await routeros_proxy.browse_menu(str(device_id), path)
    if not proxy_result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=proxy_result.get("error", "Failed to browse menu path"),
        )
    # Read-only access is still recorded in the audit stream.
    audit_logger.info(
        "routeros_config_browsed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        path=path,
    )
    return {
        "success": True,
        "entries": proxy_result.get("data", []),
        "error": None,
        "path": path,
    }
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/add",
    summary="Add a new entry to a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def add_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: AddEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Add a new entry to a RouterOS menu path with the given properties.

    Flow: tenant access check -> device must be online -> write path
    safety check -> proxy the add to the poller -> audit log + DB audit.
    Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.add_entry(str(device_id), body.path, body.properties)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to add entry"),
        )
    audit_logger.info(
        "routeros_config_added",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_add",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "properties": body.properties},
        )
    except Exception as audit_err:
        # Best-effort DB audit: the config change already succeeded, so
        # don't fail the request — but don't swallow the error silently.
        logger.warning("log_action failed for config_add", error=str(audit_err))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/set",
    summary="Edit an existing entry in a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def set_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: SetEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Update an existing entry's properties on the device.

    Flow: tenant access check -> device must be online -> write path
    safety check -> proxy the update to the poller -> audit log + DB
    audit. entry_id may be None for singleton paths.
    Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.update_entry(
        str(device_id), body.path, body.entry_id, body.properties
    )
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to update entry"),
        )
    audit_logger.info(
        "routeros_config_modified",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        entry_id=body.entry_id,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_set",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "entry_id": body.entry_id, "properties": body.properties},
        )
    except Exception as audit_err:
        # Best-effort DB audit: the config change already succeeded, so
        # don't fail the request — but don't swallow the error silently.
        logger.warning("log_action failed for config_set", error=str(audit_err))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/remove",
    summary="Delete an entry from a RouterOS menu path",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def remove_entry(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: RemoveEntryRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Remove an entry from a RouterOS menu path.

    Flow: tenant access check -> device must be online -> write path
    safety check -> proxy the removal to the poller -> audit log + DB
    audit. Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_path_safety(body.path, write=True)
    result = await routeros_proxy.remove_entry(
        str(device_id), body.path, body.entry_id
    )
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to remove entry"),
        )
    audit_logger.info(
        "routeros_config_removed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        path=body.path,
        entry_id=body.entry_id,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_remove",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"path": body.path, "entry_id": body.entry_id},
        )
    except Exception as audit_err:
        # Best-effort DB audit: the config change already succeeded, so
        # don't fail the request — but don't swallow the error silently.
        logger.warning("log_action failed for config_remove", error=str(audit_err))
    return result
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/config-editor/execute",
    summary="Execute an arbitrary RouterOS CLI command",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def execute_command(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: ExecuteRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Execute an arbitrary RouterOS CLI command on the device.

    Flow: tenant access check -> device must be online -> command
    blocklist check -> proxy the command to the poller -> audit log +
    DB audit. Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_online(db, device_id)
    check_command_safety(body.command)
    result = await routeros_proxy.execute_cli(str(device_id), body.command)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=result.get("error", "Failed to execute command"),
        )
    audit_logger.info(
        "routeros_command_executed",
        device_id=str(device_id),
        tenant_id=str(tenant_id),
        user_id=str(current_user.user_id),
        user_role=current_user.role,
        command=body.command,
        success=result.get("success", False),
    )
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "config_execute",
            resource_type="config", resource_id=str(device_id),
            device_id=device_id,
            details={"command": body.command},
        )
    except Exception as audit_err:
        # Best-effort DB audit: the command already ran on the device, so
        # don't fail the request — but don't swallow the error silently.
        logger.warning("log_action failed for config_execute", error=str(audit_err))
    return result

View File

@@ -0,0 +1,94 @@
"""
Device group management API endpoints.
Routes: /api/tenants/{tenant_id}/device-groups
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- tenant_admin/admin: DELETE
"""
import uuid
from fastapi import APIRouter, Depends, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_operator_or_above, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.routers.devices import _check_tenant_access
from app.schemas.device import DeviceGroupCreate, DeviceGroupResponse, DeviceGroupUpdate
from app.services import device as device_service
router = APIRouter(tags=["device-groups"])
@router.get(
    "/tenants/{tenant_id}/device-groups",
    response_model=list[DeviceGroupResponse],
    summary="List device groups",
)
async def list_groups(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceGroupResponse]:
    """Return every device group in the tenant (viewer role and above)."""
    await _check_tenant_access(current_user, tenant_id, db)
    groups = await device_service.get_groups(db=db, tenant_id=tenant_id)
    return groups
@router.post(
    "/tenants/{tenant_id}/device-groups",
    response_model=DeviceGroupResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a device group",
    dependencies=[Depends(require_operator_or_above)],
)
async def create_group(
    tenant_id: uuid.UUID,
    data: DeviceGroupCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceGroupResponse:
    """Create a new device group in the tenant (operator role or above)."""
    await _check_tenant_access(current_user, tenant_id, db)
    created = await device_service.create_group(db=db, tenant_id=tenant_id, data=data)
    return created
@router.put(
    "/tenants/{tenant_id}/device-groups/{group_id}",
    response_model=DeviceGroupResponse,
    summary="Update a device group",
    dependencies=[Depends(require_operator_or_above)],
)
async def update_group(
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    data: DeviceGroupUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceGroupResponse:
    """Update an existing device group (operator role or above)."""
    await _check_tenant_access(current_user, tenant_id, db)
    updated = await device_service.update_group(
        db=db, tenant_id=tenant_id, group_id=group_id, data=data
    )
    return updated
@router.delete(
    "/tenants/{tenant_id}/device-groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device group",
    dependencies=[Depends(require_tenant_admin_or_above)],
)
async def delete_group(
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a device group (tenant_admin role or above). Returns 204."""
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.delete_group(db=db, tenant_id=tenant_id, group_id=group_id)

View File

@@ -0,0 +1,150 @@
"""
Device syslog fetch endpoint via NATS RouterOS proxy.
Provides:
- GET /tenants/{tenant_id}/devices/{device_id}/logs -- fetch device log entries
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: viewer and above can read logs.
"""
import uuid
import structlog
from fastapi import APIRouter, Depends, HTTPException, Query, status
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services import routeros_proxy
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["device-logs"])
# ---------------------------------------------------------------------------
# Helpers (same pattern as config_editor.py)
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Mirrors the helper in config_editor.py: super admins may access any
    tenant; regular users only their own (403 otherwise). In both
    allowed cases the RLS tenant context is set on the session so
    tenant-scoped queries work — previously it was only set for super
    admins, unlike the config_editor sibling.
    """
    from app.database import set_tenant_context
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
    # Set RLS context for regular users too (consistent with config_editor).
    await set_tenant_context(db, str(tenant_id))
async def _check_device_exists(
    db: AsyncSession, device_id: uuid.UUID
) -> None:
    """Raise 404 unless the device exists.

    Unlike the config-editor helper, online status is NOT required —
    historical logs can be fetched for a device record regardless.
    """
    from sqlalchemy import select
    from app.models.device import Device
    lookup = await db.execute(
        select(Device).where(Device.id == device_id)
    )
    if lookup.scalar_one_or_none() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {device_id} not found",
        )
# ---------------------------------------------------------------------------
# Response model
# ---------------------------------------------------------------------------
class LogEntry(BaseModel):
    """One RouterOS log line as returned by /log/print."""
    time: str  # timestamp string as reported by the device
    topics: str  # RouterOS log topics string for the entry
    message: str  # log message text
class LogsResponse(BaseModel):
    """Response envelope for the device-logs endpoint."""
    logs: list[LogEntry]  # entries after any search filtering
    device_id: str  # echoed device UUID as a string
    count: int  # number of entries in `logs` (post-filter)
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/logs",
    response_model=LogsResponse,
    summary="Fetch device syslog entries via RouterOS API",
    dependencies=[Depends(require_min_role("viewer"))],
)
async def get_device_logs(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    limit: int = Query(default=100, ge=1, le=500),
    topic: str | None = Query(default=None, description="Filter by log topic"),
    search: str | None = Query(default=None, description="Search in message/topics"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> LogsResponse:
    """Fetch device log entries via the RouterOS /log/print command.

    `topic` is filtered device-side via a query word; `search` is a
    case-insensitive substring match applied server-side to message and
    topics after the fetch. Raises 502 when the proxy reports failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await _check_device_exists(db, device_id)
    # Assemble RouterOS command arguments for /log/print.
    command_args = [f"=count={limit}"]
    if topic:
        command_args.append(f"?topics={topic}")
    result = await routeros_proxy.execute_command(
        str(device_id), "/log/print", args=command_args, timeout=15.0
    )
    if not result.get("success"):
        error_msg = result.get("error", "Unknown error fetching logs")
        logger.warning(
            "failed to fetch device logs",
            device_id=str(device_id),
            error=error_msg,
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch device logs: {error_msg}",
        )
    # Filter (case-insensitive) then build response models.
    needle = search.lower() if search else None
    logs: list[LogEntry] = []
    for raw in result.get("data", []):
        message = raw.get("message", "")
        topics = raw.get("topics", "")
        if needle is not None:
            if needle not in message.lower() and needle not in topics.lower():
                continue
        logs.append(
            LogEntry(time=raw.get("time", ""), topics=topics, message=message)
        )
    return LogsResponse(
        logs=logs,
        device_id=str(device_id),
        count=len(logs),
    )

View File

@@ -0,0 +1,94 @@
"""
Device tag management API endpoints.
Routes: /api/tenants/{tenant_id}/device-tags
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- tenant_admin/admin: DELETE
"""
import uuid
from fastapi import APIRouter, Depends, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.rbac import require_operator_or_above, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.routers.devices import _check_tenant_access
from app.schemas.device import DeviceTagCreate, DeviceTagResponse, DeviceTagUpdate
from app.services import device as device_service
router = APIRouter(tags=["device-tags"])
@router.get(
    "/tenants/{tenant_id}/device-tags",
    response_model=list[DeviceTagResponse],
    summary="List device tags",
)
async def list_tags(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[DeviceTagResponse]:
    """Return every device tag owned by the tenant. Viewer role and above."""
    await _check_tenant_access(current_user, tenant_id, db)
    tags = await device_service.get_tags(db=db, tenant_id=tenant_id)
    return tags
@router.post(
    "/tenants/{tenant_id}/device-tags",
    response_model=DeviceTagResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a device tag",
    dependencies=[Depends(require_operator_or_above)],
)
async def create_tag(
    tenant_id: uuid.UUID,
    data: DeviceTagCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceTagResponse:
    """Create a new tag in the tenant. Requires operator role or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    created = await device_service.create_tag(db=db, tenant_id=tenant_id, data=data)
    return created
@router.put(
    "/tenants/{tenant_id}/device-tags/{tag_id}",
    response_model=DeviceTagResponse,
    summary="Update a device tag",
    dependencies=[Depends(require_operator_or_above)],
)
async def update_tag(
    tenant_id: uuid.UUID,
    tag_id: uuid.UUID,
    data: DeviceTagUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceTagResponse:
    """Modify an existing tag. Requires operator role or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    updated = await device_service.update_tag(
        db=db, tenant_id=tenant_id, tag_id=tag_id, data=data
    )
    return updated
@router.delete(
    "/tenants/{tenant_id}/device-tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device tag",
    dependencies=[Depends(require_tenant_admin_or_above)],
)
async def delete_tag(
    tenant_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Remove a tag from the tenant. Requires tenant_admin or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.delete_tag(db=db, tenant_id=tenant_id, tag_id=tag_id)

View File

@@ -0,0 +1,452 @@
"""
Device management API endpoints.
All routes are tenant-scoped under /api/tenants/{tenant_id}/devices.
RLS is enforced via PostgreSQL — the app_user engine automatically filters
cross-tenant data based on the SET LOCAL app.current_tenant context set by
get_current_user dependency.
RBAC:
- viewer: GET (read-only)
- operator: POST, PUT (write)
- admin/tenant_admin: DELETE
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.services.audit_service import log_action
from app.middleware.rbac import (
require_min_role,
require_operator_or_above,
require_scope,
require_tenant_admin_or_above,
)
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.schemas.device import (
BulkAddRequest,
BulkAddResult,
DeviceCreate,
DeviceListResponse,
DeviceResponse,
DeviceUpdate,
SubnetScanRequest,
SubnetScanResponse,
)
from app.services import device as device_service
from app.services.scanner import scan_subnet
router = APIRouter(tags=["devices"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Guard: the caller must own *tenant_id* or be a super admin.

    - super_admin: allowed for any tenant; the DB tenant context is re-set
      to the target tenant so RLS permits the operation.
    - everyone else: tenant_id must equal the caller's own tenant, else 403.
    """
    if current_user.is_super_admin:
        # Local import — presumably avoids a module-level import cycle with
        # app.database; TODO confirm.
        from app.database import set_tenant_context
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
# ---------------------------------------------------------------------------
# Device CRUD
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices",
    response_model=DeviceListResponse,
    summary="List devices with pagination and filtering",
    dependencies=[require_scope("devices:read")],
)
async def list_devices(
    tenant_id: uuid.UUID,
    page: int = Query(1, ge=1, description="Page number (1-based)"),
    page_size: int = Query(25, ge=1, le=100, description="Items per page (1-100)"),
    status_filter: Optional[str] = Query(None, alias="status"),
    search: Optional[str] = Query(None, description="Text search on hostname or IP"),
    tag_id: Optional[uuid.UUID] = Query(None),
    group_id: Optional[uuid.UUID] = Query(None),
    sort_by: str = Query("created_at", description="Field to sort by"),
    sort_order: str = Query("desc", description="asc or desc"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceListResponse:
    """Paginated, filterable, sortable device listing for one tenant."""
    await _check_tenant_access(current_user, tenant_id, db)
    # `status` is a reserved name in this module (fastapi.status), hence the
    # aliased `status_filter` parameter.
    devices, total_count = await device_service.get_devices(
        db=db,
        tenant_id=tenant_id,
        page=page,
        page_size=page_size,
        status=status_filter,
        search=search,
        tag_id=tag_id,
        group_id=group_id,
        sort_by=sort_by,
        sort_order=sort_order,
    )
    return DeviceListResponse(
        items=devices,
        total=total_count,
        page=page,
        page_size=page_size,
    )
@router.post(
    "/tenants/{tenant_id}/devices",
    response_model=DeviceResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Add a device (validates TCP connectivity first)",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def create_device(
    request: Request,
    tenant_id: uuid.UUID,
    data: DeviceCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """
    Create a new device. Requires operator role or above.
    The device IP/port is TCP-probed before the record is saved.
    Credentials are encrypted with AES-256-GCM before storage and never returned.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await device_service.create_device(
        db=db,
        tenant_id=tenant_id,
        data=data,
        encryption_key=settings.get_encryption_key_bytes(),
    )
    # Best-effort audit trail: written only after a successful create, and a
    # logging failure must never fail the request itself.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_create",
            resource_type="device", resource_id=str(result.id),
            details={"hostname": data.hostname, "ip_address": data.ip_address},
            ip_address=request.client.host if request.client else None,
        )
    except Exception:
        pass
    return result
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}",
    response_model=DeviceResponse,
    summary="Get a single device",
    dependencies=[require_scope("devices:read")],
)
async def get_device(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """Fetch one device's details. Viewer role and above."""
    await _check_tenant_access(current_user, tenant_id, db)
    device = await device_service.get_device(
        db=db, tenant_id=tenant_id, device_id=device_id
    )
    return device
@router.put(
    "/tenants/{tenant_id}/devices/{device_id}",
    response_model=DeviceResponse,
    summary="Update a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def update_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    data: DeviceUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> DeviceResponse:
    """Update device fields. Requires operator role or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    result = await device_service.update_device(
        db=db,
        tenant_id=tenant_id,
        device_id=device_id,
        data=data,
        encryption_key=settings.get_encryption_key_bytes(),
    )
    # Best-effort audit trail after a successful update; only fields the
    # client actually sent are recorded (exclude_unset).
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_update",
            resource_type="device", resource_id=str(device_id),
            device_id=device_id,
            details={"changes": data.model_dump(exclude_unset=True)},
            ip_address=request.client.host if request.client else None,
        )
    except Exception:
        pass
    return result
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a device",
    dependencies=[Depends(require_tenant_admin_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def delete_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Hard-delete a device. Requires tenant_admin or above."""
    await _check_tenant_access(current_user, tenant_id, db)
    # NOTE(review): unlike create/update, the audit record is written BEFORE
    # the delete — so a failed delete (e.g. unknown id) still leaves a
    # "device_delete" entry. This may be deliberate (writing after the row is
    # gone could violate a device_id FK on the audit table) — TODO confirm.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "device_delete",
            resource_type="device", resource_id=str(device_id),
            device_id=device_id,
            ip_address=request.client.host if request.client else None,
        )
    except Exception:
        pass
    await device_service.delete_device(db=db, tenant_id=tenant_id, device_id=device_id)
# ---------------------------------------------------------------------------
# Subnet scan and bulk add
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/scan",
    response_model=SubnetScanResponse,
    summary="Scan a subnet for MikroTik devices",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def scan_devices(
    request: Request,
    tenant_id: uuid.UUID,
    data: SubnetScanRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> SubnetScanResponse:
    """
    Scan a CIDR subnet for hosts with open RouterOS API ports (8728/8729).
    Returns a list of discovered IPs for the user to review and selectively
    import — does NOT automatically add devices.
    Requires operator role or above.
    """
    # Use the shared helper (like every other endpoint in this router) so a
    # super admin acting on another tenant also gets the DB tenant context
    # re-pointed — the previous inline check skipped set_tenant_context, which
    # could leave the audit insert below blocked by RLS for foreign tenants.
    await _check_tenant_access(current_user, tenant_id, db)
    discovered = await scan_subnet(data.cidr)

    import ipaddress
    network = ipaddress.ip_network(data.cidr, strict=False)
    # Exclude the network and broadcast addresses where the subnet has them
    # (/31 and /32 have no such reserved addresses, so count everything).
    total_scanned = (
        network.num_addresses - 2
        if network.num_addresses > 2
        else network.num_addresses
    )
    # Audit log the scan (fire-and-forget — never breaks the response)
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "subnet_scan",
            resource_type="network", resource_id=data.cidr,
            details={
                "cidr": data.cidr,
                "devices_found": len(discovered),
                "ip": request.client.host if request.client else None,
            },
            ip_address=request.client.host if request.client else None,
        )
    except Exception:
        pass
    return SubnetScanResponse(
        cidr=data.cidr,
        discovered=discovered,
        total_scanned=total_scanned,
        total_discovered=len(discovered),
    )
@router.post(
    "/tenants/{tenant_id}/devices/bulk-add",
    response_model=BulkAddResult,
    status_code=status.HTTP_201_CREATED,
    summary="Bulk-add devices from scan results",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def bulk_add_devices(
    request: Request,
    tenant_id: uuid.UUID,
    data: BulkAddRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> BulkAddResult:
    """
    Add multiple devices at once from scan results.
    Per-device credentials take precedence over shared credentials.
    Devices that fail connectivity checks or validation are reported in `failed`.
    Requires operator role or above.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    added = []
    failed = []
    encryption_key = settings.get_encryption_key_bytes()
    for dev_data in data.devices:
        # Resolve credentials: per-device first, then shared
        username = dev_data.username or data.shared_username
        password = dev_data.password or data.shared_password
        if not username or not password:
            # Skip (don't abort the batch) when no usable credentials exist.
            failed.append({
                "ip_address": dev_data.ip_address,
                "error": "No credentials provided (set per-device or shared credentials)",
            })
            continue
        # Fall back to the IP as hostname when the scan found no name.
        create_data = DeviceCreate(
            hostname=dev_data.hostname or dev_data.ip_address,
            ip_address=dev_data.ip_address,
            api_port=dev_data.api_port,
            api_ssl_port=dev_data.api_ssl_port,
            username=username,
            password=password,
        )
        try:
            device = await device_service.create_device(
                db=db,
                tenant_id=tenant_id,
                data=create_data,
                encryption_key=encryption_key,
            )
            added.append(device)
            # Best-effort audit per adopted device; never fails the batch.
            try:
                await log_action(
                    db, tenant_id, current_user.user_id, "device_adopt",
                    resource_type="device", resource_id=str(device.id),
                    details={"hostname": create_data.hostname, "ip_address": create_data.ip_address},
                    ip_address=request.client.host if request.client else None,
                )
            except Exception:
                pass
        except HTTPException as exc:
            # Service-raised errors (e.g. connectivity probe failure) become
            # per-device failure entries rather than a failed request.
            failed.append({"ip_address": dev_data.ip_address, "error": exc.detail})
        except Exception as exc:
            failed.append({"ip_address": dev_data.ip_address, "error": str(exc)})
    return BulkAddResult(added=added, failed=failed)
# ---------------------------------------------------------------------------
# Group assignment
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Add device to a group",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def add_device_to_group(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Assign a device to a group. Requires operator or above.

    Thin wrapper: tenant check + delegation to the device service; returns
    204 with no body on success. `request` is required by the rate limiter.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.assign_device_to_group(db, tenant_id, device_id, group_id)
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}/groups/{group_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Remove device from a group",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def remove_device_from_group(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Remove a device from a group. Requires operator or above.

    Thin wrapper: tenant check + delegation to the device service; returns
    204 with no body on success. `request` is required by the rate limiter.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.remove_device_from_group(db, tenant_id, device_id, group_id)
# ---------------------------------------------------------------------------
# Tag assignment
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/devices/{device_id}/tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Add tag to a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("20/minute")
async def add_tag_to_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Assign a tag to a device. Requires operator or above.

    Thin wrapper: tenant check + delegation to the device service; returns
    204 with no body on success. `request` is required by the rate limiter.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.assign_tag_to_device(db, tenant_id, device_id, tag_id)
@router.delete(
    "/tenants/{tenant_id}/devices/{device_id}/tags/{tag_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Remove tag from a device",
    dependencies=[Depends(require_operator_or_above), require_scope("devices:write")],
)
@limiter.limit("5/minute")
async def remove_tag_from_device(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    tag_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Remove a tag from a device. Requires operator or above.

    Thin wrapper: tenant check + delegation to the device service; returns
    204 with no body on success. `request` is required by the rate limiter.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    await device_service.remove_tag_from_device(db, tenant_id, device_id, tag_id)

View File

@@ -0,0 +1,164 @@
"""Unified events timeline API endpoint.
Provides a single GET endpoint that unions alert events, device status changes,
and config backup runs into a unified timeline for the dashboard.
RLS enforced via get_db() (app_user engine with tenant context).
"""
import logging
import uuid
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["events"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Guard: the caller must own *tenant_id* or be a super admin.

    For super admins the DB tenant context is re-pointed at the target
    tenant so RLS lets the timeline queries through.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
# ---------------------------------------------------------------------------
# Unified events endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/events",
    summary="List unified events (alerts, status changes, config backups)",
)
async def list_events(
    tenant_id: uuid.UUID,
    limit: int = Query(50, ge=1, le=200, description="Max events to return"),
    event_type: Optional[str] = Query(
        None,
        description="Filter by event type: alert, status_change, config_backup",
    ),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return a unified list of recent events across alerts, device status, and config backups.
    Events are ordered by timestamp descending, limited to `limit` (default 50).
    RLS automatically filters to the tenant's data via the app_user session.

    Each source is queried with its own LIMIT, the three result sets are
    merged, sorted by timestamp, and truncated to `limit` — so a busy source
    can crowd out quieter ones within a single response.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if event_type and event_type not in ("alert", "status_change", "config_backup"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="event_type must be one of: alert, status_change, config_backup",
        )
    events: list[dict[str, Any]] = []
    # 1. Alert events — joined to devices for a display hostname; the join is
    # LEFT so alerts without a device still appear.
    if not event_type or event_type == "alert":
        alert_result = await db.execute(
            text("""
                SELECT ae.id, ae.status, ae.severity, ae.metric, ae.message,
                       ae.fired_at, ae.device_id, d.hostname
                FROM alert_events ae
                LEFT JOIN devices d ON d.id = ae.device_id
                ORDER BY ae.fired_at DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        # Rows are accessed positionally; indices match the SELECT order above.
        for row in alert_result.fetchall():
            alert_status = row[1] or "firing"
            metric = row[3] or "unknown"
            events.append({
                "id": str(row[0]),
                "event_type": "alert",
                "severity": row[2],
                "title": f"{alert_status}: {metric}",
                "description": row[4] or f"Alert {alert_status} for {metric}",
                "device_hostname": row[7],
                "device_id": str(row[6]) if row[6] else None,
                "timestamp": row[5].isoformat() if row[5] else None,
            })
    # 2. Device status changes (inferred from current status + last_seen) —
    # synthetic events derived from the devices table, not a history log, so
    # only each device's latest state appears.
    if not event_type or event_type == "status_change":
        status_result = await db.execute(
            text("""
                SELECT d.id, d.hostname, d.status, d.last_seen
                FROM devices d
                WHERE d.last_seen IS NOT NULL
                ORDER BY d.last_seen DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        for row in status_result.fetchall():
            device_status = row[2] or "unknown"
            hostname = row[1] or "Unknown device"
            # Anything other than "online" (offline/unknown/…) is a warning.
            severity = "info" if device_status == "online" else "warning"
            events.append({
                # Prefixed synthetic id — these rows have no event UUID.
                "id": f"status-{row[0]}",
                "event_type": "status_change",
                "severity": severity,
                "title": f"Device {device_status}",
                "description": f"{hostname} is now {device_status}",
                "device_hostname": hostname,
                "device_id": str(row[0]),
                "timestamp": row[3].isoformat() if row[3] else None,
            })
    # 3. Config backup runs — always reported as severity "info".
    if not event_type or event_type == "config_backup":
        backup_result = await db.execute(
            text("""
                SELECT cbr.id, cbr.trigger_type, cbr.created_at,
                       cbr.device_id, d.hostname
                FROM config_backup_runs cbr
                LEFT JOIN devices d ON d.id = cbr.device_id
                ORDER BY cbr.created_at DESC
                LIMIT :limit
            """),
            {"limit": limit},
        )
        for row in backup_result.fetchall():
            trigger_type = row[1] or "manual"
            hostname = row[4] or "Unknown device"
            events.append({
                "id": str(row[0]),
                "event_type": "config_backup",
                "severity": "info",
                "title": "Config backup",
                "description": f"{trigger_type} backup completed for {hostname}",
                "device_hostname": hostname,
                "device_id": str(row[3]) if row[3] else None,
                "timestamp": row[2].isoformat() if row[2] else None,
            })
    # Sort all events by timestamp descending, then apply final limit.
    # Missing timestamps sort as "" and therefore land at the end.
    events.sort(
        key=lambda e: e["timestamp"] or "",
        reverse=True,
    )
    return events[:limit]

View File

@@ -0,0 +1,712 @@
"""Firmware API endpoints for version overview, cache management, preferred channel,
and firmware upgrade orchestration.
Tenant-scoped routes under /api/tenants/{tenant_id}/firmware/*.
Global routes under /api/firmware/* for version listing and admin actions.
"""
import asyncio
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.audit_service import log_action
router = APIRouter(tags=["firmware"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Guard: the caller must own *tenant_id* or be a super admin.

    For super admins the DB tenant context is re-pointed at the target
    tenant so RLS permits the firmware queries.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
class PreferredChannelRequest(BaseModel):
    """Body for the preferred-channel PATCH endpoints."""

    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    preferred_channel: str  # "stable", "long-term", "testing" — validated in the handlers
class FirmwareDownloadRequest(BaseModel):
    """Body for POST /firmware/download (super-admin NPK cache download)."""

    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    architecture: str  # RouterOS CPU architecture identifier
    channel: str  # release channel name
    version: str  # exact firmware version to download
# =========================================================================
# TENANT-SCOPED ENDPOINTS
# =========================================================================
@router.get(
    "/tenants/{tenant_id}/firmware/overview",
    summary="Get firmware status for all devices in tenant",
    dependencies=[require_scope("firmware:write")],
)
async def get_firmware_overview(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Firmware status summary for every device in the tenant."""
    await _check_tenant_access(current_user, tenant_id, db)
    # Imported lazily, matching the file's pattern for firmware_service.
    from app.services.firmware_service import get_firmware_overview as _get_overview
    overview = await _get_overview(str(tenant_id))
    return overview
@router.patch(
    "/tenants/{tenant_id}/devices/{device_id}/preferred-channel",
    summary="Set preferred firmware channel for a device",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def set_device_preferred_channel(
    request: Request,
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    body: PreferredChannelRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Persist the preferred firmware channel on a single device row."""
    await _check_tenant_access(current_user, tenant_id, db)
    allowed_channels = ("stable", "long-term", "testing")
    if body.preferred_channel not in allowed_channels:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="preferred_channel must be one of: stable, long-term, testing",
        )
    # RETURNING id tells us whether any row matched (RLS scopes the UPDATE).
    update_result = await db.execute(
        text("""
            UPDATE devices SET preferred_channel = :channel, updated_at = NOW()
            WHERE id = :device_id
            RETURNING id
        """),
        {"channel": body.preferred_channel, "device_id": str(device_id)},
    )
    updated_row = update_result.fetchone()
    if updated_row is None:
        raise HTTPException(status_code=404, detail="Device not found")
    await db.commit()
    return {"status": "ok", "preferred_channel": body.preferred_channel}
@router.patch(
    "/tenants/{tenant_id}/device-groups/{group_id}/preferred-channel",
    summary="Set preferred firmware channel for a device group",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def set_group_preferred_channel(
    request: Request,
    tenant_id: uuid.UUID,
    group_id: uuid.UUID,
    body: PreferredChannelRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Persist the preferred firmware channel on a device-group row."""
    await _check_tenant_access(current_user, tenant_id, db)
    allowed_channels = ("stable", "long-term", "testing")
    if body.preferred_channel not in allowed_channels:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="preferred_channel must be one of: stable, long-term, testing",
        )
    # RETURNING id tells us whether any row matched (RLS scopes the UPDATE).
    update_result = await db.execute(
        text("""
            UPDATE device_groups SET preferred_channel = :channel
            WHERE id = :group_id
            RETURNING id
        """),
        {"channel": body.preferred_channel, "group_id": str(group_id)},
    )
    updated_row = update_result.fetchone()
    if updated_row is None:
        raise HTTPException(status_code=404, detail="Device group not found")
    await db.commit()
    return {"status": "ok", "preferred_channel": body.preferred_channel}
# =========================================================================
# GLOBAL ENDPOINTS (firmware versions are not tenant-scoped)
# =========================================================================
@router.get(
    "/firmware/versions",
    summary="List all known firmware versions from cache",
)
async def list_firmware_versions(
    architecture: Optional[str] = Query(None),
    channel: Optional[str] = Query(None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """List cached firmware versions, optionally filtered by architecture and/or channel.

    Global (not tenant-scoped) — firmware versions are shared across tenants.
    """
    filters = []
    params: dict[str, Any] = {}
    if architecture:
        filters.append("architecture = :arch")
        params["arch"] = architecture
    if channel:
        filters.append("channel = :channel")
        params["channel"] = channel
    # The f-string below only interpolates the fixed literal fragments
    # assembled above; user input is passed exclusively via bound parameters,
    # so there is no SQL-injection surface.
    where = f"WHERE {' AND '.join(filters)}" if filters else ""
    result = await db.execute(
        text(f"""
            SELECT id, architecture, channel, version, npk_url,
                   npk_local_path, npk_size_bytes, checked_at
            FROM firmware_versions
            {where}
            ORDER BY architecture, channel, checked_at DESC
        """),
        params,
    )
    # Positional row access; indices match the SELECT column order above.
    return [
        {
            "id": str(row[0]),
            "architecture": row[1],
            "channel": row[2],
            "version": row[3],
            "npk_url": row[4],
            "npk_local_path": row[5],
            "npk_size_bytes": row[6],
            "checked_at": row[7].isoformat() if row[7] else None,
        }
        for row in result.fetchall()
    ]
@router.post(
    "/firmware/check",
    summary="Trigger immediate firmware version check (super admin only)",
)
async def trigger_firmware_check(
    current_user: CurrentUser = Depends(get_current_user),
) -> dict[str, Any]:
    """Run the upstream firmware version check now and report what was found."""
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    # Imported lazily, matching the file's pattern for firmware_service.
    from app.services.firmware_service import check_latest_versions
    discovered = await check_latest_versions()
    return {
        "status": "ok",
        "versions_discovered": len(discovered),
        "versions": discovered,
    }
@router.get(
    "/firmware/cache",
    summary="List locally cached NPK files (super admin only)",
)
async def list_firmware_cache(
    current_user: CurrentUser = Depends(get_current_user),
) -> list[dict[str, Any]]:
    """Report the NPK files currently held in the local firmware cache."""
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    # Imported lazily, matching the file's pattern for firmware_service.
    from app.services.firmware_service import get_cached_firmware
    cached = await get_cached_firmware()
    return cached
@router.post(
    "/firmware/download",
    summary="Download a specific NPK to local cache (super admin only)",
)
async def download_firmware(
    body: FirmwareDownloadRequest,
    current_user: CurrentUser = Depends(get_current_user),
) -> dict[str, str]:
    """Fetch one NPK (architecture/channel/version) into the local cache."""
    if not current_user.is_super_admin:
        raise HTTPException(status_code=403, detail="Super admin only")
    # Imported lazily, matching the file's pattern for firmware_service.
    from app.services.firmware_service import download_firmware as _download
    cached_path = await _download(body.architecture, body.channel, body.version)
    return {"status": "ok", "path": cached_path}
# =========================================================================
# UPGRADE ENDPOINTS
# =========================================================================
class UpgradeRequest(BaseModel):
    """Body for starting or scheduling a single-device firmware upgrade."""

    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    device_id: str  # device UUID as a string; cast to uuid in SQL
    target_version: str  # firmware version to install
    architecture: str  # may be empty — handler falls back to the device row
    channel: str = "stable"
    confirmed_major_upgrade: bool = False  # explicit opt-in flag stored on the job
    scheduled_at: Optional[str] = None  # ISO datetime or None for immediate
class MassUpgradeRequest(BaseModel):
    """Body for a multi-device firmware upgrade request."""

    model_config = ConfigDict(extra="forbid")  # reject unknown request fields
    device_ids: list[str]  # device UUIDs as strings
    target_version: str  # firmware version to install on all listed devices
    channel: str = "stable"
    confirmed_major_upgrade: bool = False  # explicit opt-in flag
    scheduled_at: Optional[str] = None  # ISO datetime or None for immediate
@router.post(
    "/tenants/{tenant_id}/firmware/upgrade",
    summary="Start or schedule a single device firmware upgrade",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def start_firmware_upgrade(
    request: Request,
    tenant_id: uuid.UUID,
    body: UpgradeRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a firmware upgrade job for one device and start (or schedule) it.

    Returns 202 with the new job id.  Raises 403 for viewers and 422 when
    the device architecture cannot be determined or ``scheduled_at`` is not
    a valid ISO datetime.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot initiate upgrades")
    # Validate the schedule time BEFORE any database write.  Previously
    # datetime.fromisoformat() ran after commit, so a malformed value left
    # behind a committed "scheduled" job that was never registered and the
    # caller saw an HTTP 500 instead of a validation error.
    scheduled_dt: Optional[datetime] = None
    if body.scheduled_at:
        try:
            scheduled_dt = datetime.fromisoformat(body.scheduled_at)
        except ValueError:
            raise HTTPException(422, "scheduled_at must be a valid ISO 8601 datetime")
    # Look up device architecture when the request does not supply one.
    architecture = body.architecture
    if not architecture:
        dev_result = await db.execute(
            text("SELECT architecture FROM devices WHERE id = CAST(:id AS uuid)"),
            {"id": body.device_id},
        )
        dev_row = dev_result.fetchone()
        if not dev_row or not dev_row[0]:
            raise HTTPException(422, "Device architecture unknown — cannot upgrade")
        architecture = dev_row[0]
    # Create the upgrade job row ("scheduled" when deferred, else "pending").
    job_id = str(uuid.uuid4())
    await db.execute(
        text("""
            INSERT INTO firmware_upgrade_jobs
            (id, tenant_id, device_id, target_version, architecture, channel,
             status, confirmed_major_upgrade, scheduled_at)
            VALUES
            (CAST(:id AS uuid), CAST(:tenant_id AS uuid), CAST(:device_id AS uuid),
             :target_version, :architecture, :channel,
             :status, :confirmed, :scheduled_at)
        """),
        {
            "id": job_id,
            "tenant_id": str(tenant_id),
            "device_id": body.device_id,
            "target_version": body.target_version,
            "architecture": architecture,
            "channel": body.channel,
            "status": "scheduled" if body.scheduled_at else "pending",
            "confirmed": body.confirmed_major_upgrade,
            "scheduled_at": body.scheduled_at,
        },
    )
    await db.commit()
    # Hand off to the upgrade service: register the future run, or kick off
    # the upgrade in the background immediately.
    if scheduled_dt is not None:
        from app.services.upgrade_service import schedule_upgrade
        schedule_upgrade(job_id, scheduled_dt)
    else:
        from app.services.upgrade_service import start_upgrade
        asyncio.create_task(start_upgrade(job_id))
    # Audit logging is best-effort; never fail the request because of it.
    try:
        await log_action(
            db, tenant_id, current_user.user_id, "firmware_upgrade",
            resource_type="firmware", resource_id=job_id,
            device_id=uuid.UUID(body.device_id),
            details={"target_version": body.target_version, "channel": body.channel},
        )
    except Exception:
        pass
    return {"status": "accepted", "job_id": job_id}
@router.post(
    "/tenants/{tenant_id}/firmware/mass-upgrade",
    summary="Start or schedule a mass firmware upgrade for multiple devices",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("5/minute")
async def start_mass_firmware_upgrade(
    request: Request,
    tenant_id: uuid.UUID,
    body: MassUpgradeRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create one upgrade job per device under a shared rollout group.

    Returns 202 with the rollout group id and per-device job list.  Raises
    403 for viewers and 422 for an empty device list or malformed
    ``scheduled_at``.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot initiate upgrades")
    # Reject unusable input up front.  Previously an empty list produced a
    # rollout group with zero jobs, and a malformed scheduled_at raised
    # ValueError (HTTP 500) only after every job row was committed.
    if not body.device_ids:
        raise HTTPException(422, "device_ids must not be empty")
    scheduled_dt: Optional[datetime] = None
    if body.scheduled_at:
        try:
            scheduled_dt = datetime.fromisoformat(body.scheduled_at)
        except ValueError:
            raise HTTPException(422, "scheduled_at must be a valid ISO 8601 datetime")
    rollout_group_id = str(uuid.uuid4())
    jobs = []
    for device_id in body.device_ids:
        # Look up architecture per device; fall back to "unknown" so one
        # device with missing data does not block the whole rollout.
        dev_result = await db.execute(
            text("SELECT architecture FROM devices WHERE id = CAST(:id AS uuid)"),
            {"id": device_id},
        )
        dev_row = dev_result.fetchone()
        architecture = dev_row[0] if dev_row and dev_row[0] else "unknown"
        job_id = str(uuid.uuid4())
        await db.execute(
            text("""
                INSERT INTO firmware_upgrade_jobs
                (id, tenant_id, device_id, rollout_group_id,
                 target_version, architecture, channel,
                 status, confirmed_major_upgrade, scheduled_at)
                VALUES
                (CAST(:id AS uuid), CAST(:tenant_id AS uuid),
                 CAST(:device_id AS uuid), CAST(:group_id AS uuid),
                 :target_version, :architecture, :channel,
                 :status, :confirmed, :scheduled_at)
            """),
            {
                "id": job_id,
                "tenant_id": str(tenant_id),
                "device_id": device_id,
                "group_id": rollout_group_id,
                "target_version": body.target_version,
                "architecture": architecture,
                "channel": body.channel,
                "status": "scheduled" if body.scheduled_at else "pending",
                "confirmed": body.confirmed_major_upgrade,
                "scheduled_at": body.scheduled_at,
            },
        )
        jobs.append({"job_id": job_id, "device_id": device_id, "architecture": architecture})
    await db.commit()
    # Register the future rollout, or start it in the background now.
    if scheduled_dt is not None:
        from app.services.upgrade_service import schedule_mass_upgrade
        schedule_mass_upgrade(rollout_group_id, scheduled_dt)
    else:
        from app.services.upgrade_service import start_mass_upgrade
        asyncio.create_task(start_mass_upgrade(rollout_group_id))
    return {
        "status": "accepted",
        "rollout_group_id": rollout_group_id,
        "jobs": jobs,
    }
@router.get(
    "/tenants/{tenant_id}/firmware/upgrades",
    summary="List firmware upgrade jobs for tenant",
    dependencies=[require_scope("firmware:write")],
)
async def list_upgrade_jobs(
    tenant_id: uuid.UUID,
    upgrade_status: Optional[str] = Query(None, alias="status"),
    device_id: Optional[str] = Query(None),
    rollout_group_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=200),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Paginated list of upgrade jobs, newest first, with optional filters.

    Filters: job status, device id, rollout group id.  NOTE(review): the
    SQL has no tenant_id predicate — tenant isolation depends on RLS from
    get_db(); confirm it covers firmware_upgrade_jobs.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Filter fragments are fixed literals; user input reaches the query only
    # through bind parameters, so the f-string interpolation below is safe.
    filters = ["1=1"]
    params: dict[str, Any] = {}
    if upgrade_status:
        filters.append("j.status = :status")
        params["status"] = upgrade_status
    if device_id:
        filters.append("j.device_id = CAST(:device_id AS uuid)")
        params["device_id"] = device_id
    if rollout_group_id:
        filters.append("j.rollout_group_id = CAST(:group_id AS uuid)")
        params["group_id"] = rollout_group_id
    where = " AND ".join(filters)
    offset = (page - 1) * per_page
    # Total count for pagination metadata uses the same WHERE clause.
    count_result = await db.execute(
        text(f"SELECT COUNT(*) FROM firmware_upgrade_jobs j WHERE {where}"),
        params,
    )
    total = count_result.scalar() or 0
    result = await db.execute(
        text(f"""
            SELECT j.id, j.device_id, j.rollout_group_id,
                   j.target_version, j.architecture, j.channel,
                   j.status, j.pre_upgrade_backup_sha, j.scheduled_at,
                   j.started_at, j.completed_at, j.error_message,
                   j.confirmed_major_upgrade, j.created_at,
                   d.hostname AS device_hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE {where}
            ORDER BY j.created_at DESC
            LIMIT :limit OFFSET :offset
        """),
        {**params, "limit": per_page, "offset": offset},
    )
    # Serialize rows: UUIDs to str, timestamps to ISO strings, NULLs to None.
    items = [
        {
            "id": str(row[0]),
            "device_id": str(row[1]),
            "rollout_group_id": str(row[2]) if row[2] else None,
            "target_version": row[3],
            "architecture": row[4],
            "channel": row[5],
            "status": row[6],
            "pre_upgrade_backup_sha": row[7],
            "scheduled_at": row[8].isoformat() if row[8] else None,
            "started_at": row[9].isoformat() if row[9] else None,
            "completed_at": row[10].isoformat() if row[10] else None,
            "error_message": row[11],
            "confirmed_major_upgrade": row[12],
            "created_at": row[13].isoformat() if row[13] else None,
            "device_hostname": row[14],
        }
        for row in result.fetchall()
    ]
    return {"items": items, "total": total, "page": page, "per_page": per_page}
@router.get(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}",
    summary="Get single upgrade job detail",
    dependencies=[require_scope("firmware:write")],
)
async def get_upgrade_job(
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Return one firmware upgrade job (with device hostname) or 404.

    NOTE(review): the query filters only on job id; tenant isolation relies
    on RLS applied by get_db() — confirm firmware_upgrade_jobs has an RLS
    policy, otherwise a job from another tenant could be read here.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT j.id, j.device_id, j.rollout_group_id,
                   j.target_version, j.architecture, j.channel,
                   j.status, j.pre_upgrade_backup_sha, j.scheduled_at,
                   j.started_at, j.completed_at, j.error_message,
                   j.confirmed_major_upgrade, j.created_at,
                   d.hostname AS device_hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE j.id = CAST(:job_id AS uuid)
        """),
        {"job_id": str(job_id)},
    )
    row = result.fetchone()
    if not row:
        raise HTTPException(404, "Upgrade job not found")
    # Serialize: UUIDs to str, timestamps to ISO strings, NULLs to None.
    return {
        "id": str(row[0]),
        "device_id": str(row[1]),
        "rollout_group_id": str(row[2]) if row[2] else None,
        "target_version": row[3],
        "architecture": row[4],
        "channel": row[5],
        "status": row[6],
        "pre_upgrade_backup_sha": row[7],
        "scheduled_at": row[8].isoformat() if row[8] else None,
        "started_at": row[9].isoformat() if row[9] else None,
        "completed_at": row[10].isoformat() if row[10] else None,
        "error_message": row[11],
        "confirmed_major_upgrade": row[12],
        "created_at": row[13].isoformat() if row[13] else None,
        "device_hostname": row[14],
    }
@router.get(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}",
    summary="Get mass rollout status with all jobs",
    dependencies=[require_scope("firmware:write")],
)
async def get_rollout_status(
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Return aggregate counters plus the full job list for one rollout.

    404 when the group has no jobs (unknown or empty group).  NOTE(review):
    tenant scoping relies on RLS via get_db(); the SQL filters only on the
    rollout group id.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    result = await db.execute(
        text("""
            SELECT j.id, j.device_id, j.status, j.target_version,
                   j.architecture, j.error_message, j.started_at,
                   j.completed_at, d.hostname
            FROM firmware_upgrade_jobs j
            LEFT JOIN devices d ON d.id = j.device_id
            WHERE j.rollout_group_id = CAST(:group_id AS uuid)
            ORDER BY j.created_at ASC
        """),
        {"group_id": str(rollout_group_id)},
    )
    rows = result.fetchall()
    if not rows:
        raise HTTPException(404, "Rollout group not found")
    # Compute summary counters; r[2] is the job status column.
    total = len(rows)
    completed = sum(1 for r in rows if r[2] == "completed")
    failed = sum(1 for r in rows if r[2] == "failed")
    paused = sum(1 for r in rows if r[2] == "paused")
    pending = sum(1 for r in rows if r[2] in ("pending", "scheduled"))
    # Find currently running device: first job in an in-flight status,
    # reported by hostname when known, else by device id.
    active_statuses = {"downloading", "uploading", "rebooting", "verifying"}
    current_device = None
    for r in rows:
        if r[2] in active_statuses:
            current_device = r[8] or str(r[1])
            break
    jobs = [
        {
            "id": str(r[0]),
            "device_id": str(r[1]),
            "status": r[2],
            "target_version": r[3],
            "architecture": r[4],
            "error_message": r[5],
            "started_at": r[6].isoformat() if r[6] else None,
            "completed_at": r[7].isoformat() if r[7] else None,
            "device_hostname": r[8],
        }
        for r in rows
    ]
    return {
        "rollout_group_id": str(rollout_group_id),
        "total": total,
        "completed": completed,
        "failed": failed,
        "paused": paused,
        "pending": pending,
        "current_device": current_device,
        "jobs": jobs,
    }
@router.post(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}/cancel",
    summary="Cancel a scheduled or pending upgrade",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def cancel_upgrade_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Cancel one upgrade job via the upgrade service; viewers get 403."""
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot cancel upgrades")
    from app.services.upgrade_service import cancel_upgrade as svc_cancel
    await svc_cancel(str(job_id))
    response = {"status": "ok", "message": "Upgrade cancelled"}
    return response
@router.post(
    "/tenants/{tenant_id}/firmware/upgrades/{job_id}/retry",
    summary="Retry a failed upgrade",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def retry_upgrade_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    job_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Re-run a failed upgrade job via the upgrade service; viewers get 403."""
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot retry upgrades")
    from app.services.upgrade_service import retry_failed_upgrade as svc_retry
    await svc_retry(str(job_id))
    response = {"status": "ok", "message": "Upgrade retry started"}
    return response
@router.post(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}/resume",
    summary="Resume a paused mass rollout",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("20/minute")
async def resume_rollout_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, str]:
    """Resume a paused rollout via the upgrade service; viewers get 403."""
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot resume rollouts")
    from app.services.upgrade_service import resume_mass_upgrade as svc_resume
    await svc_resume(str(rollout_group_id))
    response = {"status": "ok", "message": "Rollout resumed"}
    return response
@router.post(
    "/tenants/{tenant_id}/firmware/rollouts/{rollout_group_id}/abort",
    summary="Abort remaining devices in a paused rollout",
    dependencies=[require_scope("firmware:write")],
)
@limiter.limit("5/minute")
async def abort_rollout_endpoint(
    request: Request,
    tenant_id: uuid.UUID,
    rollout_group_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Abort a rollout's remaining devices; returns how many were aborted."""
    await _check_tenant_access(current_user, tenant_id, db)
    if current_user.role == "viewer":
        raise HTTPException(403, "Viewers cannot abort rollouts")
    from app.services.upgrade_service import abort_mass_upgrade as svc_abort
    aborted_count = await svc_abort(str(rollout_group_id))
    return {"status": "ok", "aborted_count": aborted_count}

View File

@@ -0,0 +1,309 @@
"""Maintenance windows API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Maintenance window CRUD (list, create, update, delete)
- Filterable by status: upcoming, active, past
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above for all operations.
"""
import json
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["maintenance-windows"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure the current user may operate on ``tenant_id``.

    Super admins may access any tenant; the DB session's tenant context is
    re-pointed at the target tenant so RLS permits the queries.  Everyone
    else must belong to the tenant or receive HTTP 403.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id == tenant_id:
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access denied to this tenant",
    )
def _require_operator(current_user: CurrentUser) -> None:
    """Reject viewer-role users with HTTP 403; every other role passes."""
    if current_user.role != "viewer":
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Requires at least operator role.",
    )
# ---------------------------------------------------------------------------
# Request/response schemas
# ---------------------------------------------------------------------------
class MaintenanceWindowCreate(BaseModel):
    """Request body for creating a maintenance window."""

    model_config = ConfigDict(extra="forbid")
    # Human-readable window name.
    name: str
    # Device UUIDs covered by the window; defaults to none selected.
    device_ids: list[str] = []
    # Window bounds; the create endpoint enforces end_at > start_at.
    start_at: datetime
    end_at: datetime
    # Presumably suppresses alerting for covered devices during the window —
    # confirm against the alert pipeline.
    suppress_alerts: bool = True
    notes: Optional[str] = None
class MaintenanceWindowUpdate(BaseModel):
    """Partial-update body: only fields that are not None are written."""

    model_config = ConfigDict(extra="forbid")
    name: Optional[str] = None
    device_ids: Optional[list[str]] = None
    start_at: Optional[datetime] = None
    end_at: Optional[datetime] = None
    suppress_alerts: Optional[bool] = None
    notes: Optional[str] = None
class MaintenanceWindowResponse(BaseModel):
    """Serialized maintenance window shape returned by the API.

    NOTE(review): the CRUD handlers in this module return plain dicts; this
    model is not referenced in the visible code — confirm it is used
    elsewhere (e.g. as a response_model) before relying on it.
    """

    model_config = ConfigDict(extra="forbid")
    id: str
    tenant_id: str
    name: str
    device_ids: list[str]
    # Timestamps serialized as ISO-format strings.
    start_at: str
    end_at: str
    suppress_alerts: bool
    notes: Optional[str] = None
    created_by: Optional[str] = None
    created_at: str
# ---------------------------------------------------------------------------
# CRUD endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/maintenance-windows",
    summary="List maintenance windows for tenant",
)
async def list_maintenance_windows(
    tenant_id: uuid.UUID,
    window_status: Optional[str] = Query(None, alias="status"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """List the tenant's maintenance windows, newest start time first.

    The optional ``status`` query parameter narrows the result to windows
    that are currently active, still upcoming, or already past; any other
    value (or none) returns every window.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    # Map each recognised status keyword to a fixed SQL predicate; anything
    # else matches all rows.  The predicates are literals, never user input.
    status_predicates = {
        "active": "mw.start_at <= NOW() AND mw.end_at >= NOW()",
        "upcoming": "mw.start_at > NOW()",
        "past": "mw.end_at < NOW()",
    }
    where = status_predicates.get(window_status or "", "1=1")
    result = await db.execute(
        text(f"""
            SELECT mw.id, mw.tenant_id, mw.name, mw.device_ids,
                   mw.start_at, mw.end_at, mw.suppress_alerts,
                   mw.notes, mw.created_by, mw.created_at
            FROM maintenance_windows mw
            WHERE {where}
            ORDER BY mw.start_at DESC
        """),
        {},
    )
    windows: list[dict[str, Any]] = []
    for row in result.fetchall():
        windows.append(
            {
                "id": str(row[0]),
                "tenant_id": str(row[1]),
                "name": row[2],
                "device_ids": row[3] if isinstance(row[3], list) else [],
                "start_at": row[4].isoformat() if row[4] else None,
                "end_at": row[5].isoformat() if row[5] else None,
                "suppress_alerts": row[6],
                "notes": row[7],
                "created_by": str(row[8]) if row[8] else None,
                "created_at": row[9].isoformat() if row[9] else None,
            }
        )
    return windows
@router.post(
    "/tenants/{tenant_id}/maintenance-windows",
    summary="Create maintenance window",
    status_code=status.HTTP_201_CREATED,
)
@limiter.limit("20/minute")
async def create_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    body: MaintenanceWindowCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Create a maintenance window and return its full representation.

    Raises 422 when end_at is not strictly after start_at.  Requires at
    least operator role.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    if body.end_at <= body.start_at:
        raise HTTPException(422, "end_at must be after start_at")
    window_id = str(uuid.uuid4())
    # RETURNING created_at so the response reflects the database-assigned
    # timestamp.  Previously the handler fabricated it with the deprecated,
    # naive datetime.utcnow(), which could disagree with the stored row.
    result = await db.execute(
        text("""
            INSERT INTO maintenance_windows
            (id, tenant_id, name, device_ids, start_at, end_at,
             suppress_alerts, notes, created_by)
            VALUES
            (CAST(:id AS uuid), CAST(:tenant_id AS uuid),
             :name, CAST(:device_ids AS jsonb), :start_at, :end_at,
             :suppress_alerts, :notes, CAST(:created_by AS uuid))
            RETURNING created_at
        """),
        {
            "id": window_id,
            "tenant_id": str(tenant_id),
            "name": body.name,
            "device_ids": json.dumps(body.device_ids),
            "start_at": body.start_at,
            "end_at": body.end_at,
            "suppress_alerts": body.suppress_alerts,
            "notes": body.notes,
            "created_by": str(current_user.user_id),
        },
    )
    inserted = result.fetchone()
    await db.commit()
    # Fall back to the old behaviour if the column has no DB default.
    created_at = (
        inserted[0].isoformat()
        if inserted and inserted[0]
        else datetime.utcnow().isoformat()
    )
    return {
        "id": window_id,
        "tenant_id": str(tenant_id),
        "name": body.name,
        "device_ids": body.device_ids,
        "start_at": body.start_at.isoformat(),
        "end_at": body.end_at.isoformat(),
        "suppress_alerts": body.suppress_alerts,
        "notes": body.notes,
        "created_by": str(current_user.user_id),
        "created_at": created_at,
    }
@router.put(
    "/tenants/{tenant_id}/maintenance-windows/{window_id}",
    summary="Update maintenance window",
)
@limiter.limit("20/minute")
async def update_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    window_id: uuid.UUID,
    body: MaintenanceWindowUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Partially update a maintenance window; only provided fields change.

    Raises 404 when the window does not exist and 422 when both bounds are
    supplied and end_at is not after start_at.  Requires operator role.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    # Mirror the create endpoint's ordering rule.  Previously updates could
    # produce an inverted window (end before start) with no validation.
    # NOTE(review): when only one bound changes, ordering against the stored
    # other bound is still unchecked — would require a pre-read to enforce.
    if (
        body.start_at is not None
        and body.end_at is not None
        and body.end_at <= body.start_at
    ):
        raise HTTPException(422, "end_at must be after start_at")
    # Build dynamic SET clause for partial updates; fragments are fixed
    # literals and values flow through bind parameters only.
    set_parts: list[str] = ["updated_at = NOW()"]
    params: dict[str, Any] = {"window_id": str(window_id)}
    if body.name is not None:
        set_parts.append("name = :name")
        params["name"] = body.name
    if body.device_ids is not None:
        set_parts.append("device_ids = CAST(:device_ids AS jsonb)")
        params["device_ids"] = json.dumps(body.device_ids)
    if body.start_at is not None:
        set_parts.append("start_at = :start_at")
        params["start_at"] = body.start_at
    if body.end_at is not None:
        set_parts.append("end_at = :end_at")
        params["end_at"] = body.end_at
    if body.suppress_alerts is not None:
        set_parts.append("suppress_alerts = :suppress_alerts")
        params["suppress_alerts"] = body.suppress_alerts
    if body.notes is not None:
        set_parts.append("notes = :notes")
        params["notes"] = body.notes
    set_clause = ", ".join(set_parts)
    result = await db.execute(
        text(f"""
            UPDATE maintenance_windows
            SET {set_clause}
            WHERE id = CAST(:window_id AS uuid)
            RETURNING id, tenant_id, name, device_ids, start_at, end_at,
                      suppress_alerts, notes, created_by, created_at
        """),
        params,
    )
    row = result.fetchone()
    if not row:
        raise HTTPException(404, "Maintenance window not found")
    await db.commit()
    return {
        "id": str(row[0]),
        "tenant_id": str(row[1]),
        "name": row[2],
        "device_ids": row[3] if isinstance(row[3], list) else [],
        "start_at": row[4].isoformat() if row[4] else None,
        "end_at": row[5].isoformat() if row[5] else None,
        "suppress_alerts": row[6],
        "notes": row[7],
        "created_by": str(row[8]) if row[8] else None,
        "created_at": row[9].isoformat() if row[9] else None,
    }
@router.delete(
    "/tenants/{tenant_id}/maintenance-windows/{window_id}",
    summary="Delete maintenance window",
    status_code=status.HTTP_204_NO_CONTENT,
)
@limiter.limit("5/minute")
async def delete_maintenance_window(
    request: Request,
    tenant_id: uuid.UUID,
    window_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete one maintenance window; 404 when no row matched."""
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    delete_sql = text(
        "DELETE FROM maintenance_windows WHERE id = CAST(:id AS uuid) RETURNING id"
    )
    result = await db.execute(delete_sql, {"id": str(window_id)})
    deleted_row = result.fetchone()
    if deleted_row is None:
        raise HTTPException(404, "Maintenance window not found")
    await db.commit()

View File

@@ -0,0 +1,414 @@
"""
Metrics API endpoints for querying TimescaleDB hypertables.
All device-scoped routes are tenant-scoped under
/api/tenants/{tenant_id}/devices/{device_id}/metrics/*.
Fleet summary endpoints are under /api/tenants/{tenant_id}/fleet/summary
and /api/fleet/summary (super_admin cross-tenant).
RLS is enforced via get_db() — the app_user engine applies tenant filtering
automatically based on the SET LOCAL app.current_tenant context.
All endpoints require authentication (get_current_user) and enforce
tenant access via _check_tenant_access.
"""
import uuid
from datetime import datetime, timedelta
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.middleware.tenant_context import CurrentUser, get_current_user
router = APIRouter(tags=["metrics"])
def _bucket_for_range(start: datetime, end: datetime) -> timedelta:
"""
Select an appropriate time_bucket size based on the requested time range.
Shorter ranges get finer granularity; longer ranges get coarser buckets
to keep result sets manageable.
Returns a timedelta because asyncpg requires a Python timedelta (not a
string interval literal) when binding the first argument of time_bucket().
"""
delta = end - start
hours = delta.total_seconds() / 3600
if hours <= 1:
return timedelta(minutes=1)
elif hours <= 6:
return timedelta(minutes=5)
elif hours <= 24:
return timedelta(minutes=15)
elif hours <= 168: # 7 days
return timedelta(hours=1)
elif hours <= 720: # 30 days
return timedelta(hours=6)
else:
return timedelta(days=1)
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Gate access to ``tenant_id`` for the current user.

    Super admins may query any tenant: the session's RLS tenant context is
    switched to the target tenant.  Every other user must belong to the
    tenant, otherwise HTTP 403 is raised.
    """
    if current_user.is_super_admin:
        # Point RLS at the requested tenant so its rows become visible.
        from app.database import set_tenant_context
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
# ---------------------------------------------------------------------------
# Health metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/health",
    summary="Time-bucketed health metrics (CPU, memory, disk, temperature)",
)
async def device_health_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return time-bucketed CPU, memory, disk, and temperature metrics for a device.
    Bucket size adapts automatically to the requested time range.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    # Memory/disk are stored as free/total counters; used-% is derived in SQL
    # with a CASE guard against division by zero (total == 0 yields NULL).
    result = await db.execute(
        text("""
            SELECT
                time_bucket(:bucket, time) AS bucket,
                avg(cpu_load)::smallint AS avg_cpu,
                max(cpu_load)::smallint AS max_cpu,
                avg(CASE WHEN total_memory > 0
                    THEN round((1 - free_memory::float / total_memory) * 100)
                    ELSE NULL END)::smallint AS avg_mem_pct,
                avg(CASE WHEN total_disk > 0
                    THEN round((1 - free_disk::float / total_disk) * 100)
                    ELSE NULL END)::smallint AS avg_disk_pct,
                avg(temperature)::smallint AS avg_temp
            FROM health_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
            GROUP BY bucket
            ORDER BY bucket ASC
        """),
        {"bucket": bucket, "device_id": str(device_id), "start": start, "end": end},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
# ---------------------------------------------------------------------------
# Interface traffic metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/interfaces",
    summary="Time-bucketed interface bandwidth metrics (bps from cumulative byte deltas)",
)
async def device_interface_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    interface: Optional[str] = Query(None, description="Filter to a specific interface name"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return time-bucketed interface traffic metrics for a device.

    Bandwidth (bps) is computed from raw cumulative byte counters using
    SQL LAG() window functions — no poller-side state is required.
    Counter wraps (rx_bytes < prev_rx) are treated as NULL to avoid
    incorrect spikes.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    # Build interface filter clause conditionally.
    # The interface name is passed as a bind parameter — never interpolated
    # into the SQL string — so this is safe from SQL injection.
    interface_filter = "AND interface = :interface" if interface else ""
    # Fix: the final filter previously required rx_bps IS NOT NULL alone,
    # which discarded samples where only the rx counter had wrapped and
    # silently dropped still-valid tx bandwidth.  Keep a row whenever either
    # direction has a usable rate; avg()/max() ignore the NULL side.
    sql = f"""
        WITH ordered AS (
            SELECT
                time,
                interface,
                rx_bytes,
                tx_bytes,
                LAG(rx_bytes) OVER (PARTITION BY interface ORDER BY time) AS prev_rx,
                LAG(tx_bytes) OVER (PARTITION BY interface ORDER BY time) AS prev_tx,
                EXTRACT(EPOCH FROM time - LAG(time) OVER (PARTITION BY interface ORDER BY time)) AS dt
            FROM interface_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
              {interface_filter}
        ),
        with_bps AS (
            SELECT
                time,
                interface,
                rx_bytes,
                tx_bytes,
                CASE WHEN rx_bytes >= prev_rx AND dt > 0
                     THEN ((rx_bytes - prev_rx) * 8 / dt)::bigint
                     ELSE NULL END AS rx_bps,
                CASE WHEN tx_bytes >= prev_tx AND dt > 0
                     THEN ((tx_bytes - prev_tx) * 8 / dt)::bigint
                     ELSE NULL END AS tx_bps
            FROM ordered
            WHERE prev_rx IS NOT NULL
        )
        SELECT
            time_bucket(:bucket, time) AS bucket,
            interface,
            avg(rx_bps)::bigint AS avg_rx_bps,
            avg(tx_bps)::bigint AS avg_tx_bps,
            max(rx_bps)::bigint AS max_rx_bps,
            max(tx_bps)::bigint AS max_tx_bps
        FROM with_bps
        WHERE rx_bps IS NOT NULL OR tx_bps IS NOT NULL
        GROUP BY bucket, interface
        ORDER BY interface, bucket ASC
    """
    params: dict[str, Any] = {
        "bucket": bucket,
        "device_id": str(device_id),
        "start": start,
        "end": end,
    }
    if interface:
        params["interface"] = interface
    result = await db.execute(text(sql), params)
    rows = result.mappings().all()
    return [dict(row) for row in rows]
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/interfaces/list",
    summary="List distinct interface names for a device",
)
async def device_interface_list(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[str]:
    """Distinct interface names recorded in interface_metrics for a device."""
    await _check_tenant_access(current_user, tenant_id, db)
    name_query = text("""
        SELECT DISTINCT interface
        FROM interface_metrics
        WHERE device_id = :device_id
        ORDER BY interface
    """)
    result = await db.execute(name_query, {"device_id": str(device_id)})
    return [name for name in result.scalars()]
# ---------------------------------------------------------------------------
# Wireless metrics
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/wireless",
    summary="Time-bucketed wireless metrics (clients, signal, CCQ)",
)
async def device_wireless_metrics(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    start: datetime = Query(..., description="Start of time range (ISO format)"),
    end: datetime = Query(..., description="End of time range (ISO format)"),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Return time-bucketed wireless metrics per interface for a device."""
    await _check_tenant_access(current_user, tenant_id, db)
    bucket = _bucket_for_range(start, end)
    # One row per (bucket, interface); max(frequency) collapses the channel
    # value per bucket — NOTE(review): assumes frequency is stable within a
    # bucket; a mid-bucket channel change reports only the higher value.
    result = await db.execute(
        text("""
            SELECT
                time_bucket(:bucket, time) AS bucket,
                interface,
                avg(client_count)::smallint AS avg_clients,
                max(client_count)::smallint AS max_clients,
                avg(avg_signal)::smallint AS avg_signal,
                avg(ccq)::smallint AS avg_ccq,
                max(frequency) AS frequency
            FROM wireless_metrics
            WHERE device_id = :device_id
              AND time >= :start AND time < :end
            GROUP BY bucket, interface
            ORDER BY interface, bucket ASC
        """),
        {"bucket": bucket, "device_id": str(device_id), "start": start, "end": end},
    )
    rows = result.mappings().all()
    return [dict(row) for row in rows]
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/wireless/latest",
    summary="Latest wireless stats per interface (not time-bucketed)",
)
async def device_wireless_latest(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Most recent wireless_metrics row for each interface of the device."""
    await _check_tenant_access(current_user, tenant_id, db)
    # DISTINCT ON keeps the first row per interface; ordering by time DESC
    # makes that first row the newest reading.
    latest_query = text("""
        SELECT DISTINCT ON (interface)
            interface, client_count, avg_signal, ccq, frequency, time
        FROM wireless_metrics
        WHERE device_id = :device_id
        ORDER BY interface, time DESC
    """)
    result = await db.execute(latest_query, {"device_id": str(device_id)})
    return [dict(reading) for reading in result.mappings()]
# ---------------------------------------------------------------------------
# Sparkline
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/devices/{device_id}/metrics/sparkline",
    summary="Last 12 health readings for sparkline display",
)
async def device_sparkline(
    tenant_id: uuid.UUID,
    device_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """Last 12 CPU readings in chronological order, for the fleet-table
    sparkline."""
    await _check_tenant_access(current_user, tenant_id, db)
    # Inner query grabs the 12 newest samples; the outer ORDER BY flips them
    # back into chronological order for plotting.
    sparkline_query = text("""
        SELECT cpu_load, time
        FROM (
            SELECT cpu_load, time
            FROM health_metrics
            WHERE device_id = :device_id
            ORDER BY time DESC
            LIMIT 12
        ) sub
        ORDER BY time ASC
    """)
    result = await db.execute(sparkline_query, {"device_id": str(device_id)})
    return [dict(point) for point in result.mappings()]
# ---------------------------------------------------------------------------
# Fleet summary
# ---------------------------------------------------------------------------
_FLEET_SUMMARY_SQL = """
SELECT
d.id, d.hostname, d.ip_address, d.status, d.model, d.last_seen,
d.uptime_seconds, d.last_cpu_load, d.last_memory_used_pct,
d.latitude, d.longitude,
d.tenant_id, t.name AS tenant_name
FROM devices d
JOIN tenants t ON d.tenant_id = t.id
ORDER BY t.name, d.hostname
"""
@router.get(
    "/tenants/{tenant_id}/fleet/summary",
    summary="Fleet summary for a tenant (latest metrics per device)",
)
async def fleet_summary(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return fleet summary for a single tenant.

    Queries the devices table (not hypertables) for speed.
    RLS filters to only devices belonging to the tenant automatically.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # No explicit tenant filter in the SQL: visibility relies on the RLS
    # tenant context established by _check_tenant_access / get_db.
    result = await db.execute(text(_FLEET_SUMMARY_SQL))
    rows = result.mappings().all()
    return [dict(row) for row in rows]
@router.get(
    "/fleet/summary",
    summary="Cross-tenant fleet summary (super_admin only)",
)
async def fleet_summary_all(
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict[str, Any]]:
    """
    Return fleet summary across ALL tenants.

    Requires super_admin role. The RLS policy for super_admin returns all
    rows across all tenants, so the same SQL query works without modification.
    This avoids the N+1 problem of fetching per-tenant summaries in a loop.

    Raises:
        HTTPException 403: caller is not a super admin.
    """
    # Consistency fix: use the CurrentUser.is_super_admin helper, as the other
    # routers do, instead of comparing the raw role string.
    if not current_user.is_super_admin:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Super admin required",
        )
    result = await db.execute(text(_FLEET_SUMMARY_SQL))
    rows = result.mappings().all()
    return [dict(row) for row in rows]

View File

@@ -0,0 +1,146 @@
"""Report generation API endpoint.
POST /api/tenants/{tenant_id}/reports/generate
Generates PDF or CSV reports for device inventory, metrics summary,
alert history, and change log.
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: require at least operator role.
"""
import uuid
from datetime import datetime
from enum import Enum
from typing import Optional
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, ConfigDict
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.services.report_service import generate_report
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["reports"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Ensure ``current_user`` may act on ``tenant_id``.

    Super admins are always allowed; their DB session is pointed at the
    target tenant so RLS-scoped queries see its rows. Everyone else must
    belong to the tenant, otherwise a 403 is raised.
    """
    if current_user.is_super_admin:
        # Cross-tenant access: switch the session's RLS tenant context.
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
def _require_operator(current_user: CurrentUser) -> None:
    """Reject viewers: generating reports needs operator role or higher."""
    if current_user.role != "viewer":
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Reports require at least operator role.",
    )
# ---------------------------------------------------------------------------
# Request schema
# ---------------------------------------------------------------------------
class ReportType(str, Enum):
    """Available report kinds; values appear verbatim in API payloads."""

    device_inventory = "device_inventory"  # point-in-time, no date range
    metrics_summary = "metrics_summary"    # requires date_from/date_to
    alert_history = "alert_history"        # requires date_from/date_to
    change_log = "change_log"              # requires date_from/date_to
class ReportFormat(str, Enum):
    """Supported output formats for generated reports."""

    pdf = "pdf"
    csv = "csv"
class ReportRequest(BaseModel):
    """Request body for POST /tenants/{tenant_id}/reports/generate."""

    # extra="forbid" rejects unknown fields with a 422 instead of ignoring them.
    model_config = ConfigDict(extra="forbid")

    type: ReportType
    # Date range: required by the endpoint for every type except
    # device_inventory (validated there, not here).
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    format: ReportFormat = ReportFormat.pdf
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/reports/generate",
    summary="Generate a report (PDF or CSV)",
    response_class=StreamingResponse,
)
async def generate_report_endpoint(
    tenant_id: uuid.UUID,
    body: ReportRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> StreamingResponse:
    """Generate and download a report as PDF or CSV.

    - device_inventory: no date range required
    - metrics_summary, alert_history, change_log: date_from and date_to required

    Raises:
        HTTPException 403: tenant mismatch or viewer role.
        HTTPException 422: missing or inverted date range.
        HTTPException 500: report service failure.
    """
    import io

    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)

    # Validate date range for time-based reports
    if body.type != ReportType.device_inventory:
        if not body.date_from or not body.date_to:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"date_from and date_to are required for {body.type.value} reports.",
            )
        if body.date_from > body.date_to:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail="date_from must be before date_to.",
            )

    try:
        file_bytes, content_type, filename = await generate_report(
            db=db,
            tenant_id=tenant_id,
            report_type=body.type.value,
            date_from=body.date_from,
            date_to=body.date_to,
            fmt=body.format.value,
        )
    except Exception as exc:
        logger.error("report_generation_failed", error=str(exc), report_type=body.type.value)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Report generation failed: {str(exc)}",
        ) from exc

    # BUG FIX: the filename returned by the report service was previously
    # dropped and a literal placeholder string was sent in the header;
    # interpolate the real filename into the download disposition.
    return StreamingResponse(
        io.BytesIO(file_bytes),
        media_type=content_type,
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Content-Length": str(len(file_bytes)),
        },
    )

View File

@@ -0,0 +1,155 @@
"""System settings router — global SMTP configuration.
Super-admin only. Stores SMTP settings in system_settings table with
Transit encryption for passwords. Falls back to .env values.
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy import text
from app.config import settings
from app.database import AdminAsyncSessionLocal
from app.middleware.rbac import require_role
from app.services.email_service import SMTPConfig, send_test_email, test_smtp_connection
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/settings", tags=["settings"])
# Keys persisted in the system_settings table for global SMTP configuration.
# Read and written as a batch by the endpoints below.
SMTP_KEYS = [
    "smtp_host",
    "smtp_port",
    "smtp_user",
    "smtp_password",  # stored value is never returned to clients verbatim
    "smtp_use_tls",   # stored as the string "true"/"false"
    "smtp_from_address",
    "smtp_provider",
]
class SMTPSettingsUpdate(BaseModel):
    """Request body for PUT /settings/smtp (global SMTP configuration)."""

    smtp_host: str
    smtp_port: int = 587
    smtp_user: Optional[str] = None
    # None means "keep the currently stored password" (see update endpoint).
    smtp_password: Optional[str] = None
    smtp_use_tls: bool = False
    smtp_from_address: str = "noreply@example.com"
    smtp_provider: str = "custom"
class SMTPTestRequest(BaseModel):
    """Request body for POST /settings/smtp/test.

    ``to`` is the test-message recipient; every other field, when provided,
    overrides the saved/env configuration for this test only.
    """

    to: str
    smtp_host: Optional[str] = None
    smtp_port: Optional[int] = None
    smtp_user: Optional[str] = None
    smtp_password: Optional[str] = None
    smtp_use_tls: Optional[bool] = None
    smtp_from_address: Optional[str] = None
async def _get_system_settings(keys: list[str]) -> dict:
    """Fetch the given keys from the system_settings table as a key→value dict."""
    query = text("SELECT key, value FROM system_settings WHERE key = ANY(:keys)")
    async with AdminAsyncSessionLocal() as session:
        result = await session.execute(query, {"keys": keys})
        rows = result.fetchall()
    return {key: value for key, value in rows}
async def _set_system_settings(updates: dict, user_id: str) -> None:
    """Write each key/value pair to system_settings (insert-or-update).

    Values are coerced to ``str`` (NULL for None) and the updating user plus
    timestamp are recorded on every row.
    """
    upsert = text("""
        INSERT INTO system_settings (key, value, updated_by, updated_at)
        VALUES (:key, :value, CAST(:user_id AS uuid), now())
        ON CONFLICT (key) DO UPDATE
        SET value = :value, updated_by = CAST(:user_id AS uuid), updated_at = now()
    """)
    async with AdminAsyncSessionLocal() as session:
        for key, value in updates.items():
            params = {
                "key": key,
                "value": str(value) if value is not None else None,
                "user_id": user_id,
            }
            await session.execute(upsert, params)
        await session.commit()
async def get_smtp_config() -> SMTPConfig:
    """Get SMTP config from system_settings, falling back to .env.

    Each field independently prefers the database value and falls back to the
    corresponding ``settings.SMTP_*`` environment value when the DB entry is
    missing or empty.
    """
    db_settings = await _get_system_settings(SMTP_KEYS)
    return SMTPConfig(
        host=db_settings.get("smtp_host") or settings.SMTP_HOST,
        port=int(db_settings.get("smtp_port") or settings.SMTP_PORT),
        user=db_settings.get("smtp_user") or settings.SMTP_USER,
        password=db_settings.get("smtp_password") or settings.SMTP_PASSWORD,
        # use_tls is stored as the string "true"/"false"; only the exact value
        # "true" (case-insensitive) enables TLS.
        use_tls=(db_settings.get("smtp_use_tls") or str(settings.SMTP_USE_TLS)).lower() == "true",
        from_address=db_settings.get("smtp_from_address") or settings.SMTP_FROM_ADDRESS,
    )
@router.get("/smtp")
async def get_smtp_settings(user=Depends(require_role("super_admin"))):
"""Get current global SMTP configuration. Password is redacted."""
db_settings = await _get_system_settings(SMTP_KEYS)
return {
"smtp_host": db_settings.get("smtp_host") or settings.SMTP_HOST,
"smtp_port": int(db_settings.get("smtp_port") or settings.SMTP_PORT),
"smtp_user": db_settings.get("smtp_user") or settings.SMTP_USER or "",
"smtp_use_tls": (db_settings.get("smtp_use_tls") or str(settings.SMTP_USE_TLS)).lower() == "true",
"smtp_from_address": db_settings.get("smtp_from_address") or settings.SMTP_FROM_ADDRESS,
"smtp_provider": db_settings.get("smtp_provider") or "custom",
"smtp_password_set": bool(db_settings.get("smtp_password") or settings.SMTP_PASSWORD),
"source": "database" if db_settings.get("smtp_host") else "environment",
}
@router.put("/smtp")
async def update_smtp_settings(
data: SMTPSettingsUpdate,
user=Depends(require_role("super_admin")),
):
"""Update global SMTP configuration."""
updates = {
"smtp_host": data.smtp_host,
"smtp_port": str(data.smtp_port),
"smtp_user": data.smtp_user,
"smtp_use_tls": str(data.smtp_use_tls).lower(),
"smtp_from_address": data.smtp_from_address,
"smtp_provider": data.smtp_provider,
}
if data.smtp_password is not None:
updates["smtp_password"] = data.smtp_password
await _set_system_settings(updates, str(user.id))
return {"status": "ok"}
@router.post("/smtp/test")
async def test_smtp(
data: SMTPTestRequest,
user=Depends(require_role("super_admin")),
):
"""Test SMTP connection and optionally send a test email."""
# Use provided values or fall back to saved config
saved = await get_smtp_config()
config = SMTPConfig(
host=data.smtp_host or saved.host,
port=data.smtp_port if data.smtp_port is not None else saved.port,
user=data.smtp_user if data.smtp_user is not None else saved.user,
password=data.smtp_password if data.smtp_password is not None else saved.password,
use_tls=data.smtp_use_tls if data.smtp_use_tls is not None else saved.use_tls,
from_address=data.smtp_from_address or saved.from_address,
)
conn_result = await test_smtp_connection(config)
if not conn_result["success"]:
return conn_result
if data.to:
return await send_test_email(data.to, config)
return conn_result

141
backend/app/routers/sse.py Normal file
View File

@@ -0,0 +1,141 @@
"""SSE streaming endpoint for real-time event delivery.
Provides a Server-Sent Events endpoint per tenant that streams device status,
alert, config push, and firmware progress events in real time. Authentication
is via a short-lived, single-use exchange token (obtained from POST /auth/sse-token)
to avoid exposing the full JWT in query parameters.
"""
import asyncio
import json
import uuid
from typing import AsyncGenerator, Optional
import redis.asyncio as aioredis
import structlog
from fastapi import APIRouter, HTTPException, Query, Request, status
from sse_starlette.sse import EventSourceResponse, ServerSentEvent
from app.services.sse_manager import SSEConnectionManager
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["sse"])
# ─── Redis for SSE token validation ───────────────────────────────────────────
# Module-level client, created on first use so importing this module does not
# require a reachable Redis.
_redis: aioredis.Redis | None = None


async def _get_sse_redis() -> aioredis.Redis:
    """Lazily initialise and return the SSE Redis client."""
    global _redis
    if _redis is None:
        # Deferred import -- presumably to avoid a circular import at module
        # load time; confirm.
        # NOTE(review): not concurrency-guarded; two concurrent first calls
        # could each create a client. Looks harmless, but verify.
        from app.config import settings

        _redis = aioredis.from_url(settings.REDIS_URL, decode_responses=True)
    return _redis
async def _validate_sse_token(token: str) -> dict:
    """Exchange a short-lived SSE token for its user context.

    Tokens are single-use: GETDEL removes the key atomically on first read,
    so any replayed or expired token fails.

    Args:
        token: SSE exchange token string (from query param).

    Returns:
        Dict with user_id, tenant_id, and role.

    Raises:
        HTTPException 401: If the token is invalid, expired, or already used.
    """
    redis = await _get_sse_redis()
    payload = await redis.getdel(f"sse_token:{token}")  # Single-use: delete on retrieval
    if payload:
        return json.loads(payload)
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid or expired SSE token",
    )
@router.get(
    "/tenants/{tenant_id}/events/stream",
    summary="SSE event stream for real-time tenant events",
    response_class=EventSourceResponse,
)
async def event_stream(
    request: Request,
    tenant_id: uuid.UUID,
    token: str = Query(..., description="Short-lived SSE exchange token (from POST /auth/sse-token)"),
) -> EventSourceResponse:
    """Stream real-time events for a tenant via Server-Sent Events.

    Event types: device_status, alert_fired, alert_resolved, config_push,
    firmware_progress, metric_update.

    Supports Last-Event-ID header for reconnection replay.
    Sends heartbeat comments every 15 seconds on idle connections.
    """
    # Validate exchange token from query parameter (single-use, 30s TTL)
    user_context = await _validate_sse_token(token)
    user_role = user_context.get("role", "")
    user_tenant_id = user_context.get("tenant_id")
    user_id = user_context.get("user_id", "")

    # Authorization: user must belong to the requested tenant or be super_admin
    if user_role != "super_admin" and (user_tenant_id is None or str(user_tenant_id) != str(tenant_id)):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Not authorized for this tenant",
        )

    # super_admin receives events from ALL tenants (tenant_id filter = None)
    filter_tenant_id: Optional[str] = None if user_role == "super_admin" else str(tenant_id)

    # Generate unique connection ID
    connection_id = f"sse-{uuid.uuid4().hex[:12]}"

    # Check for Last-Event-ID header (reconnection replay)
    last_event_id = request.headers.get("Last-Event-ID")

    logger.info(
        "sse.stream_requested",
        connection_id=connection_id,
        tenant_id=str(tenant_id),
        user_id=user_id,
        role=user_role,
        last_event_id=last_event_id,
    )

    # NOTE(review): a fresh SSEConnectionManager is constructed per request --
    # presumably the class shares state internally (singleton/registry);
    # confirm, otherwise disconnect() below may not tear down this
    # connection's queue.
    manager = SSEConnectionManager()
    queue = await manager.connect(
        connection_id=connection_id,
        tenant_id=filter_tenant_id,
        last_event_id=last_event_id,
    )

    async def event_generator() -> AsyncGenerator[ServerSentEvent, None]:
        """Yield SSE events from the queue with 15s heartbeat on idle."""
        try:
            while True:
                try:
                    # Wait for the next event; the 15s timeout doubles as the
                    # heartbeat interval.
                    event = await asyncio.wait_for(queue.get(), timeout=15.0)
                    yield ServerSentEvent(
                        data=event["data"],
                        event=event["event"],
                        id=event["id"],
                    )
                except asyncio.TimeoutError:
                    # Send heartbeat comment to keep connection alive
                    yield ServerSentEvent(comment="heartbeat")
                except asyncio.CancelledError:
                    # Client disconnected or server shutdown: stop streaming.
                    break
        finally:
            # NOTE(review): disconnect() takes no connection_id -- confirm the
            # manager tracks this connection internally.
            await manager.disconnect()
            logger.info("sse.stream_closed", connection_id=connection_id)

    return EventSourceResponse(event_generator())

View File

@@ -0,0 +1,613 @@
"""
Config template CRUD, preview, and push API endpoints.
All routes are tenant-scoped under:
/api/tenants/{tenant_id}/templates/
Provides:
- GET /templates -- list templates (optional tag filter)
- POST /templates -- create a template
- GET /templates/{id} -- get single template
- PUT /templates/{id} -- update a template
- DELETE /templates/{id} -- delete a template
- POST /templates/{id}/preview -- preview rendered template for a device
- POST /templates/{id}/push -- push template to devices (sequential rollout)
- GET /templates/push-status/{rollout_id} -- poll push progress
RLS is enforced via get_db() (app_user engine with tenant context).
RBAC: viewer = read (GET/preview); operator and above = write (POST/PUT/DELETE/push).
"""
import asyncio
import logging
import uuid
from datetime import datetime, timezone
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.middleware.rate_limit import limiter
from app.middleware.rbac import require_min_role, require_scope
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.config_template import ConfigTemplate, ConfigTemplateTag, TemplatePushJob
from app.models.device import Device
from app.services import template_service
logger = logging.getLogger(__name__)
router = APIRouter(tags=["templates"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    Non-super-admins must belong to the tenant (403 otherwise). Super admins
    get the session's RLS context switched to the requested tenant.
    """
    if not current_user.is_super_admin:
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied: you do not belong to this tenant.",
            )
        return
    # Deferred import as in the original module layout.
    from app.database import set_tenant_context

    await set_tenant_context(db, str(tenant_id))
def _serialize_template(template: ConfigTemplate, include_content: bool = False) -> dict:
    """Convert a ConfigTemplate ORM row into an API response dict.

    Content and variable definitions are only included when requested
    (detail views), keeping list responses small.
    """
    variables = template.variables or []
    payload: dict[str, Any] = {
        "id": str(template.id),
        "name": template.name,
        "description": template.description,
        "tags": [tag.name for tag in template.tags],
        "variable_count": len(variables),
        "created_at": template.created_at.isoformat(),
        "updated_at": template.updated_at.isoformat(),
    }
    if include_content:
        payload["content"] = template.content
        payload["variables"] = variables
    return payload
# ---------------------------------------------------------------------------
# Request/Response schemas
# ---------------------------------------------------------------------------
class VariableDef(BaseModel):
    """Declaration of a single template variable (name, type, default)."""

    model_config = ConfigDict(extra="forbid")

    name: str
    # Validation type applied at preview/push time.
    type: str = "string"  # string | ip | integer | boolean | subnet
    default: Optional[str] = None
    description: Optional[str] = None
class TemplateCreateRequest(BaseModel):
    """Request body for creating a config template."""

    model_config = ConfigDict(extra="forbid")

    name: str
    description: Optional[str] = None
    # Jinja2 template source.
    content: str
    variables: list[VariableDef] = []
    tags: list[str] = []
class TemplateUpdateRequest(BaseModel):
    """Request body for updating a template (full replacement, not a patch)."""

    model_config = ConfigDict(extra="forbid")

    name: str
    description: Optional[str] = None
    # Jinja2 template source.
    content: str
    variables: list[VariableDef] = []
    tags: list[str] = []
class PreviewRequest(BaseModel):
    """Request body for previewing a template rendered against one device."""

    model_config = ConfigDict(extra="forbid")

    device_id: str
    # Caller-supplied variable values; defaults fill in any missing ones.
    variables: dict[str, str] = {}
class PushRequest(BaseModel):
    """Request body for pushing a template to one or more devices."""

    model_config = ConfigDict(extra="forbid")

    device_ids: list[str]
    # Caller-supplied variable values; defaults fill in any missing ones.
    variables: dict[str, str] = {}
# ---------------------------------------------------------------------------
# CRUD endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/templates",
    summary="List config templates",
    dependencies=[require_scope("config:read")],
)
async def list_templates(
    tenant_id: uuid.UUID,
    tag: Optional[str] = Query(None, description="Filter by tag name"),
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> list[dict]:
    """List all config templates for a tenant with optional tag filtering."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.tenant_id == tenant_id)  # type: ignore[arg-type]
        .order_by(ConfigTemplate.updated_at.desc())
    )
    if tag:
        # Restrict to templates carrying the requested tag (tenant-scoped).
        tagged_ids = select(ConfigTemplateTag.template_id).where(
            ConfigTemplateTag.name == tag,
            ConfigTemplateTag.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        stmt = stmt.where(ConfigTemplate.id.in_(tagged_ids))  # type: ignore[attr-defined]
    found = (await db.execute(stmt)).scalars().all()
    return [_serialize_template(item) for item in found]
@router.post(
    "/tenants/{tenant_id}/templates",
    summary="Create a config template",
    status_code=status.HTTP_201_CREATED,
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def create_template(
    request: Request,
    tenant_id: uuid.UUID,
    body: TemplateCreateRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Create a new config template with Jinja2 content and variable definitions.

    Variables referenced in the content but absent from ``body.variables``
    are logged and accepted as-is; they receive no declared type, default,
    or validation.

    Returns:
        The created template serialized with content and variables included.
    """
    await _check_tenant_access(current_user, tenant_id, db)

    # Compare variables referenced in the Jinja2 content against the declared
    # definitions so authors get a heads-up about typos or omissions.
    detected = template_service.extract_variables(body.content)
    provided_names = {v.name for v in body.variables}
    unmatched = set(detected) - provided_names
    if unmatched:
        # BUG FIX: the previous message claimed undeclared variables were
        # "auto-adding as string type", but nothing is added -- they simply
        # remain undeclared. The log now states what actually happens.
        logger.warning(
            "Template '%s' references undeclared variables: %s (no definitions added)",
            body.name, unmatched,
        )

    # Create the template row first so its generated ID is available for tags.
    template = ConfigTemplate(
        tenant_id=tenant_id,
        name=body.name,
        description=body.description,
        content=body.content,
        variables=[v.model_dump() for v in body.variables],
    )
    db.add(template)
    await db.flush()  # Get the generated ID

    for tag_name in body.tags:
        db.add(
            ConfigTemplateTag(
                tenant_id=tenant_id,
                name=tag_name,
                template_id=template.id,
            )
        )
    await db.flush()

    # Re-query with the tags relationship eagerly loaded for serialization.
    result = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.id == template.id)  # type: ignore[arg-type]
    )
    return _serialize_template(result.scalar_one(), include_content=True)
@router.get(
    "/tenants/{tenant_id}/templates/{template_id}",
    summary="Get a single config template",
    dependencies=[require_scope("config:read")],
)
async def get_template(
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Get a config template with full content, variables, and tags."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    found = (await db.execute(stmt)).scalar_one_or_none()
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    return _serialize_template(found, include_content=True)
@router.put(
    "/tenants/{tenant_id}/templates/{template_id}",
    summary="Update a config template",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("20/minute")
async def update_template(
    request: Request,
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: TemplateUpdateRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Update an existing config template; fields and tags are replaced wholesale."""
    await _check_tenant_access(current_user, tenant_id, db)
    lookup = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = lookup.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )

    # Overwrite scalar fields from the request body.
    template.name = body.name
    template.description = body.description
    template.content = body.content
    template.variables = [v.model_dump() for v in body.variables]

    # Tags are replaced wholesale: drop the old rows, insert the new set.
    await db.execute(
        delete(ConfigTemplateTag).where(
            ConfigTemplateTag.template_id == template_id  # type: ignore[arg-type]
        )
    )
    for tag_name in body.tags:
        db.add(
            ConfigTemplateTag(
                tenant_id=tenant_id,
                name=tag_name,
                template_id=template.id,
            )
        )
    await db.flush()

    # Re-select so the tags relationship reflects the new rows.
    refreshed = await db.execute(
        select(ConfigTemplate)
        .options(selectinload(ConfigTemplate.tags))
        .where(ConfigTemplate.id == template.id)  # type: ignore[arg-type]
    )
    return _serialize_template(refreshed.scalar_one(), include_content=True)
@router.delete(
    "/tenants/{tenant_id}/templates/{template_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete a config template",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def delete_template(
    request: Request,
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a config template. Tags are cascade-deleted. Push jobs are SET NULL."""
    await _check_tenant_access(current_user, tenant_id, db)
    lookup = await db.execute(
        select(ConfigTemplate).where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    doomed = lookup.scalar_one_or_none()
    if doomed is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    await db.delete(doomed)
# ---------------------------------------------------------------------------
# Preview & Push endpoints
# ---------------------------------------------------------------------------
@router.post(
    "/tenants/{tenant_id}/templates/{template_id}/preview",
    summary="Preview template rendered for a specific device",
    dependencies=[require_scope("config:read")],
)
async def preview_template(
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: PreviewRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Render a template with device context and custom variables for preview.

    Raises:
        HTTPException 404: template or device not found.
        HTTPException 422: variable validation or rendering failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Load template
    result = await db.execute(
        select(ConfigTemplate).where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = result.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    # Load device
    # NOTE(review): no explicit tenant filter here -- presumably RLS scopes
    # the devices table to the session's tenant context; confirm.
    result = await db.execute(
        select(Device).where(Device.id == body.device_id)  # type: ignore[arg-type]
    )
    device = result.scalar_one_or_none()
    if device is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Device {body.device_id} not found",
        )
    # Validate variables against type definitions
    if template.variables:
        for var_def in template.variables:
            var_name = var_def.get("name", "")
            var_type = var_def.get("type", "string")
            value = body.variables.get(var_name)
            if value is None:
                # Use default if available
                # (mutates body.variables in place so rendering sees it)
                default = var_def.get("default")
                if default is not None:
                    body.variables[var_name] = default
                continue
            error = template_service.validate_variable(var_name, value, var_type)
            if error:
                raise HTTPException(
                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                    detail=error,
                )
    # Render: device identity fields are always available in the template
    # context alongside the caller-supplied variables.
    try:
        rendered = template_service.render_template(
            template.content,
            {
                "hostname": device.hostname,
                "ip_address": device.ip_address,
                "model": device.model,
            },
            body.variables,
        )
    except Exception as exc:
        # Rendering errors surface as a 422 with the original message.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Template rendering failed: {exc}",
        )
    return {
        "rendered": rendered,
        "device_hostname": device.hostname,
    }
@router.post(
    "/tenants/{tenant_id}/templates/{template_id}/push",
    summary="Push template to devices (sequential rollout with panic-revert)",
    dependencies=[require_scope("config:write")],
)
@limiter.limit("5/minute")
async def push_template(
    request: Request,
    tenant_id: uuid.UUID,
    template_id: uuid.UUID,
    body: PushRequest,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("operator")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Start a template push to one or more devices.

    Creates push jobs for each device and starts a background sequential rollout.
    Returns the rollout_id for status polling.

    Raises:
        HTTPException 400: empty device list.
        HTTPException 404: template or any requested device not found.
        HTTPException 422: variable validation or rendering failure.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    # Load template
    result = await db.execute(
        select(ConfigTemplate).where(
            ConfigTemplate.id == template_id,  # type: ignore[arg-type]
            ConfigTemplate.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
    )
    template = result.scalar_one_or_none()
    if template is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Template {template_id} not found",
        )
    if not body.device_ids:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one device_id is required",
        )
    # Validate variables (mutates body.variables in place to apply defaults)
    if template.variables:
        for var_def in template.variables:
            var_name = var_def.get("name", "")
            var_type = var_def.get("type", "string")
            value = body.variables.get(var_name)
            if value is None:
                default = var_def.get("default")
                if default is not None:
                    body.variables[var_name] = default
                continue
            error = template_service.validate_variable(var_name, value, var_type)
            if error:
                raise HTTPException(
                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                    detail=error,
                )
    # One rollout groups all of this request's per-device jobs.
    rollout_id = uuid.uuid4()
    jobs_created = []
    for device_id_str in body.device_ids:
        # Load device to render template per-device
        result = await db.execute(
            select(Device).where(Device.id == device_id_str)  # type: ignore[arg-type]
        )
        device = result.scalar_one_or_none()
        if device is None:
            # Any unknown device aborts the whole request; jobs added so far
            # are not flushed/committed.
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Device {device_id_str} not found",
            )
        # Render template with this device's context
        try:
            rendered = template_service.render_template(
                template.content,
                {
                    "hostname": device.hostname,
                    "ip_address": device.ip_address,
                    "model": device.model,
                },
                body.variables,
            )
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"Template rendering failed for device {device.hostname}: {exc}",
            )
        # Create push job
        job = TemplatePushJob(
            tenant_id=tenant_id,
            template_id=template_id,
            device_id=device.id,
            rollout_id=rollout_id,
            rendered_content=rendered,
            status="pending",
        )
        db.add(job)
        # NOTE(review): job.id is read before the flush below; this only
        # yields a value if the model assigns a client-side default (e.g.
        # uuid4) -- confirm, otherwise "job_id" is None here.
        jobs_created.append({
            "job_id": str(job.id),
            "device_id": str(device.id),
            "device_hostname": device.hostname,
        })
    await db.flush()
    # Start background push task
    # NOTE(review): the task is launched before the request's session
    # commits; push_to_devices presumably opens its own session and re-reads
    # the jobs -- confirm there is no race with the commit.
    asyncio.create_task(template_service.push_to_devices(str(rollout_id)))
    return {
        "rollout_id": str(rollout_id),
        "jobs": jobs_created,
    }
@router.get(
    "/tenants/{tenant_id}/templates/push-status/{rollout_id}",
    summary="Poll push progress for a rollout",
    dependencies=[require_scope("config:read")],
)
async def push_status(
    tenant_id: uuid.UUID,
    rollout_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Return all push job statuses for a rollout with device hostnames."""
    await _check_tenant_access(current_user, tenant_id, db)
    stmt = (
        select(TemplatePushJob, Device.hostname)
        .join(Device, TemplatePushJob.device_id == Device.id)  # type: ignore[arg-type]
        .where(
            TemplatePushJob.rollout_id == rollout_id,  # type: ignore[arg-type]
            TemplatePushJob.tenant_id == tenant_id,  # type: ignore[arg-type]
        )
        .order_by(TemplatePushJob.created_at.asc())
    )
    job_rows = (await db.execute(stmt)).all()
    jobs = [
        {
            "device_id": str(job.device_id),
            "hostname": hostname,
            "status": job.status,
            "error_message": job.error_message,
            "started_at": job.started_at.isoformat() if job.started_at else None,
            "completed_at": job.completed_at.isoformat() if job.completed_at else None,
        }
        for job, hostname in job_rows
    ]
    return {
        "rollout_id": str(rollout_id),
        "jobs": jobs,
    }

View File

@@ -0,0 +1,367 @@
"""
Tenant management endpoints.
GET /api/tenants — list tenants (super_admin: all; tenant_admin: own only)
POST /api/tenants — create tenant (super_admin only)
GET /api/tenants/{id} — get tenant detail
PUT /api/tenants/{id} — update tenant (super_admin only)
DELETE /api/tenants/{id} — delete tenant (super_admin only)
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.middleware.rate_limit import limiter
from app.database import get_admin_db, get_db
from app.middleware.rbac import require_super_admin, require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser
from app.models.device import Device
from app.models.tenant import Tenant
from app.models.user import User
from app.schemas.tenant import TenantCreate, TenantResponse, TenantUpdate
router = APIRouter(prefix="/tenants", tags=["tenants"])
async def _get_tenant_response(
    tenant: Tenant,
    db: AsyncSession,
) -> TenantResponse:
    """Assemble a TenantResponse for ``tenant``, including user and device counts."""

    async def _count(id_column, tenant_column) -> int:
        # Count rows of one model belonging to this tenant; treat NULL as 0.
        res = await db.execute(
            select(func.count(id_column)).where(tenant_column == tenant.id)
        )
        return res.scalar_one() or 0

    users_total = await _count(User.id, User.tenant_id)
    devices_total = await _count(Device.id, Device.tenant_id)
    return TenantResponse(
        id=tenant.id,
        name=tenant.name,
        description=tenant.description,
        contact_email=tenant.contact_email,
        user_count=users_total,
        device_count=devices_total,
        created_at=tenant.created_at,
    )
@router.get("", response_model=list[TenantResponse], summary="List tenants")
async def list_tenants(
current_user: CurrentUser = Depends(require_tenant_admin_or_above),
db: AsyncSession = Depends(get_admin_db),
) -> list[TenantResponse]:
"""
List tenants.
- super_admin: sees all tenants
- tenant_admin: sees only their own tenant
"""
if current_user.is_super_admin:
result = await db.execute(select(Tenant).order_by(Tenant.name))
tenants = result.scalars().all()
else:
if not current_user.tenant_id:
return []
result = await db.execute(
select(Tenant).where(Tenant.id == current_user.tenant_id)
)
tenants = result.scalars().all()
return [await _get_tenant_response(tenant, db) for tenant in tenants]
@router.post("", response_model=TenantResponse, status_code=status.HTTP_201_CREATED, summary="Create a tenant")
@limiter.limit("20/minute")
async def create_tenant(
request: Request,
data: TenantCreate,
current_user: CurrentUser = Depends(require_super_admin),
db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
"""Create a new tenant (super_admin only)."""
# Check for name uniqueness
existing = await db.execute(select(Tenant).where(Tenant.name == data.name))
if existing.scalar_one_or_none():
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Tenant with name '{data.name}' already exists",
)
tenant = Tenant(name=data.name, description=data.description, contact_email=data.contact_email)
db.add(tenant)
await db.commit()
await db.refresh(tenant)
# Seed default alert rules for new tenant
default_rules = [
("High CPU Usage", "cpu_load", "gt", 90, 5, "warning"),
("High Memory Usage", "memory_used_pct", "gt", 90, 5, "warning"),
("High Disk Usage", "disk_used_pct", "gt", 85, 3, "warning"),
("Device Offline", "device_offline", "eq", 1, 1, "critical"),
]
for name, metric, operator, threshold, duration, sev in default_rules:
await db.execute(text("""
INSERT INTO alert_rules (id, tenant_id, name, metric, operator, threshold, duration_polls, severity, enabled, is_default)
VALUES (gen_random_uuid(), CAST(:tenant_id AS uuid), :name, :metric, :operator, :threshold, :duration, :severity, TRUE, TRUE)
"""), {
"tenant_id": str(tenant.id), "name": name, "metric": metric,
"operator": operator, "threshold": threshold, "duration": duration, "severity": sev,
})
await db.commit()
# Seed starter config templates for new tenant
await _seed_starter_templates(db, tenant.id)
await db.commit()
# Provision OpenBao Transit key for the new tenant (non-blocking)
try:
from app.config import settings
from app.services.key_service import provision_tenant_key
if settings.OPENBAO_ADDR:
await provision_tenant_key(db, tenant.id)
await db.commit()
except Exception as exc:
import logging
logging.getLogger(__name__).warning(
"OpenBao key provisioning failed for tenant %s (will be provisioned on next startup): %s",
tenant.id,
exc,
)
return await _get_tenant_response(tenant, db)
@router.get("/{tenant_id}", response_model=TenantResponse, summary="Get tenant detail")
async def get_tenant(
tenant_id: uuid.UUID,
current_user: CurrentUser = Depends(require_tenant_admin_or_above),
db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
"""Get tenant detail. Tenant admins can only view their own tenant."""
# Enforce tenant_admin can only see their own tenant
if not current_user.is_super_admin and current_user.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied to this tenant",
)
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Tenant not found",
)
return await _get_tenant_response(tenant, db)
@router.put("/{tenant_id}", response_model=TenantResponse, summary="Update a tenant")
@limiter.limit("20/minute")
async def update_tenant(
request: Request,
tenant_id: uuid.UUID,
data: TenantUpdate,
current_user: CurrentUser = Depends(require_super_admin),
db: AsyncSession = Depends(get_admin_db),
) -> TenantResponse:
"""Update tenant (super_admin only)."""
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Tenant not found",
)
if data.name is not None:
# Check name uniqueness
name_check = await db.execute(
select(Tenant).where(Tenant.name == data.name, Tenant.id != tenant_id)
)
if name_check.scalar_one_or_none():
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Tenant with name '{data.name}' already exists",
)
tenant.name = data.name
if data.description is not None:
tenant.description = data.description
if data.contact_email is not None:
tenant.contact_email = data.contact_email
await db.commit()
await db.refresh(tenant)
return await _get_tenant_response(tenant, db)
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Delete a tenant")
@limiter.limit("5/minute")
async def delete_tenant(
request: Request,
tenant_id: uuid.UUID,
current_user: CurrentUser = Depends(require_super_admin),
db: AsyncSession = Depends(get_admin_db),
) -> None:
"""Delete tenant (super_admin only). Cascades to all users and devices."""
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Tenant not found",
)
await db.delete(tenant)
await db.commit()
# ---------------------------------------------------------------------------
# Starter template seeding
# ---------------------------------------------------------------------------
_STARTER_TEMPLATES = [
{
"name": "Basic Router",
"description": "Complete SOHO/branch router setup: WAN on ether1, LAN bridge, DHCP, DNS, NAT, basic firewall",
"content": """/interface bridge add name=bridge-lan comment="LAN bridge"
/interface bridge port add bridge=bridge-lan interface=ether2
/interface bridge port add bridge=bridge-lan interface=ether3
/interface bridge port add bridge=bridge-lan interface=ether4
/interface bridge port add bridge=bridge-lan interface=ether5
# WAN — DHCP client on ether1
/ip dhcp-client add interface={{ wan_interface }} disabled=no comment="WAN uplink"
# LAN address
/ip address add address={{ lan_gateway }}/{{ lan_cidr }} interface=bridge-lan
# DNS
/ip dns set servers={{ dns_servers }} allow-remote-requests=yes
# DHCP server for LAN
/ip pool add name=lan-pool ranges={{ dhcp_start }}-{{ dhcp_end }}
/ip dhcp-server network add address={{ lan_network }}/{{ lan_cidr }} gateway={{ lan_gateway }} dns-server={{ lan_gateway }}
/ip dhcp-server add name=lan-dhcp interface=bridge-lan address-pool=lan-pool disabled=no
# NAT masquerade
/ip firewall nat add chain=srcnat out-interface={{ wan_interface }} action=masquerade
# Firewall — input chain
/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} action=drop comment="Drop all other WAN input"
# Firewall — forward chain
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward in-interface=bridge-lan out-interface={{ wan_interface }} action=accept comment="Allow LAN to WAN"
add chain=forward action=drop comment="Drop everything else"
# NTP
/system ntp client set enabled=yes servers={{ ntp_server }}
# Identity
/system identity set name={{ device.hostname }}""",
"variables": [
{"name": "wan_interface", "type": "string", "default": "ether1", "description": "WAN-facing interface"},
{"name": "lan_gateway", "type": "ip", "default": "192.168.88.1", "description": "LAN gateway IP"},
{"name": "lan_cidr", "type": "integer", "default": "24", "description": "LAN subnet mask bits"},
{"name": "lan_network", "type": "ip", "default": "192.168.88.0", "description": "LAN network address"},
{"name": "dhcp_start", "type": "ip", "default": "192.168.88.100", "description": "DHCP pool start"},
{"name": "dhcp_end", "type": "ip", "default": "192.168.88.254", "description": "DHCP pool end"},
{"name": "dns_servers", "type": "string", "default": "8.8.8.8,8.8.4.4", "description": "Upstream DNS servers"},
{"name": "ntp_server", "type": "string", "default": "pool.ntp.org", "description": "NTP server"},
],
},
{
"name": "Basic Firewall",
"description": "Standard firewall ruleset with WAN protection and LAN forwarding",
"content": """/ip firewall filter
add chain=input connection-state=established,related action=accept
add chain=input connection-state=invalid action=drop
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=8291 action=drop comment="Block Winbox from WAN"
add chain=input in-interface={{ wan_interface }} protocol=tcp dst-port=22 action=drop comment="Block SSH from WAN"
add chain=forward connection-state=established,related action=accept
add chain=forward connection-state=invalid action=drop
add chain=forward src-address={{ allowed_network }} action=accept
add chain=forward action=drop""",
"variables": [
{"name": "wan_interface", "type": "string", "default": "ether1", "description": "WAN-facing interface"},
{"name": "allowed_network", "type": "subnet", "default": "192.168.88.0/24", "description": "Allowed source network"},
],
},
{
"name": "DHCP Server Setup",
"description": "Configure DHCP server with address pool, DNS, and gateway",
"content": """/ip pool add name=dhcp-pool ranges={{ pool_start }}-{{ pool_end }}
/ip dhcp-server network add address={{ gateway }}/24 gateway={{ gateway }} dns-server={{ dns_server }}
/ip dhcp-server add name=dhcp1 interface={{ interface }} address-pool=dhcp-pool disabled=no""",
"variables": [
{"name": "pool_start", "type": "ip", "default": "192.168.88.100", "description": "DHCP pool start address"},
{"name": "pool_end", "type": "ip", "default": "192.168.88.254", "description": "DHCP pool end address"},
{"name": "gateway", "type": "ip", "default": "192.168.88.1", "description": "Default gateway"},
{"name": "dns_server", "type": "ip", "default": "8.8.8.8", "description": "DNS server address"},
{"name": "interface", "type": "string", "default": "bridge-lan", "description": "Interface to serve DHCP on"},
],
},
{
"name": "Wireless AP Config",
"description": "Configure wireless access point with WPA2 security",
"content": """/interface wireless security-profiles add name=portal-wpa2 mode=dynamic-keys authentication-types=wpa2-psk wpa2-pre-shared-key={{ password }}
/interface wireless set wlan1 mode=ap-bridge ssid={{ ssid }} security-profile=portal-wpa2 frequency={{ frequency }} channel-width={{ channel_width }} disabled=no""",
"variables": [
{"name": "ssid", "type": "string", "default": "MikroTik-AP", "description": "Wireless network name"},
{"name": "password", "type": "string", "default": "", "description": "WPA2 pre-shared key (min 8 characters)"},
{"name": "frequency", "type": "integer", "default": "2412", "description": "Wireless frequency in MHz"},
{"name": "channel_width", "type": "string", "default": "20/40mhz-XX", "description": "Channel width setting"},
],
},
{
"name": "Initial Device Setup",
"description": "Set device identity, NTP, DNS, and disable unused services",
"content": """/system identity set name={{ device.hostname }}
/system ntp client set enabled=yes servers={{ ntp_server }}
/ip dns set servers={{ dns_servers }} allow-remote-requests=no
/ip service disable telnet,ftp,www,api-ssl
/ip service set ssh port=22
/ip service set winbox port=8291""",
"variables": [
{"name": "ntp_server", "type": "ip", "default": "pool.ntp.org", "description": "NTP server address"},
{"name": "dns_servers", "type": "string", "default": "8.8.8.8,8.8.4.4", "description": "Comma-separated DNS servers"},
],
},
]
async def _seed_starter_templates(db, tenant_id) -> None:
    """Insert starter config templates for a newly created tenant.

    Args:
        db: Async session; the caller is responsible for committing.
        tenant_id: UUID of the tenant that owns the new template rows.
    """
    import json as _json
    for tmpl in _STARTER_TEMPLATES:
        # "variables" is stored as jsonb, so serialize the list client-side.
        await db.execute(text("""
            INSERT INTO config_templates (id, tenant_id, name, description, content, variables)
            VALUES (gen_random_uuid(), CAST(:tid AS uuid), :name, :desc, :content, CAST(:vars AS jsonb))
        """), {
            "tid": str(tenant_id),
            "name": tmpl["name"],
            "desc": tmpl["description"],
            "content": tmpl["content"],
            "vars": _json.dumps(tmpl["variables"]),
        })

View File

@@ -0,0 +1,374 @@
"""
Network topology inference endpoint.
Endpoint: GET /api/tenants/{tenant_id}/topology
Builds a topology graph of managed devices by:
1. Querying all devices for the tenant (via RLS)
2. Fetching /ip/neighbor tables from online devices via NATS
3. Matching neighbor addresses to known devices
4. Falling back to shared /24 subnet inference when neighbor data is unavailable
5. Caching results in Redis with 5-minute TTL
"""
import asyncio
import ipaddress
import json
import logging
import uuid
from typing import Any
import redis.asyncio as aioredis
import structlog
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db, set_tenant_context
from app.middleware.rbac import require_min_role
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.models.vpn import VpnPeer
from app.services import routeros_proxy
logger = structlog.get_logger(__name__)
router = APIRouter(tags=["topology"])
# ---------------------------------------------------------------------------
# Redis connection (lazy initialized, same pattern as routeros_proxy NATS)
# ---------------------------------------------------------------------------
_redis: aioredis.Redis | None = None  # module-level singleton, created on first use
TOPOLOGY_CACHE_TTL = 300  # 5 minutes

async def _get_redis() -> aioredis.Redis:
    """Get or create a Redis connection for topology caching.

    Lazily builds a module-level client from ``settings.REDIS_URL`` with
    ``decode_responses=True`` so cached JSON comes back as ``str``.

    NOTE(review): initialization is not lock-guarded — concurrent first
    calls could each create a client; confirm this is acceptable for a
    best-effort cache.
    """
    global _redis
    if _redis is None:
        _redis = aioredis.from_url(settings.REDIS_URL, decode_responses=True)
        logger.info("Topology Redis connection established")
    return _redis
# ---------------------------------------------------------------------------
# Response schemas
# ---------------------------------------------------------------------------
class TopologyNode(BaseModel):
    # One managed device rendered as a graph node.
    id: str  # device UUID as string
    hostname: str
    ip: str
    status: str  # e.g. "online" — only online devices are queried for neighbors
    model: str | None
    uptime: str | None  # human-readable, see _format_uptime

class TopologyEdge(BaseModel):
    # Undirected link between two devices; source/target are node ids.
    source: str
    target: str
    label: str  # interface name, "vpn tunnel", or "shared subnet"

class TopologyResponse(BaseModel):
    nodes: list[TopologyNode]
    edges: list[TopologyEdge]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    super_admins may access any tenant and get the tenant's RLS context
    set explicitly on this session.  Other users must belong to the
    tenant (their own RLS context is handled by get_db, per module docs).

    Raises:
        HTTPException: 403 when a non-super-admin targets another tenant.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
        return
    if current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied: you do not belong to this tenant.",
        )
def _format_uptime(seconds: int | None) -> str | None:
"""Convert uptime seconds to a human-readable string."""
if seconds is None:
return None
days = seconds // 86400
hours = (seconds % 86400) // 3600
minutes = (seconds % 3600) // 60
if days > 0:
return f"{days}d {hours}h {minutes}m"
if hours > 0:
return f"{hours}h {minutes}m"
return f"{minutes}m"
def _get_subnet_key(ip_str: str) -> str | None:
"""Return the /24 network key for an IPv4 address, or None if invalid."""
try:
addr = ipaddress.ip_address(ip_str)
if isinstance(addr, ipaddress.IPv4Address):
network = ipaddress.ip_network(f"{ip_str}/24", strict=False)
return str(network)
except ValueError:
pass
return None
def _build_edges_from_neighbors(
    neighbor_data: dict[str, list[dict[str, Any]]],
    ip_to_device: dict[str, str],
) -> list[TopologyEdge]:
    """Build topology edges from neighbor discovery results.

    Args:
        neighbor_data: Mapping of device_id -> list of neighbor entries.
        ip_to_device: Mapping of IP address -> device_id for known devices.

    Returns:
        De-duplicated list of topology edges.
    """
    edges: list[TopologyEdge] = []
    seen: set[tuple[str, str]] = set()
    for source_id, entries in neighbor_data.items():
        for entry in entries:
            # RouterOS reports the neighbor IP under 'address' (or 'address4').
            addr = entry.get("address") or entry.get("address4", "")
            if not addr:
                continue
            target_id = ip_to_device.get(addr)
            # Skip neighbors we don't manage and self-references.
            if target_id is None or target_id == source_id:
                continue
            # Collapse A->B and B->A into a single undirected edge.
            key = tuple(sorted((source_id, target_id)))
            if key in seen:
                continue
            seen.add(key)
            edges.append(
                TopologyEdge(
                    source=source_id,
                    target=target_id,
                    label=entry.get("interface", "neighbor"),
                )
            )
    return edges
def _build_edges_from_subnets(
    devices: list[Device],
    existing_connected: set[tuple[str, str]],
) -> list[TopologyEdge]:
    """Infer edges from shared /24 subnets for devices without neighbor data.

    Only adds subnet-based edges for device pairs that are NOT already connected
    via neighbor discovery.

    Note:
        ``existing_connected`` is mutated in place — every subnet edge added
        here is also recorded in the set, so the caller sees the union.
    """
    # Group devices by /24 subnet (IPv6 / unparsable addresses are skipped).
    subnet_groups: dict[str, list[str]] = {}
    for device in devices:
        subnet_key = _get_subnet_key(device.ip_address)
        if subnet_key:
            subnet_groups.setdefault(subnet_key, []).append(str(device.id))
    edges: list[TopologyEdge] = []
    for subnet, device_ids in subnet_groups.items():
        if len(device_ids) < 2:
            continue
        # Connect all pairs in the subnet (full mesh within the /24).
        for i, src in enumerate(device_ids):
            for tgt in device_ids[i + 1 :]:
                edge_key = tuple(sorted([src, tgt]))
                if edge_key in existing_connected:
                    continue
                edges.append(
                    TopologyEdge(
                        source=src,
                        target=tgt,
                        label="shared subnet",
                    )
                )
                existing_connected.add(edge_key)
    return edges
# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/topology",
    response_model=TopologyResponse,
    summary="Get network topology for a tenant",
)
async def get_topology(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    _role: CurrentUser = Depends(require_min_role("viewer")),
    db: AsyncSession = Depends(get_db),
) -> TopologyResponse:
    """Build and return a network topology graph for the given tenant.

    Edges are inferred from, in priority order:
      1. LLDP/CDP/MNDP neighbor discovery on online devices
      2. WireGuard hub-spoke inference (gateway = a managed device that
         is not itself a VPN peer)
      3. Shared /24 subnet fallback for pairs not already connected

    Results are cached in Redis with a 5-minute TTL; Redis failures are
    logged and the topology is computed fresh (best-effort cache).

    Fix applied: removed unused local ``devices_by_id`` (declared but
    never read in the original).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    cache_key = f"topology:{tenant_id}"
    # Check Redis cache first; any cache error falls through to recompute.
    try:
        rd = await _get_redis()
        cached = await rd.get(cache_key)
        if cached:
            data = json.loads(cached)
            return TopologyResponse(**data)
    except Exception as exc:
        logger.warning("Redis cache read failed, computing topology fresh", error=str(exc))
    # Fetch all devices for tenant (RLS enforced via get_db)
    result = await db.execute(
        select(
            Device.id,
            Device.hostname,
            Device.ip_address,
            Device.status,
            Device.model,
            Device.uptime_seconds,
        )
    )
    rows = result.all()
    if not rows:
        return TopologyResponse(nodes=[], edges=[])
    # Build nodes plus the lookup structures the edge builders need.
    nodes: list[TopologyNode] = []
    ip_to_device: dict[str, str] = {}
    online_device_ids: list[str] = []
    for row in rows:
        device_id = str(row.id)
        nodes.append(
            TopologyNode(
                id=device_id,
                hostname=row.hostname,
                ip=row.ip_address,
                status=row.status,
                model=row.model,
                uptime=_format_uptime(row.uptime_seconds),
            )
        )
        ip_to_device[row.ip_address] = device_id
        # Only online devices can answer neighbor queries.
        if row.status == "online":
            online_device_ids.append(device_id)
    # Fetch neighbor tables from online devices in parallel.
    neighbor_data: dict[str, list[dict[str, Any]]] = {}
    if online_device_ids:
        tasks = [
            routeros_proxy.execute_command(
                device_id, "/ip/neighbor/print", timeout=10.0
            )
            for device_id in online_device_ids
        ]
        # return_exceptions=True: one slow/failed device must not sink the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for device_id, res in zip(online_device_ids, results):
            if isinstance(res, Exception):
                logger.warning(
                    "Neighbor fetch failed",
                    device_id=device_id,
                    error=str(res),
                )
                continue
            if isinstance(res, dict) and res.get("success") and res.get("data"):
                neighbor_data[device_id] = res["data"]
    # Build edges from neighbor discovery.
    neighbor_edges = _build_edges_from_neighbors(neighbor_data, ip_to_device)
    # Track connected pairs so later stages don't duplicate edges.
    connected_pairs: set[tuple[str, str]] = set()
    for edge in neighbor_edges:
        connected_pairs.add(tuple(sorted([edge.source, edge.target])))
    # VPN-based edges: query WireGuard peers to infer hub-spoke topology.
    # VPN peers all connect to the same WireGuard server. The gateway device
    # is the managed device NOT in the VPN peers list (it's the server, not a
    # client). If found, create star edges from gateway to each VPN peer device.
    vpn_edges: list[TopologyEdge] = []
    vpn_peer_device_ids: set[str] = set()
    try:
        peer_result = await db.execute(
            select(VpnPeer.device_id).where(VpnPeer.is_enabled.is_(True))
        )
        vpn_peer_device_ids = {str(row[0]) for row in peer_result.all()}
        if vpn_peer_device_ids:
            # Gateway = managed devices NOT in VPN peers (typically the Core router)
            all_device_ids = {str(row.id) for row in rows}
            gateway_ids = all_device_ids - vpn_peer_device_ids
            # Prefer an online gateway; fall back to any candidate.
            gateway_id = None
            for gid in gateway_ids:
                if gid in online_device_ids:
                    gateway_id = gid
                    break
            if not gateway_id and gateway_ids:
                gateway_id = next(iter(gateway_ids))
            if gateway_id:
                for peer_device_id in vpn_peer_device_ids:
                    edge_key = tuple(sorted([gateway_id, peer_device_id]))
                    if edge_key not in connected_pairs:
                        vpn_edges.append(
                            TopologyEdge(
                                source=gateway_id,
                                target=peer_device_id,
                                label="vpn tunnel",
                            )
                        )
                        connected_pairs.add(edge_key)
    except Exception as exc:
        # Best-effort: a VPN lookup failure degrades the graph, not the request.
        logger.warning("VPN edge detection failed", error=str(exc))
    # Fallback: infer connections from shared /24 subnets.
    # Query full Device objects for subnet analysis.
    device_result = await db.execute(select(Device))
    all_devices = list(device_result.scalars().all())
    subnet_edges = _build_edges_from_subnets(all_devices, connected_pairs)
    all_edges = neighbor_edges + vpn_edges + subnet_edges
    topology = TopologyResponse(nodes=nodes, edges=all_edges)
    # Cache result in Redis (best-effort; failures only logged).
    try:
        rd = await _get_redis()
        await rd.set(cache_key, topology.model_dump_json(), ex=TOPOLOGY_CACHE_TTL)
    except Exception as exc:
        logger.warning("Redis cache write failed", error=str(exc))
    return topology

View File

@@ -0,0 +1,391 @@
"""Transparency log API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/ for:
- Paginated, filterable key access transparency log listing
- Transparency log statistics (total events, last 24h, unique devices, justification breakdown)
- CSV export of transparency logs
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: admin and above can view transparency logs (tenant_admin or super_admin).
Phase 31: Data Access Transparency Dashboard - TRUST-01, TRUST-02
Shows tenant admins every KMS credential access event for their tenant.
"""
import csv
import io
import logging
import uuid
from datetime import datetime
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy import and_, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.tenant_context import CurrentUser, get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(tags=["transparency"])
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    """Verify the current user is allowed to access the given tenant.

    super_admins may access any tenant and get the tenant's RLS context
    set on this session; other users must belong to the tenant.

    Raises:
        HTTPException: 403 when a non-super-admin targets another tenant.
    """
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
def _require_admin(current_user: CurrentUser) -> None:
    """Raise 403 unless the caller holds an admin-level role.

    Transparency data is sensitive operational intelligence --
    only the roles listed below may view it.
    """
    if current_user.role in ("super_admin", "admin", "tenant_admin"):
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="At least admin role required to view transparency logs.",
    )
# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------
class TransparencyLogItem(BaseModel):
    # One key-access event, flattened with device/operator display fields.
    id: str
    action: str
    device_name: Optional[str] = None  # from LEFT JOIN devices (may be null)
    device_id: Optional[str] = None
    justification: Optional[str] = None
    operator_email: Optional[str] = None  # from LEFT JOIN users (may be null)
    correlation_id: Optional[str] = None
    resource_type: Optional[str] = None
    resource_id: Optional[str] = None
    ip_address: Optional[str] = None
    created_at: str  # ISO 8601; empty string when the timestamp is missing

class TransparencyLogResponse(BaseModel):
    # One page of log items plus pagination metadata.
    items: list[TransparencyLogItem]
    total: int  # total matching rows across all pages
    page: int
    per_page: int

class TransparencyStats(BaseModel):
    total_events: int
    events_last_24h: int
    unique_devices: int
    # justification label -> event count; NULL justification maps to "system"
    justification_breakdown: dict[str, int]
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get(
    "/tenants/{tenant_id}/transparency-logs",
    response_model=TransparencyLogResponse,
    summary="List KMS credential access events for tenant",
)
async def list_transparency_logs(
    tenant_id: uuid.UUID,
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=100),
    device_id: Optional[uuid.UUID] = Query(default=None),
    justification: Optional[str] = Query(default=None),
    action: Optional[str] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """Paginated, filterable list of key-access transparency events.

    All filters are optional and ANDed together; results are ordered
    newest-first. Requires at least admin role (see _require_admin).
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions using parameterized text fragments
    # (values are bound via `params`, never interpolated into the SQL).
    conditions = [text("k.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if device_id:
        conditions.append(text("k.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if justification:
        conditions.append(text("k.justification = :justification"))
        params["justification"] = justification
    if action:
        conditions.append(text("k.action = :action"))
        params["action"] = action
    if date_from:
        conditions.append(text("k.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("k.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    # Shared SELECT columns for data queries
    _data_columns = text(
        "k.id, k.action, d.hostname AS device_name, "
        "k.device_id, k.justification, u.email AS operator_email, "
        "k.correlation_id, k.resource_type, k.resource_id, "
        "k.ip_address, k.created_at"
    )
    _data_from = text(
        "key_access_log k "
        "LEFT JOIN users u ON k.user_id = u.id "
        "LEFT JOIN devices d ON k.device_id = d.id"
    )
    # Count total — joins are unnecessary here since filters reference only k.*
    count_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log k"))
        .where(where_clause),
        params,
    )
    total = count_result.scalar() or 0
    # Paginated query
    offset = (page - 1) * per_page
    # NOTE(review): :limit/:offset bind params are set but .limit()/.offset()
    # below bind their own values — the extra params look unused; confirm.
    params["limit"] = per_page
    params["offset"] = offset
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("k.created_at DESC"))
        .limit(per_page)
        .offset(offset),
        params,
    )
    rows = result.mappings().all()
    items = [
        TransparencyLogItem(
            id=str(row["id"]),
            action=row["action"],
            device_name=row["device_name"],
            device_id=str(row["device_id"]) if row["device_id"] else None,
            justification=row["justification"],
            operator_email=row["operator_email"],
            correlation_id=row["correlation_id"],
            resource_type=row["resource_type"],
            resource_id=row["resource_id"],
            ip_address=row["ip_address"],
            created_at=row["created_at"].isoformat() if row["created_at"] else "",
        )
        for row in rows
    ]
    return TransparencyLogResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
    )
@router.get(
    "/tenants/{tenant_id}/transparency-logs/stats",
    response_model=TransparencyStats,
    summary="Get transparency log statistics",
)
async def get_transparency_stats(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> TransparencyStats:
    """Aggregate statistics over the tenant's key-access log.

    Returns total event count, count over the trailing 24 hours, the
    number of distinct devices touched, and a per-justification breakdown
    (NULL justifications grouped under "system"). Requires admin role.
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    # Total events
    total_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log"))
        .where(text("tenant_id = :tenant_id")),
        params,
    )
    total_events = total_result.scalar() or 0
    # Events in last 24 hours
    last_24h_result = await db.execute(
        select(func.count())
        .select_from(text("key_access_log"))
        .where(
            and_(
                text("tenant_id = :tenant_id"),
                text("created_at >= NOW() - INTERVAL '24 hours'"),
            )
        ),
        params,
    )
    events_last_24h = last_24h_result.scalar() or 0
    # Unique devices (events without a device are excluded)
    unique_devices_result = await db.execute(
        select(func.count(text("DISTINCT device_id")))
        .select_from(text("key_access_log"))
        .where(
            and_(
                text("tenant_id = :tenant_id"),
                text("device_id IS NOT NULL"),
            )
        ),
        params,
    )
    unique_devices = unique_devices_result.scalar() or 0
    # Justification breakdown — COALESCE maps NULL to the "system" bucket
    breakdown_result = await db.execute(
        select(
            text("COALESCE(justification, 'system') AS justification_label"),
            func.count().label("count"),
        )
        .select_from(text("key_access_log"))
        .where(text("tenant_id = :tenant_id"))
        .group_by(text("justification_label")),
        params,
    )
    justification_breakdown: dict[str, int] = {}
    for row in breakdown_result.mappings().all():
        justification_breakdown[row["justification_label"]] = row["count"]
    return TransparencyStats(
        total_events=total_events,
        events_last_24h=events_last_24h,
        unique_devices=unique_devices,
        justification_breakdown=justification_breakdown,
    )
@router.get(
    "/tenants/{tenant_id}/transparency-logs/export",
    summary="Export transparency logs as CSV",
)
async def export_transparency_logs(
    tenant_id: uuid.UUID,
    device_id: Optional[uuid.UUID] = Query(default=None),
    justification: Optional[str] = Query(default=None),
    action: Optional[str] = Query(default=None),
    date_from: Optional[datetime] = Query(default=None),
    date_to: Optional[datetime] = Query(default=None),
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> StreamingResponse:
    """Export a tenant's transparency logs as a CSV attachment.

    Optional query parameters narrow the export by device, justification,
    action, and an inclusive created_at window. Rows are ordered newest
    first. Admin-only; tenant access is checked before querying.
    """
    _require_admin(current_user)
    await _check_tenant_access(current_user, tenant_id, db)
    # Build filter conditions — every fragment binds a named parameter.
    conditions = [text("k.tenant_id = :tenant_id")]
    params: dict[str, Any] = {"tenant_id": str(tenant_id)}
    if device_id:
        conditions.append(text("k.device_id = :device_id"))
        params["device_id"] = str(device_id)
    if justification:
        conditions.append(text("k.justification = :justification"))
        params["justification"] = justification
    if action:
        conditions.append(text("k.action = :action"))
        params["action"] = action
    if date_from:
        conditions.append(text("k.created_at >= :date_from"))
        params["date_from"] = date_from.isoformat()
    if date_to:
        conditions.append(text("k.created_at <= :date_to"))
        params["date_to"] = date_to.isoformat()
    where_clause = and_(*conditions)
    # LEFT JOINs keep log rows whose user or device has since been deleted;
    # those columns come back NULL and render as "" below.
    _data_columns = text(
        "k.id, k.action, d.hostname AS device_name, "
        "k.device_id, k.justification, u.email AS operator_email, "
        "k.correlation_id, k.resource_type, k.resource_id, "
        "k.ip_address, k.created_at"
    )
    _data_from = text(
        "key_access_log k "
        "LEFT JOIN users u ON k.user_id = u.id "
        "LEFT JOIN devices d ON k.device_id = d.id"
    )
    result = await db.execute(
        select(_data_columns)
        .select_from(_data_from)
        .where(where_clause)
        .order_by(text("k.created_at DESC")),
        params,
    )
    all_rows = result.mappings().all()
    # NOTE(review): the full result set is materialized in memory and emitted
    # as a single chunk, so this only "streams" nominally — fine for modest
    # volumes, revisit for very large tenants.
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow([
        "ID",
        "Action",
        "Device Name",
        "Device ID",
        "Justification",
        "Operator Email",
        "Correlation ID",
        "Resource Type",
        "Resource ID",
        "IP Address",
        "Timestamp",
    ])
    for row in all_rows:
        writer.writerow([
            str(row["id"]),
            row["action"],
            row["device_name"] or "",
            str(row["device_id"]) if row["device_id"] else "",
            row["justification"] or "",
            row["operator_email"] or "",
            row["correlation_id"] or "",
            row["resource_type"] or "",
            row["resource_id"] or "",
            row["ip_address"] or "",
            str(row["created_at"]),
        ])
    output.seek(0)
    return StreamingResponse(
        iter([output.getvalue()]),
        media_type="text/csv",
        headers={
            "Content-Disposition": "attachment; filename=transparency-logs.csv"
        },
    )

View File

@@ -0,0 +1,231 @@
"""
User management endpoints (scoped to tenant).
GET /api/tenants/{tenant_id}/users — list users in tenant
POST /api/tenants/{tenant_id}/users — create user in tenant
GET /api/tenants/{tenant_id}/users/{id} — get user detail
PUT /api/tenants/{tenant_id}/users/{id} — update user
DELETE /api/tenants/{tenant_id}/users/{id} — deactivate user
"""
import uuid
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.middleware.rate_limit import limiter
from app.database import get_admin_db
from app.middleware.rbac import require_tenant_admin_or_above
from app.middleware.tenant_context import CurrentUser
from app.models.tenant import Tenant
from app.models.user import User, UserRole
from app.schemas.user import UserCreate, UserResponse, UserUpdate
from app.services.auth import hash_password
router = APIRouter(prefix="/tenants", tags=["users"])
async def _check_tenant_access(
    tenant_id: uuid.UUID,
    current_user: CurrentUser,
    db: AsyncSession,
) -> Tenant:
    """Verify the tenant exists and the current user may act on it.

    super_admin can access any tenant; tenant_admin only their own.
    Raises 403 on an access mismatch (checked first, so foreign tenant
    IDs are not probed) and 404 when the tenant row is missing.
    """
    allowed = current_user.is_super_admin or current_user.tenant_id == tenant_id
    if not allowed:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this tenant",
        )
    lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
    tenant = lookup.scalar_one_or_none()
    if tenant is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Tenant not found",
        )
    return tenant
@router.get("/{tenant_id}/users", response_model=list[UserResponse], summary="List users in tenant")
async def list_users(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> list[UserResponse]:
    """
    List users in a tenant, ordered by display name.
    - super_admin: can list users in any tenant
    - tenant_admin: can only list users in their own tenant
    """
    await _check_tenant_access(tenant_id, current_user, db)
    query = select(User).where(User.tenant_id == tenant_id).order_by(User.name)
    rows = (await db.execute(query)).scalars().all()
    return [UserResponse.model_validate(row) for row in rows]
@router.post(
    "/{tenant_id}/users",
    response_model=UserResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create a user in tenant",
)
@limiter.limit("20/minute")
async def create_user(
    request: Request,
    tenant_id: uuid.UUID,
    data: UserCreate,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """
    Create a user within a tenant.
    - super_admin: can create users in any tenant
    - tenant_admin: can only create users in their own tenant
    - No email invitation flow — admin creates accounts with temporary passwords
    - Returns 409 when the email is already taken (emails are unique globally)
    """
    await _check_tenant_access(tenant_id, current_user, db)
    # Check email uniqueness (global, not per-tenant)
    # NOTE(review): check-then-insert races with concurrent requests; if
    # users.email has a DB unique constraint, a concurrent duplicate would
    # surface as an IntegrityError on commit — TODO confirm and handle.
    existing = await db.execute(
        select(User).where(User.email == data.email.lower())
    )
    if existing.scalar_one_or_none():
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="A user with this email already exists",
        )
    user = User(
        email=data.email.lower(),  # emails stored lowercased for uniqueness checks
        hashed_password=hash_password(data.password),
        name=data.name,
        role=data.role.value,
        tenant_id=tenant_id,
        is_active=True,
        # Flags the account for an auth upgrade on first login (presumably
        # the SRP registration flow) — TODO confirm against auth service.
        must_upgrade_auth=True,
    )
    db.add(user)
    await db.commit()
    await db.refresh(user)
    return UserResponse.model_validate(user)
@router.get("/{tenant_id}/users/{user_id}", response_model=UserResponse, summary="Get user detail")
async def get_user(
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """Fetch a single user scoped to the given tenant; 404 if absent."""
    await _check_tenant_access(tenant_id, current_user, db)
    query = select(User).where(User.id == user_id, User.tenant_id == tenant_id)
    found = (await db.execute(query)).scalar_one_or_none()
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    return UserResponse.model_validate(found)
@router.put("/{tenant_id}/users/{user_id}", response_model=UserResponse, summary="Update a user")
@limiter.limit("20/minute")
async def update_user(
    request: Request,
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    data: UserUpdate,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> UserResponse:
    """
    Update user attributes (name, role, is_active).
    Role assignment is editable by admins.

    Self-deactivation via is_active=false is rejected (400) for consistency
    with the DELETE endpoint, so an admin cannot lock themselves out.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    result = await db.execute(
        select(User).where(User.id == user_id, User.tenant_id == tenant_id)
    )
    user = result.scalar_one_or_none()
    if not user:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    # Mirror the guard in deactivate_user: without this, PUT {is_active: false}
    # would bypass the "cannot deactivate your own account" protection.
    if data.is_active is False and user.id == current_user.user_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot deactivate your own account",
        )
    # Partial-update semantics: only fields the client supplied are applied.
    if data.name is not None:
        user.name = data.name
    if data.role is not None:
        user.role = data.role.value
    if data.is_active is not None:
        user.is_active = data.is_active
    await db.commit()
    await db.refresh(user)
    return UserResponse.model_validate(user)
@router.delete("/{tenant_id}/users/{user_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Deactivate a user")
@limiter.limit("5/minute")
async def deactivate_user(
    request: Request,
    tenant_id: uuid.UUID,
    user_id: uuid.UUID,
    current_user: CurrentUser = Depends(require_tenant_admin_or_above),
    db: AsyncSession = Depends(get_admin_db),
) -> None:
    """
    Deactivate a user (soft delete — sets is_active=False).
    This preserves audit trail while preventing login.

    Raises 404 when the user does not exist in this tenant and 400 when an
    admin attempts to deactivate their own account.
    """
    await _check_tenant_access(tenant_id, current_user, db)
    result = await db.execute(
        select(User).where(User.id == user_id, User.tenant_id == tenant_id)
    )
    user = result.scalar_one_or_none()
    if not user:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    # Prevent self-deactivation
    if user.id == current_user.user_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot deactivate your own account",
        )
    user.is_active = False
    await db.commit()

236
backend/app/routers/vpn.py Normal file
View File

@@ -0,0 +1,236 @@
"""WireGuard VPN API endpoints.
Tenant-scoped routes under /api/tenants/{tenant_id}/vpn/ for:
- VPN setup (enable WireGuard for tenant)
- VPN config management (update endpoint, enable/disable)
- Peer management (add device, remove, get config)
RLS enforced via get_db() (app_user engine with tenant context).
RBAC: operator and above for all operations.
"""
import uuid
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db, set_tenant_context
from app.middleware.rate_limit import limiter
from app.middleware.tenant_context import CurrentUser, get_current_user
from app.models.device import Device
from app.schemas.vpn import (
VpnConfigResponse,
VpnConfigUpdate,
VpnOnboardRequest,
VpnOnboardResponse,
VpnPeerConfig,
VpnPeerCreate,
VpnPeerResponse,
VpnSetupRequest,
)
from app.services import vpn_service
router = APIRouter(tags=["vpn"])
async def _check_tenant_access(
    current_user: CurrentUser, tenant_id: uuid.UUID, db: AsyncSession
) -> None:
    # super_admin may act on any tenant but must explicitly set the RLS
    # tenant context on this session first; regular users presumably get
    # their context from get_db() (per the module docstring) — TODO confirm.
    # Non-super-admin users are rejected with 403 on a tenant mismatch.
    if current_user.is_super_admin:
        await set_tenant_context(db, str(tenant_id))
    elif current_user.tenant_id != tenant_id:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
def _require_operator(current_user: CurrentUser) -> None:
    """Reject viewer-role users with 403; any other role may proceed."""
    if current_user.role != "viewer":
        return
    raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Operator role required")
# ── VPN Config ──
@router.get("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse | None)
async def get_vpn_config(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the tenant's VPN configuration (with peer count), or None when
    VPN has not been set up for this tenant."""
    await _check_tenant_access(current_user, tenant_id, db)
    config = await vpn_service.get_vpn_config(db, tenant_id)
    if config is None:
        return None
    response = VpnConfigResponse.model_validate(config)
    response.peer_count = len(await vpn_service.get_peers(db, tenant_id))
    return response
@router.post("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("20/minute")
async def setup_vpn(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnSetupRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Enable VPN for this tenant — generates server keys.

    Operator role or above. Returns 409 when the service reports a conflict
    (e.g. VPN already configured, per its ValueError message).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.setup_vpn(db, tenant_id, endpoint=body.endpoint)
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    return VpnConfigResponse.model_validate(config)
@router.patch("/tenants/{tenant_id}/vpn", response_model=VpnConfigResponse)
@limiter.limit("20/minute")
async def update_vpn_config(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnConfigUpdate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Update VPN settings (endpoint, enable/disable).

    Operator role or above. Returns 404 when the service reports the VPN
    config does not exist. Response includes the current peer count.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.update_vpn_config(
            db, tenant_id, endpoint=body.endpoint, is_enabled=body.is_enabled
        )
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    peers = await vpn_service.get_peers(db, tenant_id)
    resp = VpnConfigResponse.model_validate(config)
    resp.peer_count = len(peers)
    return resp
# ── VPN Peers ──
@router.get("/tenants/{tenant_id}/vpn/peers", response_model=list[VpnPeerResponse])
async def list_peers(
    tenant_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List all VPN peers for this tenant.

    Each peer is enriched with its device's hostname/IP (when the device
    still exists) and, when available, the live last-handshake time read
    from the WireGuard status.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    peers = await vpn_service.get_peers(db, tenant_id)
    # Enrich with device info — one bulk query instead of per-peer lookups.
    device_ids = [p.device_id for p in peers]
    devices = {}
    if device_ids:
        result = await db.execute(select(Device).where(Device.id.in_(device_ids)))
        devices = {d.id: d for d in result.scalars().all()}
    # Read live WireGuard status for handshake enrichment
    wg_status = vpn_service.read_wg_status()
    responses = []
    for peer in peers:
        resp = VpnPeerResponse.model_validate(peer)
        device = devices.get(peer.device_id)
        if device:
            resp.device_hostname = device.hostname
            resp.device_ip = device.ip_address
        # Enrich with live handshake from WireGuard container
        # (falls back to the stored last_handshake when no live data).
        live_handshake = vpn_service.get_peer_handshake(wg_status, peer.peer_public_key)
        if live_handshake:
            resp.last_handshake = live_handshake
        responses.append(resp)
    return responses
@router.post("/tenants/{tenant_id}/vpn/peers", response_model=VpnPeerResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("20/minute")
async def add_peer(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnPeerCreate,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Add a device as a VPN peer.

    Operator role or above. Returns 409 when the service reports a conflict
    (per its ValueError message). The response is enriched with the device's
    hostname and IP when the device row is found.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        peer = await vpn_service.add_peer(db, tenant_id, body.device_id, additional_allowed_ips=body.additional_allowed_ips)
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    # Enrich with device info
    result = await db.execute(select(Device).where(Device.id == peer.device_id))
    device = result.scalar_one_or_none()
    resp = VpnPeerResponse.model_validate(peer)
    if device:
        resp.device_hostname = device.hostname
        resp.device_ip = device.ip_address
    return resp
@router.post("/tenants/{tenant_id}/vpn/peers/onboard", response_model=VpnOnboardResponse, status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def onboard_device(
    request: Request,
    tenant_id: uuid.UUID,
    body: VpnOnboardRequest,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Create device + VPN peer in one step. Returns RouterOS commands for tunnel setup.

    Operator role or above. Returns 409 when the service reports a conflict
    (per its ValueError message).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        result = await vpn_service.onboard_device(
            db, tenant_id,
            hostname=body.hostname,
            username=body.username,
            password=body.password,
        )
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e)) from e
    return VpnOnboardResponse(**result)
@router.delete("/tenants/{tenant_id}/vpn/peers/{peer_id}", status_code=status.HTTP_204_NO_CONTENT)
@limiter.limit("5/minute")
async def remove_peer(
    request: Request,
    tenant_id: uuid.UUID,
    peer_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Remove a VPN peer.

    Operator role or above. Returns 404 when the service reports the peer
    does not exist (per its ValueError message).
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        await vpn_service.remove_peer(db, tenant_id, peer_id)
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
@router.get("/tenants/{tenant_id}/vpn/peers/{peer_id}/config", response_model=VpnPeerConfig)
async def get_peer_device_config(
    tenant_id: uuid.UUID,
    peer_id: uuid.UUID,
    current_user: CurrentUser = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Get the full config for a peer — includes private key and RouterOS commands.

    Operator role or above (the payload contains the peer's private key).
    Returns 404 when the service reports the peer does not exist.
    """
    await _check_tenant_access(current_user, tenant_id, db)
    _require_operator(current_user)
    try:
        config = await vpn_service.get_peer_config(db, tenant_id, peer_id)
    except ValueError as e:
        # `from e` preserves the service-layer cause in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    return VpnPeerConfig(**config)

View File

@@ -0,0 +1,18 @@
"""Pydantic schemas for request/response validation."""
from app.schemas.auth import LoginRequest, TokenResponse, RefreshRequest, UserMeResponse
from app.schemas.tenant import TenantCreate, TenantResponse, TenantUpdate
from app.schemas.user import UserCreate, UserResponse, UserUpdate
# Public re-export surface of the schemas package; keep in sync with the
# imports above when adding new schema modules.
__all__ = [
    "LoginRequest",
    "TokenResponse",
    "RefreshRequest",
    "UserMeResponse",
    "TenantCreate",
    "TenantResponse",
    "TenantUpdate",
    "UserCreate",
    "UserResponse",
    "UserUpdate",
]

123
backend/app/schemas/auth.py Normal file
View File

@@ -0,0 +1,123 @@
"""Authentication request/response schemas."""
import uuid
from typing import Optional
from pydantic import BaseModel, EmailStr
class LoginRequest(BaseModel):
    """Email + password login payload (legacy password flow)."""
    email: EmailStr
    password: str
class TokenResponse(BaseModel):
    """JWT access/refresh pair issued after successful authentication."""
    access_token: str
    refresh_token: str
    token_type: str = "bearer"
    auth_upgrade_required: bool = False  # True when bcrypt user needs SRP registration
class RefreshRequest(BaseModel):
    """Exchange a refresh token for a new token pair."""
    refresh_token: str
class UserMeResponse(BaseModel):
    """Current-user profile payload."""
    id: uuid.UUID
    email: str
    name: str
    role: str
    tenant_id: Optional[uuid.UUID] = None  # None presumably for super_admin — TODO confirm
    auth_version: int = 1
    model_config = {"from_attributes": True}
class ChangePasswordRequest(BaseModel):
    """Password change payload.

    Legacy users send only current/new passwords; SRP users additionally
    supply a re-derived verifier and a re-wrapped key bundle (all optional
    fields below).
    """
    current_password: str
    new_password: str
    # SRP users must provide re-derived credentials
    new_srp_salt: Optional[str] = None
    new_srp_verifier: Optional[str] = None
    # Re-wrapped key bundle (SRP users re-encrypt with new AUK)
    encrypted_private_key: Optional[str] = None
    private_key_nonce: Optional[str] = None
    encrypted_vault_key: Optional[str] = None
    vault_key_nonce: Optional[str] = None
    public_key: Optional[str] = None
    pbkdf2_salt: Optional[str] = None
    hkdf_salt: Optional[str] = None
class ForgotPasswordRequest(BaseModel):
    """Start the password-reset email flow."""
    email: EmailStr
class ResetPasswordRequest(BaseModel):
    """Complete a password reset using an emailed token."""
    token: str
    new_password: str
class MessageResponse(BaseModel):
    """Generic human-readable status message."""
    message: str
# --- SRP Zero-Knowledge Authentication Schemas ---
class SRPInitRequest(BaseModel):
    """Step 1 request: client sends email to begin SRP handshake."""
    email: EmailStr
class SRPInitResponse(BaseModel):
    """Step 1 response: server returns ephemeral B and key derivation salts."""
    salt: str  # hex-encoded SRP salt
    server_public: str  # hex-encoded server ephemeral B
    session_id: str  # Redis session key nonce
    pbkdf2_salt: str  # base64-encoded, from user_key_sets (needed for 2SKD before SRP verify)
    hkdf_salt: str  # base64-encoded, from user_key_sets (needed for 2SKD before SRP verify)
class SRPVerifyRequest(BaseModel):
    """Step 2 request: client sends proof M1 to complete handshake."""
    email: EmailStr
    session_id: str
    client_public: str  # hex-encoded client ephemeral A
    client_proof: str  # hex-encoded client proof M1
class SRPVerifyResponse(BaseModel):
    """Step 2 response: server returns tokens and proof M2."""
    access_token: str
    refresh_token: str
    token_type: str = "bearer"
    server_proof: str  # hex-encoded server proof M2
    encrypted_key_set: Optional[dict] = None  # Key bundle for client-side decryption
class SRPRegisterRequest(BaseModel):
    """Used during registration to store SRP verifier and key set."""
    srp_salt: str  # hex-encoded
    srp_verifier: str  # hex-encoded
    encrypted_private_key: str  # base64-encoded
    private_key_nonce: str  # base64-encoded
    encrypted_vault_key: str  # base64-encoded
    vault_key_nonce: str  # base64-encoded
    public_key: str  # base64-encoded
    pbkdf2_salt: str  # base64-encoded
    hkdf_salt: str  # base64-encoded
# --- Account Self-Service Schemas ---
class DeleteAccountRequest(BaseModel):
    """Request body for account self-deletion. User must type 'DELETE' to confirm."""
    confirmation: str  # Must be "DELETE" to confirm
class DeleteAccountResponse(BaseModel):
    """Response after successful account deletion."""
    message: str
    deleted: bool

View File

@@ -0,0 +1,78 @@
"""Pydantic request/response schemas for the Internal Certificate Authority."""
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, ConfigDict
# ---------------------------------------------------------------------------
# Request schemas
# ---------------------------------------------------------------------------
class CACreateRequest(BaseModel):
    """Request to generate a new root CA for the tenant."""
    common_name: str = "Portal Root CA"
    validity_years: int = 10  # Default 10 years for CA
class CertSignRequest(BaseModel):
    """Request to sign a per-device certificate using the tenant CA."""
    device_id: UUID
    validity_days: int = 730  # Default 2 years for device certs
class BulkCertDeployRequest(BaseModel):
    """Request to deploy certificates to multiple devices."""
    device_ids: list[UUID]
# ---------------------------------------------------------------------------
# Response schemas
# ---------------------------------------------------------------------------
class CAResponse(BaseModel):
    """Public details of a tenant's Certificate Authority (no private key)."""
    id: UUID
    tenant_id: UUID
    common_name: str
    fingerprint_sha256: str
    serial_number: str  # serialized as a string (encoding not visible here)
    not_valid_before: datetime
    not_valid_after: datetime
    created_at: datetime
    model_config = ConfigDict(from_attributes=True)
class DeviceCertResponse(BaseModel):
    """Public details of a device certificate (no private key)."""
    id: UUID
    tenant_id: UUID
    device_id: UUID
    ca_id: UUID
    common_name: str
    fingerprint_sha256: str
    serial_number: str
    not_valid_before: datetime
    not_valid_after: datetime
    status: str  # lifecycle state — allowed values defined elsewhere; TODO confirm
    deployed_at: datetime | None
    created_at: datetime
    updated_at: datetime
    model_config = ConfigDict(from_attributes=True)
class CertDeployResponse(BaseModel):
    """Result of a single device certificate deployment attempt."""
    success: bool
    device_id: UUID
    cert_name_on_device: str | None = None
    error: str | None = None

View File

@@ -0,0 +1,271 @@
"""Pydantic schemas for Device, DeviceGroup, and DeviceTag endpoints."""
import uuid
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, field_validator
# ---------------------------------------------------------------------------
# Device schemas
# ---------------------------------------------------------------------------
class DeviceCreate(BaseModel):
    """Schema for creating a new device."""
    hostname: str
    ip_address: str
    api_port: int = 8728  # RouterOS API default port
    api_ssl_port: int = 8729  # RouterOS API-SSL default port
    username: str
    password: str
class DeviceUpdate(BaseModel):
    """Schema for updating an existing device. All fields optional."""
    hostname: Optional[str] = None
    ip_address: Optional[str] = None
    api_port: Optional[int] = None
    api_ssl_port: Optional[int] = None
    username: Optional[str] = None
    password: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    tls_mode: Optional[str] = None
    @field_validator("tls_mode")
    @classmethod
    def validate_tls_mode(cls, v: Optional[str]) -> Optional[str]:
        """Validate tls_mode is one of the allowed values (None passes through)."""
        if v is None:
            return v
        allowed = {"auto", "insecure", "plain", "portal_ca"}
        if v not in allowed:
            raise ValueError(f"tls_mode must be one of: {', '.join(sorted(allowed))}")
        return v
class DeviceTagRef(BaseModel):
    """Minimal tag info embedded in device responses."""
    id: uuid.UUID
    name: str
    color: Optional[str] = None
    model_config = {"from_attributes": True}
class DeviceGroupRef(BaseModel):
    """Minimal group info embedded in device responses."""
    id: uuid.UUID
    name: str
    model_config = {"from_attributes": True}
class DeviceResponse(BaseModel):
    """Device response schema. NEVER includes credential fields."""
    id: uuid.UUID
    hostname: str
    ip_address: str
    api_port: int
    api_ssl_port: int
    model: Optional[str] = None
    serial_number: Optional[str] = None
    firmware_version: Optional[str] = None
    routeros_version: Optional[str] = None
    routeros_major_version: Optional[int] = None
    uptime_seconds: Optional[int] = None
    last_seen: Optional[datetime] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    status: str
    tls_mode: str = "auto"
    tags: list[DeviceTagRef] = []
    groups: list[DeviceGroupRef] = []
    created_at: datetime
    model_config = {"from_attributes": True}
class DeviceListResponse(BaseModel):
    """Paginated device list response."""
    items: list[DeviceResponse]
    total: int
    page: int
    page_size: int
# ---------------------------------------------------------------------------
# Subnet scan schemas
# ---------------------------------------------------------------------------
class SubnetScanRequest(BaseModel):
    """Request body for a subnet scan."""
    cidr: str
    @field_validator("cidr")
    @classmethod
    def validate_cidr(cls, v: str) -> str:
        """Validate CIDR notation and restrict scans to RFC 1918 ranges.

        ``ip_network(...).is_private`` also accepts loopback (127/8),
        link-local (169.254/16), CGNAT (100.64/10), and IPv6 ULA ranges,
        so membership is checked explicitly against the three RFC 1918
        networks the error message promises.
        """
        import ipaddress
        try:
            network = ipaddress.ip_network(v, strict=False)
        except ValueError as e:
            raise ValueError(f"Invalid CIDR notation: {e}") from e
        # Only allow private IP ranges (RFC 1918: 10/8, 172.16/12, 192.168/16)
        rfc1918 = (
            ipaddress.ip_network("10.0.0.0/8"),
            ipaddress.ip_network("172.16.0.0/12"),
            ipaddress.ip_network("192.168.0.0/16"),
        )
        is_rfc1918 = isinstance(network, ipaddress.IPv4Network) and any(
            network.subnet_of(private) for private in rfc1918
        )
        if not is_rfc1918:
            raise ValueError(
                "Only private IP ranges can be scanned (RFC 1918: "
                "10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16)"
            )
        # Reject ranges larger than /20 (4096 IPs) to prevent abuse
        if network.num_addresses > 4096:
            raise ValueError(
                f"CIDR range too large ({network.num_addresses} addresses). "
                "Maximum allowed: /20 (4096 addresses)."
            )
        return v
class SubnetScanResult(BaseModel):
    """A single discovered host from a subnet scan."""
    ip_address: str
    hostname: Optional[str] = None
    api_port_open: bool = False
    api_ssl_port_open: bool = False
class SubnetScanResponse(BaseModel):
    """Response for a subnet scan operation."""
    cidr: str
    discovered: list[SubnetScanResult]
    total_scanned: int
    total_discovered: int
# ---------------------------------------------------------------------------
# Bulk add from scan
# ---------------------------------------------------------------------------
class BulkDeviceAdd(BaseModel):
    """One device entry within a bulk-add request."""
    ip_address: str
    hostname: Optional[str] = None
    api_port: int = 8728  # RouterOS API default port
    api_ssl_port: int = 8729  # RouterOS API-SSL default port
    username: Optional[str] = None
    password: Optional[str] = None
class BulkAddRequest(BaseModel):
    """
    Bulk-add devices selected from a scan result.
    shared_username / shared_password are used for all devices that do not
    provide their own credentials.
    """
    devices: list[BulkDeviceAdd]
    shared_username: Optional[str] = None
    shared_password: Optional[str] = None
class BulkAddResult(BaseModel):
    """Summary result of a bulk-add operation."""
    added: list[DeviceResponse]
    failed: list[dict]  # {ip_address, error}
# ---------------------------------------------------------------------------
# DeviceGroup schemas
# ---------------------------------------------------------------------------
class DeviceGroupCreate(BaseModel):
    """Schema for creating a device group."""
    name: str
    description: Optional[str] = None
class DeviceGroupUpdate(BaseModel):
    """Schema for updating a device group."""
    name: Optional[str] = None
    description: Optional[str] = None
class DeviceGroupResponse(BaseModel):
    """Device group response schema."""
    id: uuid.UUID
    name: str
    description: Optional[str] = None
    device_count: int = 0
    created_at: datetime
    model_config = {"from_attributes": True}
# ---------------------------------------------------------------------------
# DeviceTag schemas
# ---------------------------------------------------------------------------
class DeviceTagCreate(BaseModel):
    """Schema for creating a device tag."""
    name: str
    color: Optional[str] = None
    @field_validator("color")
    @classmethod
    def validate_color(cls, v: Optional[str]) -> Optional[str]:
        """Validate hex color format if provided."""
        if v is None:
            return v
        import re
        if not re.match(r"^#[0-9A-Fa-f]{6}$", v):
            raise ValueError("Color must be a valid 6-digit hex color (e.g. #FF5733)")
        return v
class DeviceTagUpdate(BaseModel):
    """Schema for updating a device tag."""
    name: Optional[str] = None
    color: Optional[str] = None
    @field_validator("color")
    @classmethod
    def validate_color(cls, v: Optional[str]) -> Optional[str]:
        """Validate hex color format if provided (same rule as DeviceTagCreate)."""
        if v is None:
            return v
        import re
        if not re.match(r"^#[0-9A-Fa-f]{6}$", v):
            raise ValueError("Color must be a valid 6-digit hex color (e.g. #FF5733)")
        return v
class DeviceTagResponse(BaseModel):
    """Device tag response schema."""
    id: uuid.UUID
    name: str
    color: Optional[str] = None
    model_config = {"from_attributes": True}

View File

@@ -0,0 +1,31 @@
"""Tenant request/response schemas."""
import uuid
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class TenantCreate(BaseModel):
    """Payload for creating a tenant."""
    name: str
    description: Optional[str] = None
    contact_email: Optional[str] = None
class TenantUpdate(BaseModel):
    """Partial-update payload for a tenant; omitted fields are unchanged."""
    name: Optional[str] = None
    description: Optional[str] = None
    contact_email: Optional[str] = None
class TenantResponse(BaseModel):
    """Tenant detail response, including derived user/device counts."""
    id: uuid.UUID
    name: str
    description: Optional[str] = None
    contact_email: Optional[str] = None
    user_count: int = 0
    device_count: int = 0
    created_at: datetime
    model_config = {"from_attributes": True}

View File

@@ -0,0 +1,53 @@
"""User request/response schemas."""
import uuid
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, EmailStr, field_validator
from app.models.user import UserRole
class UserCreate(BaseModel):
    """Payload for creating a user within a tenant."""
    name: str
    email: EmailStr
    password: str
    role: UserRole = UserRole.VIEWER
    @field_validator("password")
    @classmethod
    def validate_password(cls, v: str) -> str:
        """Enforce a minimum password length of 8 characters."""
        if len(v) < 8:
            raise ValueError("Password must be at least 8 characters")
        return v
    @field_validator("role")
    @classmethod
    def validate_role(cls, v: UserRole) -> UserRole:
        """Restrict assignable roles to tenant_admin/operator/viewer;
        super_admin is never creatable here (assigned via a separate flow)."""
        allowed_tenant_roles = {UserRole.TENANT_ADMIN, UserRole.OPERATOR, UserRole.VIEWER}
        if v not in allowed_tenant_roles:
            raise ValueError(
                f"Role must be one of: {', '.join(r.value for r in allowed_tenant_roles)}"
            )
        return v
class UserResponse(BaseModel):
    """User detail response. Never includes password/credential fields."""
    id: uuid.UUID
    name: str
    email: str
    role: str
    tenant_id: Optional[uuid.UUID] = None
    is_active: bool
    last_login: Optional[datetime] = None
    created_at: datetime
    model_config = {"from_attributes": True}
class UserUpdate(BaseModel):
    """Partial-update payload for a user; omitted fields are unchanged."""
    name: Optional[str] = None
    role: Optional[UserRole] = None
    is_active: Optional[bool] = None

View File

@@ -0,0 +1,91 @@
"""Pydantic schemas for WireGuard VPN management."""
import uuid
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
# ── VPN Config (server-side) ──
class VpnSetupRequest(BaseModel):
"""Request to enable VPN for a tenant."""
endpoint: Optional[str] = None # public hostname:port — if blank, devices must be configured manually
class VpnConfigResponse(BaseModel):
"""VPN server configuration (never exposes private key)."""
model_config = {"from_attributes": True}
id: uuid.UUID
tenant_id: uuid.UUID
server_public_key: str
subnet: str
server_port: int
server_address: str
endpoint: Optional[str]
is_enabled: bool
peer_count: int = 0
created_at: datetime
class VpnConfigUpdate(BaseModel):
"""Update VPN configuration."""
endpoint: Optional[str] = None
is_enabled: Optional[bool] = None
# ── VPN Peers ──
class VpnPeerCreate(BaseModel):
"""Add a device as a VPN peer."""
device_id: uuid.UUID
additional_allowed_ips: Optional[str] = None # comma-separated subnets for site-to-site routing
class VpnPeerResponse(BaseModel):
"""VPN peer info (never exposes private key)."""
model_config = {"from_attributes": True}
id: uuid.UUID
device_id: uuid.UUID
device_hostname: str = ""
device_ip: str = ""
peer_public_key: str
assigned_ip: str
is_enabled: bool
last_handshake: Optional[datetime]
created_at: datetime
# ── VPN Onboarding (combined device + peer creation) ──
class VpnOnboardRequest(BaseModel):
"""Combined device creation + VPN peer onboarding."""
hostname: str
username: str
password: str
class VpnOnboardResponse(BaseModel):
"""Response from onboarding — device, peer, and RouterOS commands."""
device_id: uuid.UUID
peer_id: uuid.UUID
hostname: str
assigned_ip: str
routeros_commands: list[str]
class VpnPeerConfig(BaseModel):
    """Full peer config for display/export — includes private key for device setup.

    This is the ONE schema that intentionally carries the peer's private key;
    it exists so the operator can configure the device. Handle accordingly.
    """

    peer_private_key: str  # sensitive — only returned for device setup/export
    peer_public_key: str
    assigned_ip: str
    server_public_key: str
    server_endpoint: str
    allowed_ips: str
    routeros_commands: list[str]  # ready-to-paste RouterOS setup commands

View File

View File

@@ -0,0 +1,95 @@
"""Dangerous RouterOS command and path blocklist.
Prevents destructive or sensitive operations from being executed through
the config editor. Commands and paths are checked via case-insensitive
prefix matching against known-dangerous entries.
To extend: add strings to DANGEROUS_COMMANDS, BROWSE_BLOCKED_PATHS,
or WRITE_BLOCKED_PATHS.
"""
from fastapi import HTTPException, status
# CLI commands blocked from the execute endpoint.
# Matched as case-insensitive prefixes (e.g., "/user" blocks "/user/print" too).
DANGEROUS_COMMANDS: list[str] = [
    "/system/reset-configuration",
    "/system/shutdown",
    "/system/reboot",
    "/system/backup",
    "/system/license",
    # Prefix match: "/user" also blocks "/user/print", "/user/add", etc.
    "/user",
    "/password",
    "/certificate",
    "/radius",
    "/export",
    "/import",
]
# Paths blocked from ALL operations including browse (truly dangerous to read).
BROWSE_BLOCKED_PATHS: list[str] = [
    "system/reset-configuration",
    "system/shutdown",
    "system/reboot",
    "system/backup",
    "system/license",
    "password",
]
# Paths blocked from write operations (add/set/remove) but readable via browse.
WRITE_BLOCKED_PATHS: list[str] = [
    "user",
    "certificate",
    "radius",
]
def check_command_safety(command: str) -> None:
    """Gatekeeper for the execute endpoint: reject dangerous CLI commands.

    The command is normalized (whitespace stripped, lowercased) and compared
    against every entry in DANGEROUS_COMMANDS as a case-insensitive prefix,
    so "/user" covers "/user/print" and similar sub-commands as well.

    Raises:
        HTTPException: 403 if the command matches a dangerous prefix.
    """
    candidate = command.strip().lower()
    # First dangerous prefix the normalized command starts with, if any.
    hit = next((p for p in DANGEROUS_COMMANDS if candidate.startswith(p)), None)
    if hit is not None:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=(
                f"Command blocked: '{command}' matches dangerous prefix '{hit}'. "
                f"This operation is not allowed through the config editor."
            ),
        )
def check_path_safety(path: str, *, write: bool = False) -> None:
    """Reject dangerous RouterOS menu paths with HTTP 403.

    The path is normalized (strip, lowercase, leading '/' removed) and
    checked by prefix against BROWSE_BLOCKED_PATHS — and additionally
    against WRITE_BLOCKED_PATHS when a write operation is requested.

    Args:
        path: The RouterOS menu path to check.
        write: When True (add/set/remove), also enforce WRITE_BLOCKED_PATHS;
            when False (read-only browse), only BROWSE_BLOCKED_PATHS apply.

    Raises:
        HTTPException: 403 if the path matches a blocked prefix.
    """
    normalized = path.strip().lower().lstrip("/")
    # Browse blocks always apply; write blocks are appended for mutations.
    prefixes = list(BROWSE_BLOCKED_PATHS)
    if write:
        prefixes += WRITE_BLOCKED_PATHS
    for prefix in prefixes:
        if normalized.startswith(prefix):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=(
                    f"Path blocked: '{path}' matches dangerous prefix '{prefix}'. "
                    f"This operation is not allowed through the config editor."
                ),
            )

View File

@@ -0,0 +1 @@
"""Backend services — auth, crypto, and business logic."""

View File

@@ -0,0 +1,240 @@
"""Account self-service operations: deletion and data export.
Provides GDPR/CCPA-compliant account deletion with full PII erasure
and data portability export (Article 20).
All queries use raw SQL via text() with admin sessions (bypass RLS)
since these are cross-table operations on the authenticated user's data.
"""
import hashlib
import uuid
from datetime import UTC, datetime
from typing import Any
import structlog
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import AdminAsyncSessionLocal
from app.services.audit_service import log_action
logger = structlog.get_logger("account_service")
async def delete_user_account(
    db: AsyncSession,
    user_id: uuid.UUID,
    tenant_id: uuid.UUID | None,
    user_email: str,
) -> dict[str, Any]:
    """Hard-delete a user account with full PII erasure.

    Steps (order matters — the receipt must exist before the row is gone):
    1. Create a deletion receipt audit log (persisted via separate session)
    2. Anonymize PII in existing audit_logs for this user
    3. Hard-delete the user row (CASCADE handles related tables)
    4. Best-effort session invalidation via Redis

    Args:
        db: Admin async session (bypasses RLS).
        user_id: UUID of the user to delete.
        tenant_id: Tenant UUID (None for super_admin).
        user_email: User's email (needed for audit hash before deletion).

    Returns:
        Dict with deleted=True and user_id on success.
    """
    # Nil UUID stands in for "no tenant" (super_admin) so log_action always
    # receives a tenant_id.
    effective_tenant_id = tenant_id or uuid.UUID(int=0)
    # Hash the email so the receipt can be correlated without storing PII.
    email_hash = hashlib.sha256(user_email.encode()).hexdigest()
    # ── 1. Pre-deletion audit receipt (separate session so it persists) ────
    # A dedicated session keeps the receipt independent of this function's
    # main transaction; failure to write it is logged but does not abort.
    try:
        async with AdminAsyncSessionLocal() as audit_db:
            await log_action(
                audit_db,
                tenant_id=effective_tenant_id,
                user_id=user_id,
                action="account_deleted",
                resource_type="user",
                resource_id=str(user_id),
                details={
                    "deleted_user_id": str(user_id),
                    "email_hash": email_hash,
                    "deletion_type": "self_service",
                    "deleted_at": datetime.now(UTC).isoformat(),
                },
            )
            await audit_db.commit()
    except Exception:
        logger.warning(
            "deletion_receipt_failed",
            user_id=str(user_id),
            exc_info=True,
        )
    # ── 2. Anonymize PII in audit_logs for this user ─────────────────────
    # Strip PII keys from details JSONB (email, name, user_email, user_name)
    await db.execute(
        text(
            "UPDATE audit_logs "
            "SET details = details - 'email' - 'name' - 'user_email' - 'user_name' "
            "WHERE user_id = :user_id"
        ),
        {"user_id": user_id},
    )
    # Null out encrypted_details (may contain encrypted PII)
    await db.execute(
        text(
            "UPDATE audit_logs "
            "SET encrypted_details = NULL "
            "WHERE user_id = :user_id"
        ),
        {"user_id": user_id},
    )
    # ── 3. Hard delete user row ──────────────────────────────────────────
    # CASCADE handles: user_key_sets, api_keys, password_reset_tokens
    # SET NULL handles: audit_logs.user_id, key_access_log.user_id,
    #   maintenance_windows.created_by, alert_events.acknowledged_by
    await db.execute(
        text("DELETE FROM users WHERE id = :user_id"),
        {"user_id": user_id},
    )
    # Single commit covers both the anonymization updates and the delete.
    await db.commit()
    # ── 4. Best-effort Redis session invalidation ────────────────────────
    # Imports are local so a missing/unreachable Redis never breaks deletion.
    try:
        import redis.asyncio as aioredis

        from app.config import settings
        from app.services.auth import revoke_user_tokens

        r = aioredis.from_url(settings.REDIS_URL, decode_responses=True)
        await revoke_user_tokens(r, str(user_id))
        await r.aclose()
    except Exception:
        # JWT expires in 15 min anyway; not critical
        logger.debug("redis_session_invalidation_skipped", user_id=str(user_id))
    logger.info("account_deleted", user_id=str(user_id), email_hash=email_hash)
    return {"deleted": True, "user_id": str(user_id)}
def _iso(dt: datetime | None) -> str | None:
    """Render a datetime as an ISO-8601 string, passing falsy values through as None."""
    return dt.isoformat() if dt else None


async def export_user_data(
    db: AsyncSession,
    user_id: uuid.UUID,
    tenant_id: uuid.UUID | None,
) -> dict[str, Any]:
    """Assemble all user data for GDPR Art. 20 data portability export.

    Gathers the user's profile row, API key metadata (never the key hash),
    the 1000 most recent audit-log entries, and the 1000 most recent
    key-access-log entries into a single export envelope.

    Args:
        db: Admin async session (bypasses RLS).
        user_id: UUID of the user whose data to export.
        tenant_id: Tenant UUID (None for super_admin). Not referenced by the
            queries; kept for interface symmetry with delete_user_account.

    Returns:
        Envelope dict with export_date, format_version, and all user data.
    """
    # ── User profile ─────────────────────────────────────────────────────
    result = await db.execute(
        text(
            "SELECT id, email, name, role, tenant_id, "
            "created_at, last_login, auth_version "
            "FROM users WHERE id = :user_id"
        ),
        {"user_id": user_id},
    )
    user_row = result.mappings().first()
    user_data: dict[str, Any] = {}
    if user_row:
        user_data = {
            "id": str(user_row["id"]),
            "email": user_row["email"],
            "name": user_row["name"],
            "role": user_row["role"],
            "tenant_id": str(user_row["tenant_id"]) if user_row["tenant_id"] else None,
            "created_at": _iso(user_row["created_at"]),
            "last_login": _iso(user_row["last_login"]),
            "auth_version": user_row["auth_version"],
        }
    # ── API keys (exclude key_hash for security) ─────────────────────────
    result = await db.execute(
        text(
            "SELECT id, name, key_prefix, scopes, created_at, "
            "expires_at, revoked_at, last_used_at "
            "FROM api_keys WHERE user_id = :user_id "
            "ORDER BY created_at DESC"
        ),
        {"user_id": user_id},
    )
    api_keys = [
        {
            "id": str(row["id"]),
            "name": row["name"],
            "key_prefix": row["key_prefix"],
            "scopes": row["scopes"],
            "created_at": _iso(row["created_at"]),
            "expires_at": _iso(row["expires_at"]),
            "revoked_at": _iso(row["revoked_at"]),
            "last_used_at": _iso(row["last_used_at"]),
        }
        for row in result.mappings().all()
    ]
    # ── Audit logs (limit 1000, most recent first) ───────────────────────
    result = await db.execute(
        text(
            "SELECT id, action, resource_type, resource_id, "
            "details, ip_address, created_at "
            "FROM audit_logs WHERE user_id = :user_id "
            "ORDER BY created_at DESC LIMIT 1000"
        ),
        {"user_id": user_id},
    )
    audit_logs = [
        {
            "id": str(row["id"]),
            "action": row["action"],
            "resource_type": row["resource_type"],
            "resource_id": row["resource_id"],
            # Normalize NULL/empty details to an empty dict for the export.
            "details": row["details"] or {},
            "ip_address": row["ip_address"],
            "created_at": _iso(row["created_at"]),
        }
        for row in result.mappings().all()
    ]
    # ── Key access log (limit 1000, most recent first) ───────────────────
    result = await db.execute(
        text(
            "SELECT id, action, resource_type, ip_address, created_at "
            "FROM key_access_log WHERE user_id = :user_id "
            "ORDER BY created_at DESC LIMIT 1000"
        ),
        {"user_id": user_id},
    )
    key_access_entries = [
        {
            "id": str(row["id"]),
            "action": row["action"],
            "resource_type": row["resource_type"],
            "ip_address": row["ip_address"],
            "created_at": _iso(row["created_at"]),
        }
        for row in result.mappings().all()
    ]
    return {
        "export_date": datetime.now(UTC).isoformat(),
        "format_version": "1.0",
        "user": user_data,
        "api_keys": api_keys,
        "audit_logs": audit_logs,
        "key_access_log": key_access_entries,
    }

Some files were not shown because too many files have changed in this diff Show More