feat: The Other Dude v9.0.1 — full-featured email system

ci: add GitHub Pages deployment workflow for docs site

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-08 17:46:37 -05:00
commit b840047e19
511 changed files with 106948 additions and 0 deletions

9
poller/.dockerignore Normal file
View File

@@ -0,0 +1,9 @@
# Git
.git
.gitignore
# Go build artifacts
vendor/
# Logs
*.log

7
poller/.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
# Go build output
/poller
/cmd/poller/poller
# Test binaries
*.test
*.out

17
poller/Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
FROM golang:1.24-alpine AS builder
WORKDIR /build
# Copy module manifests first so the `go mod download` layer is cached
# independently of source-code changes.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# GOMAXPROCS=1 limits the Go compiler to one OS thread during the Docker build.
# Without this, go build spawns workers proportional to GOMAXPROCS (defaults to
# the host CPU count), which combined with the parallel Node and Python builds
# can saturate all cores and spike RAM on a 2-core / 2-4 GB server.
# CGO_ENABLED=0 yields a statically linked binary that can run on the bare
# Alpine runtime image below without glibc.
RUN CGO_ENABLED=0 GOOS=linux GOMAXPROCS=1 go build -o /poller ./cmd/poller

# Minimal runtime stage: just the static binary, CA certificates (for TLS
# connections), and iproute2 ("ip route add" is used by docker-entrypoint.sh
# to install the optional VPN route).
FROM alpine:3.21
RUN apk add --no-cache ca-certificates iproute2
COPY --from=builder /poller /usr/local/bin/poller
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

231
poller/cmd/poller/main.go Normal file
View File

@@ -0,0 +1,231 @@
// Command poller is the MikroTik device polling microservice.
//
// It connects to RouterOS devices via the binary API (port 8729 TLS), detects
// their online/offline status and version, and publishes events to NATS JetStream.
// It uses Redis distributed locking to prevent duplicate polls when running as
// multiple replicas.
package main
import (
"context"
"log/slog"
"os"
"os/signal"
"syscall"
"time"
"github.com/bsm/redislock"
"github.com/redis/go-redis/v9"
"github.com/mikrotik-portal/poller/internal/bus"
"github.com/mikrotik-portal/poller/internal/config"
"github.com/mikrotik-portal/poller/internal/observability"
"github.com/mikrotik-portal/poller/internal/poller"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
)
// main delegates all work to run so that deferred cleanup executes before
// the process exits. Calling os.Exit directly from a function with pending
// defers skips them all; the previous version did exactly that on the
// scheduler-failure path, leaving the database pool, Redis client, NATS
// connection, and subscriptions unclosed.
func main() {
	os.Exit(run())
}

// run wires together configuration, structured logging, PostgreSQL, Redis,
// OpenBao, NATS (publisher plus three responders/subscribers), the
// observability HTTP server, and the device scheduler, then blocks until
// the scheduler stops (on SIGINT/SIGTERM or on error).
//
// It returns the process exit code: 0 on clean shutdown, 1 on any startup
// or runtime failure. All defers live in this function so that cleanup
// runs on every exit path, success and failure alike.
func run() int {
	// -----------------------------------------------------------------------
	// Structured logging setup (log/slog, JSON for production)
	// -----------------------------------------------------------------------
	slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
		Level: slog.LevelInfo, // overridden below once config is loaded
	}).WithAttrs([]slog.Attr{
		slog.String("service", "poller"),
	})))
	slog.Info("mikrotik poller starting")

	// -----------------------------------------------------------------------
	// Load configuration from environment
	// -----------------------------------------------------------------------
	cfg, err := config.Load()
	if err != nil {
		slog.Error("failed to load configuration", "error", err)
		return 1
	}

	// Apply configured log level; anything unrecognized falls back to info.
	var logLevel slog.Level
	switch cfg.LogLevel {
	case "debug":
		logLevel = slog.LevelDebug
	case "warn":
		logLevel = slog.LevelWarn
	case "error":
		logLevel = slog.LevelError
	default:
		logLevel = slog.LevelInfo
	}

	// Rebuild the default logger with the configured level and an instance
	// attribute. Hostname lookup is best effort: on failure the instance
	// attribute is simply empty.
	hostname, _ := os.Hostname()
	slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
		Level: logLevel,
	}).WithAttrs([]slog.Attr{
		slog.String("service", "poller"),
		slog.String("instance", hostname),
	})))
	slog.Info("configuration loaded",
		"poll_interval_s", cfg.PollIntervalSeconds,
		"device_refresh_s", cfg.DeviceRefreshSeconds,
		"connection_timeout_s", cfg.ConnectionTimeoutSeconds,
		"log_level", cfg.LogLevel,
	)

	// -----------------------------------------------------------------------
	// Context with graceful shutdown on SIGINT/SIGTERM
	// -----------------------------------------------------------------------
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigCh
		slog.Info("received shutdown signal", "signal", sig.String())
		cancel()
	}()

	// -----------------------------------------------------------------------
	// Initialize PostgreSQL device store
	// -----------------------------------------------------------------------
	deviceStore, err := store.NewDeviceStore(ctx, cfg.DatabaseURL)
	if err != nil {
		slog.Error("failed to connect to database", "error", err)
		return 1
	}
	defer deviceStore.Close()
	slog.Info("connected to PostgreSQL")

	// -----------------------------------------------------------------------
	// Initialize Redis client and distributed locker
	// -----------------------------------------------------------------------
	redisOpts, err := redis.ParseURL(cfg.RedisURL)
	if err != nil {
		slog.Error("invalid REDIS_URL", "error", err)
		return 1
	}
	redisClient := redis.NewClient(redisOpts)
	defer redisClient.Close()
	// Verify Redis connectivity up front so misconfiguration fails fast.
	if err := redisClient.Ping(ctx).Err(); err != nil {
		slog.Error("failed to connect to Redis", "error", err)
		return 1
	}
	slog.Info("connected to Redis")
	locker := redislock.New(redisClient)
	// Make Redis client available to the poller for firmware check rate limiting.
	poller.SetRedisClient(redisClient)

	// -----------------------------------------------------------------------
	// Initialize credential cache (OpenBao Transit + legacy fallback)
	// -----------------------------------------------------------------------
	var transitClient *vault.TransitClient
	if cfg.OpenBaoAddr != "" {
		transitClient = vault.NewTransitClient(cfg.OpenBaoAddr, cfg.OpenBaoToken)
		slog.Info("OpenBao Transit client initialized", "addr", cfg.OpenBaoAddr)
	}
	credentialCache := vault.NewCredentialCache(
		1024,                        // max 1024 cached credentials
		5*time.Minute,               // 5-minute TTL
		transitClient,               // nil if OpenBao not configured
		cfg.CredentialEncryptionKey, // nil if legacy key not set
		deviceStore.Pool(),          // for key_access_log inserts
	)
	slog.Info("credential cache initialized", "max_size", 1024, "ttl", "5m")

	// -----------------------------------------------------------------------
	// Initialize NATS JetStream publisher
	// -----------------------------------------------------------------------
	publisher, err := bus.NewPublisher(cfg.NatsURL)
	if err != nil {
		slog.Error("failed to connect to NATS", "error", err)
		return 1
	}
	defer publisher.Close()
	slog.Info("connected to NATS JetStream")

	// -----------------------------------------------------------------------
	// Initialize NATS command responder for interactive device commands
	// -----------------------------------------------------------------------
	cmdResponder := bus.NewCmdResponder(publisher.Conn(), deviceStore, credentialCache)
	if err := cmdResponder.Start(); err != nil {
		slog.Error("failed to start command responder", "error", err)
		return 1
	}
	defer cmdResponder.Stop()
	slog.Info("NATS command responder started (device.cmd.*)")

	// -----------------------------------------------------------------------
	// Initialize NATS cert deploy responder for certificate deployment
	// -----------------------------------------------------------------------
	certDeployResponder := bus.NewCertDeployResponder(publisher.Conn(), deviceStore, credentialCache)
	if err := certDeployResponder.Start(); err != nil {
		slog.Error("failed to start cert deploy responder", "error", err)
		return 1
	}
	defer certDeployResponder.Stop()
	slog.Info("NATS cert deploy responder started (cert.deploy.*)")

	// -----------------------------------------------------------------------
	// Initialize NATS credential change subscriber for cache invalidation
	// -----------------------------------------------------------------------
	credentialSub := bus.NewCredentialSubscriber(publisher.Conn(), credentialCache)
	if err := credentialSub.Start(); err != nil {
		slog.Error("failed to start credential subscriber", "error", err)
		return 1
	}
	defer credentialSub.Stop()
	slog.Info("NATS credential subscriber started (device.credential_changed.>)")

	// -----------------------------------------------------------------------
	// Start observability HTTP server (Prometheus metrics + health endpoint)
	// -----------------------------------------------------------------------
	observability.StartServer(ctx, ":9091")
	slog.Info("observability server started", "addr", ":9091")

	// -----------------------------------------------------------------------
	// Start the device scheduler (blocks until ctx is cancelled or it fails)
	// -----------------------------------------------------------------------
	pollInterval := time.Duration(cfg.PollIntervalSeconds) * time.Second
	connTimeout := time.Duration(cfg.ConnectionTimeoutSeconds) * time.Second
	cmdTimeout := time.Duration(cfg.CommandTimeoutSeconds) * time.Second
	refreshPeriod := time.Duration(cfg.DeviceRefreshSeconds) * time.Second
	baseBackoff := time.Duration(cfg.CircuitBreakerBaseBackoffSeconds) * time.Second
	maxBackoff := time.Duration(cfg.CircuitBreakerMaxBackoffSeconds) * time.Second
	scheduler := poller.NewScheduler(
		deviceStore,
		locker,
		publisher,
		credentialCache,
		pollInterval,
		connTimeout,
		cmdTimeout,
		refreshPeriod,
		cfg.CircuitBreakerMaxFailures,
		baseBackoff,
		maxBackoff,
	)
	slog.Info("starting device scheduler",
		"poll_interval", pollInterval,
		"refresh_period", refreshPeriod,
		"conn_timeout", connTimeout,
	)
	if err := scheduler.Run(ctx); err != nil {
		slog.Error("scheduler exited with error", "error", err)
		return 1
	}
	slog.Info("poller shutdown complete")
	return 0
}

15
poller/docker-entrypoint.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/sh
# Container entrypoint: optionally installs a VPN route, then starts the poller.
#
# Add VPN routes through wireguard container if WIREGUARD_GATEWAY is set
# WIREGUARD_GATEWAY can be an IP or hostname (resolved via Docker DNS)
if [ -n "$WIREGUARD_GATEWAY" ]; then
    # Resolve hostname to IP if needed
    GW_IP=$(getent hosts "$WIREGUARD_GATEWAY" 2>/dev/null | awk '{print $1}')
    if [ -z "$GW_IP" ]; then
        # getent returned nothing: treat the value as an IP literal.
        GW_IP="$WIREGUARD_GATEWAY"
    fi
    # Install the route to the device management subnet via the gateway.
    # Failures are swallowed (|| true) so a pre-existing route or missing
    # NET_ADMIN capability does not abort container startup.
    ip route add 10.10.0.0/16 via "$GW_IP" 2>/dev/null || true
    echo "VPN route: 10.10.0.0/16 via $GW_IP ($WIREGUARD_GATEWAY)"
fi
# Drop to nobody and exec poller
# NOTE(review): with `su ... -c`, the poller runs as a child of su, so SIGTERM
# from Docker is delivered to su as PID 1 — confirm this su forwards signals
# (or consider su-exec) so graceful shutdown in main.go actually triggers.
# NOTE(review): some su implementations reset the environment; verify the
# poller still sees its configuration env vars after the drop — TODO confirm.
exec su -s /bin/sh nobody -c "/usr/local/bin/poller"

92
poller/go.mod Normal file
View File

@@ -0,0 +1,92 @@
module github.com/mikrotik-portal/poller
go 1.24.0
require (
github.com/bsm/redislock v0.9.4
github.com/go-routeros/routeros/v3 v3.0.0
github.com/google/uuid v1.6.0
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/jackc/pgx/v5 v5.7.4
github.com/nats-io/nats.go v1.38.0
github.com/pkg/sftp v1.13.10
github.com/prometheus/client_golang v1.23.2
github.com/redis/go-redis/v9 v9.7.3
github.com/stretchr/testify v1.11.1
github.com/testcontainers/testcontainers-go v0.40.0
github.com/testcontainers/testcontainers-go/modules/nats v0.40.0
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0
github.com/testcontainers/testcontainers-go/modules/redis v0.40.0
golang.org/x/crypto v0.48.0
)
require (
dario.cat/mergo v1.0.2 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/docker v28.5.1+incompatible // indirect
github.com/docker/go-connections v0.6.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/ebitengine/purego v0.8.4 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/magiconair/properties v1.8.10 // indirect
github.com/mdelapenya/tlscert v0.2.0 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/go-archive v0.1.0 // indirect
github.com/moby/patternmatcher v0.6.0 // indirect
github.com/moby/sys/sequential v0.6.0 // indirect
github.com/moby/sys/user v0.4.0 // indirect
github.com/moby/sys/userns v0.1.0 // indirect
github.com/moby/term v0.5.0 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nats-io/nkeys v0.4.9 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/shirou/gopsutil/v4 v4.25.6 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.39.0 // indirect
go.opentelemetry.io/otel/metric v1.39.0 // indirect
go.opentelemetry.io/otel/sdk v1.39.0 // indirect
go.opentelemetry.io/otel/trace v1.39.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.41.0 // indirect
golang.org/x/text v0.34.0 // indirect
google.golang.org/grpc v1.79.1 // indirect
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

227
poller/go.sum Normal file
View File

@@ -0,0 +1,227 @@
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bsm/redislock v0.9.4 h1:X/Wse1DPpiQgHbVYRE9zv6m070UcKoOGekgvpNhiSvw=
github.com/bsm/redislock v0.9.4/go.mod h1:Epf7AJLiSFwLCiZcfi6pWFO/8eAYrYpQXFxEDPoDeAk=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.5.1+incompatible h1:Bm8DchhSD2J6PsFzxC35TZo4TLGR2PdW/E69rU45NhM=
github.com/docker/docker v28.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-routeros/routeros/v3 v3.0.0 h1:/V4Cgr+wmn3IyyYIXUX1KYK8pA1ADPiwLSlAi912j1M=
github.com/go-routeros/routeros/v3 v3.0.0/go.mod h1:j4mq65czXfKtHsdLkgVv8w7sNzyhLZy1TKi2zQDMpiQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.7.4 h1:9wKznZrhWa2QiHL+NjTSPP6yjl3451BX3imWDnokYlg=
github.com/jackc/pgx/v5 v5.7.4/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI=
github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw=
github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs=
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nats-io/nats.go v1.38.0 h1:A7P+g7Wjp4/NWqDOOP/K6hfhr54DvdDQUznt5JFg9XA=
github.com/nats-io/nats.go v1.38.0/go.mod h1:IGUM++TwokGnXPs82/wCuiHS02/aKrdYUQkU8If6yjw=
github.com/nats-io/nkeys v0.4.9 h1:qe9Faq2Gxwi6RZnZMXfmGMZkg3afLLOtrU+gDZJ35b0=
github.com/nats-io/nkeys v0.4.9/go.mod h1:jcMqs+FLG+W5YO36OX6wFIFcmpdAns+w1Wm6D3I/evE=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU=
github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/shirou/gopsutil/v4 v4.25.6 h1:kLysI2JsKorfaFPcYmcJqbzROzsBWEOAtw6A7dIfqXs=
github.com/shirou/gopsutil/v4 v4.25.6/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/testcontainers/testcontainers-go v0.40.0 h1:pSdJYLOVgLE8YdUY2FHQ1Fxu+aMnb6JfVz1mxk7OeMU=
github.com/testcontainers/testcontainers-go v0.40.0/go.mod h1:FSXV5KQtX2HAMlm7U3APNyLkkap35zNLxukw9oBi/MY=
github.com/testcontainers/testcontainers-go/modules/nats v0.40.0 h1:IfMgeVI7Mg7CIu0R9N0c85XYMjai7e4OCCmHvkmG6Hg=
github.com/testcontainers/testcontainers-go/modules/nats v0.40.0/go.mod h1:HpKiTohLxK5QGdCkF0W57nEUDzOR5aZsazH1uo8nqso=
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0 h1:s2bIayFXlbDFexo96y+htn7FzuhpXLYJNnIuglNKqOk=
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0/go.mod h1:h+u/2KoREGTnTl9UwrQ/g+XhasAT8E6dClclAADeXoQ=
github.com/testcontainers/testcontainers-go/modules/redis v0.40.0 h1:OG4qwcxp2O0re7V7M9lY9w0v6wWgWf7j7rtkpAnGMd0=
github.com/testcontainers/testcontainers-go/modules/redis v0.40.0/go.mod h1:Bc+EDhKMo5zI5V5zdBkHiMVzeAXbtI4n5isS/nzf6zw=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw=
go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU=
go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0=
go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs=
go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18=
go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE=
go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI=
go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA=
go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I=
go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=
golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg=
golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM=
golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44=
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
google.golang.org/grpc v1.79.1 h1:zGhSi45ODB9/p3VAawt9a+O/MULLl9dpizzNNpq7flY=
google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=

View File

@@ -0,0 +1,182 @@
// Package bus provides a NATS request-reply handler for certificate deployment.
//
// cmd_cert_deploy.go handles cert.deploy.{device_id} subjects. The Python backend
// sends signed certificate PEM data via NATS, and this handler:
// 1. Looks up the device and decrypts credentials
// 2. Establishes SSH/SFTP + RouterOS API connections
// 3. Calls device.DeployCert for the full deployment flow
// 4. Returns the result via NATS reply
package bus
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"strings"
"time"
"github.com/nats-io/nats.go"
"github.com/mikrotik-portal/poller/internal/device"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
)
// CertDeployResponder handles NATS request-reply for certificate deployment.
//
// One responder instance subscribes to cert.deploy.* in a queue group, so
// deployments are load-balanced across poller replicas.
type CertDeployResponder struct {
	nc              *nats.Conn             // core NATS connection used for the subscription and replies
	store           *store.DeviceStore     // device lookup (IP, ports, encrypted credentials)
	credentialCache *vault.CredentialCache // decrypts credentials (Transit preferred, legacy fallback)
	sub             *nats.Subscription     // active cert.deploy.* subscription; nil until Start succeeds
}
// NewCertDeployResponder creates a certificate deployment responder using the
// given NATS connection, device store, and credential cache. The subscription
// is not opened until Start is called.
func NewCertDeployResponder(nc *nats.Conn, store *store.DeviceStore, credentialCache *vault.CredentialCache) *CertDeployResponder {
	responder := &CertDeployResponder{
		nc:              nc,
		store:           store,
		credentialCache: credentialCache,
	}
	return responder
}
// Start subscribes to "cert.deploy.*" with a queue group for load balancing
// across multiple poller instances.
func (r *CertDeployResponder) Start() error {
	const (
		subject = "cert.deploy.*"
		queue   = "cert-deploy-workers"
	)
	sub, err := r.nc.QueueSubscribe(subject, queue, r.handleRequest)
	if err != nil {
		return fmt.Errorf("subscribing to cert.deploy.*: %w", err)
	}
	r.sub = sub
	slog.Info("cert deploy responder subscribed", "subject", subject, "queue", queue)
	return nil
}
// Stop unsubscribes from NATS. Safe to call when Start was never invoked.
func (r *CertDeployResponder) Stop() {
	if r.sub == nil {
		return
	}
	if err := r.sub.Unsubscribe(); err != nil {
		slog.Warn("error unsubscribing cert deploy responder", "error", err)
	}
}
// handleRequest processes a single certificate deployment request.
//
// Flow: parse the device ID from the subject, unmarshal the request, look up
// the device row, decrypt its credentials, open SSH (for SFTP upload) and
// RouterOS API connections, run device.DeployCert, and reply with the
// JSON-encoded response. Every failure path replies via respondError so the
// requester gets an error instead of a NATS request timeout.
func (r *CertDeployResponder) handleRequest(msg *nats.Msg) {
	// Extract device ID from subject: cert.deploy.{device_id}
	parts := strings.Split(msg.Subject, ".")
	if len(parts) < 3 {
		r.respondError(msg, "invalid subject format")
		return
	}
	deviceID := parts[2]
	// Parse cert deploy request
	var req device.CertDeployRequest
	if err := json.Unmarshal(msg.Data, &req); err != nil {
		r.respondError(msg, fmt.Sprintf("invalid request JSON: %s", err))
		return
	}
	slog.Info("cert deploy request received",
		"device_id", deviceID,
		"cert_name", req.CertName,
		"ssh_port", req.SSHPort,
	)
	// Default SSH port if not specified
	if req.SSHPort == 0 {
		req.SSHPort = 22
	}
	// Look up device from DB. Bounded so a slow database cannot stall the
	// queue-group worker indefinitely.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	dev, err := r.store.GetDevice(ctx, deviceID)
	if err != nil {
		slog.Warn("device lookup failed for cert deploy", "device_id", deviceID, "error", err)
		r.respondError(msg, fmt.Sprintf("device not found: %s", err))
		return
	}
	// Decrypt device credentials via credential cache (Transit preferred, legacy fallback)
	username, password, err := r.credentialCache.GetCredentials(
		dev.ID,
		dev.TenantID,
		dev.EncryptedCredentialsTransit,
		dev.EncryptedCredentials,
	)
	if err != nil {
		r.respondError(msg, fmt.Sprintf("credential decryption failed: %s", err))
		return
	}
	// Create SSH client for SFTP upload (30s timeout: file transfer is slower
	// than a plain API handshake).
	sshClient, err := device.NewSSHClient(dev.IPAddress, req.SSHPort, username, password, 30*time.Second)
	if err != nil {
		slog.Warn("SSH connection failed for cert deploy",
			"device_id", deviceID,
			"ip", dev.IPAddress,
			"ssh_port", req.SSHPort,
			"error", err,
		)
		r.respondError(msg, fmt.Sprintf("SSH connection failed: %s", err))
		return
	}
	defer sshClient.Close()
	// Create RouterOS API client for certificate import commands.
	// Uses the existing ConnectDevice which tries TLS then falls back to plain.
	// Pass nil for caCertPEM -- we're deploying the cert, so the device doesn't
	// have a portal-signed cert yet. Plan 03 wires per-device CA cert loading.
	apiClient, err := device.ConnectDevice(
		dev.IPAddress,
		dev.APISSLPort,
		dev.APIPort,
		username,
		password,
		10*time.Second,
		nil, // caCertPEM: device has no portal cert yet during deployment
		dev.TLSMode,
	)
	if err != nil {
		slog.Warn("API connection failed for cert deploy",
			"device_id", deviceID,
			"ip", dev.IPAddress,
			"error", err,
		)
		r.respondError(msg, fmt.Sprintf("device API connection failed: %s", err))
		return
	}
	defer device.CloseDevice(apiClient)
	// Execute the full deployment flow. DeployCert reports failure in the
	// response struct rather than an error, so we always reply with it.
	resp := device.DeployCert(sshClient, apiClient, req)
	slog.Info("cert deploy completed",
		"device_id", deviceID,
		"success", resp.Success,
		"cert_name_on_device", resp.CertNameOnDevice,
	)
	// Respond with result
	data, err := json.Marshal(resp)
	if err != nil {
		r.respondError(msg, fmt.Sprintf("failed to marshal response: %s", err))
		return
	}
	if err := msg.Respond(data); err != nil {
		slog.Error("failed to respond to cert deploy request", "error", err)
	}
}
// respondError sends an error response to a NATS cert deploy request.
func (r *CertDeployResponder) respondError(msg *nats.Msg, errMsg string) {
	// Marshalling a flat struct of a bool and a string cannot fail, so the
	// error is deliberately discarded.
	payload, _ := json.Marshal(device.CertDeployResponse{
		Success: false,
		Error:   errMsg,
	})
	if err := msg.Respond(payload); err != nil {
		slog.Error("failed to respond with cert deploy error", "error", err)
	}
}

View File

@@ -0,0 +1,166 @@
// Package bus provides NATS messaging for the poller service.
//
// cmd_responder.go implements a NATS request-reply handler for interactive
// RouterOS device commands. The Python backend sends command requests to
// "device.cmd.{device_id}" and receives structured responses.
package bus
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"strings"
"time"
"github.com/nats-io/nats.go"
"github.com/mikrotik-portal/poller/internal/device"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
)
// CmdResponder handles NATS request-reply for device commands.
//
// The Python backend sends a device.CommandRequest to device.cmd.{device_id}
// and receives a device.CommandResponse back.
type CmdResponder struct {
	nc              *nats.Conn             // core NATS connection for the subscription and replies
	store           *store.DeviceStore     // device lookup (IP, ports, encrypted credentials)
	credentialCache *vault.CredentialCache // decrypts credentials (Transit preferred, legacy fallback)
	sub             *nats.Subscription     // active device.cmd.* subscription; nil until Start succeeds
}
// NewCmdResponder creates a command responder using the given NATS connection,
// device store, and credential cache. The subscription is opened by Start.
func NewCmdResponder(nc *nats.Conn, store *store.DeviceStore, credentialCache *vault.CredentialCache) *CmdResponder {
	responder := &CmdResponder{
		nc:              nc,
		store:           store,
		credentialCache: credentialCache,
	}
	return responder
}
// Start subscribes to "device.cmd.*" with a queue group for load balancing
// across multiple poller instances.
func (r *CmdResponder) Start() error {
	const (
		subject = "device.cmd.*"
		queue   = "cmd-workers"
	)
	sub, err := r.nc.QueueSubscribe(subject, queue, r.handleRequest)
	if err != nil {
		return fmt.Errorf("subscribing to device.cmd.*: %w", err)
	}
	r.sub = sub
	slog.Info("command responder subscribed", "subject", subject, "queue", queue)
	return nil
}
// Stop unsubscribes from NATS. Safe to call when Start was never invoked.
func (r *CmdResponder) Stop() {
	if r.sub == nil {
		return
	}
	if err := r.sub.Unsubscribe(); err != nil {
		slog.Warn("error unsubscribing command responder", "error", err)
	}
}
// handleRequest processes a single device command request.
//
// Flow: parse the device ID from the subject, unmarshal the request, look up
// the device row, decrypt its credentials, connect to the RouterOS API, run
// the command, and reply with the JSON-encoded response. Every failure path
// replies via respondError so the requester gets an error instead of a NATS
// request timeout.
func (r *CmdResponder) handleRequest(msg *nats.Msg) {
	// Extract device ID from subject: device.cmd.{device_id}
	parts := strings.Split(msg.Subject, ".")
	if len(parts) < 3 {
		r.respondError(msg, "invalid subject format")
		return
	}
	deviceID := parts[2]
	// Parse command request
	var req device.CommandRequest
	if err := json.Unmarshal(msg.Data, &req); err != nil {
		r.respondError(msg, fmt.Sprintf("invalid request JSON: %s", err))
		return
	}
	slog.Debug("command request received",
		"device_id", deviceID,
		"command", req.Command,
		"args_count", len(req.Args),
	)
	// Look up device from DB. Bounded so a slow database cannot stall the
	// queue-group worker indefinitely.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	dev, err := r.store.GetDevice(ctx, deviceID)
	if err != nil {
		slog.Warn("device lookup failed for command", "device_id", deviceID, "error", err)
		r.respondError(msg, fmt.Sprintf("device not found: %s", err))
		return
	}
	// Decrypt credentials via credential cache (Transit preferred, legacy fallback)
	username, password, err := r.credentialCache.GetCredentials(
		dev.ID,
		dev.TenantID,
		dev.EncryptedCredentialsTransit,
		dev.EncryptedCredentials,
	)
	if err != nil {
		r.respondError(msg, fmt.Sprintf("credential decryption failed: %s", err))
		return
	}
	// Prepare CA cert PEM for TLS verification (only populated for portal_ca devices).
	var caCertPEM []byte
	if dev.CACertPEM != nil {
		caCertPEM = []byte(*dev.CACertPEM)
	}
	// Connect to device with 10-second timeout
	client, err := device.ConnectDevice(
		dev.IPAddress,
		dev.APISSLPort,
		dev.APIPort,
		username,
		password,
		10*time.Second,
		caCertPEM,
		dev.TLSMode,
	)
	if err != nil {
		// Info (not Warn): unreachable devices are routine; the caller gets
		// the detail in the error reply.
		slog.Info("device connection failed for command",
			"device_id", deviceID,
			"ip", dev.IPAddress,
			"error", err,
		)
		r.respondError(msg, fmt.Sprintf("device connection failed: %s", err))
		return
	}
	defer device.CloseDevice(client)
	// Execute the command. ExecuteCommand reports failure in the response
	// struct rather than an error, so we always reply with it.
	resp := device.ExecuteCommand(client, req.Command, req.Args)
	slog.Debug("command executed",
		"device_id", deviceID,
		"command", req.Command,
		"success", resp.Success,
		"result_count", len(resp.Data),
	)
	// Respond
	data, err := json.Marshal(resp)
	if err != nil {
		r.respondError(msg, fmt.Sprintf("failed to marshal response: %s", err))
		return
	}
	if err := msg.Respond(data); err != nil {
		slog.Error("failed to respond to command request", "error", err)
	}
}
// respondError sends an error response to a NATS request.
func (r *CmdResponder) respondError(msg *nats.Msg, errMsg string) {
	// Marshalling this flat struct cannot fail, so the error is discarded.
	payload, _ := json.Marshal(device.CommandResponse{
		Success: false,
		Data:    nil,
		Error:   errMsg,
	})
	if err := msg.Respond(payload); err != nil {
		slog.Error("failed to respond with error", "error", err)
	}
}

View File

@@ -0,0 +1,75 @@
// Package bus provides NATS messaging for the poller service.
//
// credential_subscriber.go subscribes to device.credential_changed.> events
// and invalidates the credential cache so the poller uses fresh credentials
// on the next poll cycle instead of waiting for the 5-minute cache TTL.
package bus
import (
"encoding/json"
"log/slog"
"github.com/nats-io/nats.go"
"github.com/mikrotik-portal/poller/internal/vault"
)
// CredentialSubscriber listens for credential change events and invalidates
// the credential cache. This ensures the poller picks up new credentials
// within seconds of a change rather than waiting for the 5-minute TTL.
type CredentialSubscriber struct {
	nc              *nats.Conn             // core NATS connection for the subscription
	credentialCache *vault.CredentialCache // cache whose entries are invalidated on events
	sub             *nats.Subscription     // active subscription; nil until Start succeeds
}
// NewCredentialSubscriber creates a subscriber that invalidates cached
// credentials when the backend publishes credential_changed events.
func NewCredentialSubscriber(nc *nats.Conn, credentialCache *vault.CredentialCache) *CredentialSubscriber {
	subscriber := &CredentialSubscriber{
		nc:              nc,
		credentialCache: credentialCache,
	}
	return subscriber
}
// Start subscribes to "device.credential_changed.>" with a queue group
// so only one poller instance processes each event.
//
// NOTE(review): queue-group delivery looks suspect here. If CredentialCache
// is a per-process in-memory cache, only the ONE replica that receives the
// event invalidates its entry — the other replicas keep stale credentials
// until the TTL expires, which is exactly what this subscriber exists to
// avoid. Confirm whether the cache is shared across replicas (e.g.
// Redis-backed); if it is not, this should be a plain Subscribe so every
// instance sees the event.
func (s *CredentialSubscriber) Start() error {
	sub, err := s.nc.QueueSubscribe("device.credential_changed.>", "credential-invalidators", s.handleEvent)
	if err != nil {
		return err
	}
	s.sub = sub
	slog.Info("credential subscriber started", "subject", "device.credential_changed.>", "queue", "credential-invalidators")
	return nil
}
// Stop unsubscribes from NATS. Safe to call when Start was never invoked.
func (s *CredentialSubscriber) Stop() {
	if s.sub == nil {
		return
	}
	if err := s.sub.Unsubscribe(); err != nil {
		slog.Warn("error unsubscribing credential subscriber", "error", err)
	}
}
// handleEvent processes a credential_changed event by invalidating the
// device's entry in the credential cache. Malformed events are logged and
// dropped; there is no reply path for pub/sub events.
func (s *CredentialSubscriber) handleEvent(msg *nats.Msg) {
	var evt struct {
		DeviceID string `json:"device_id"`
		TenantID string `json:"tenant_id"`
	}
	err := json.Unmarshal(msg.Data, &evt)
	switch {
	case err != nil:
		slog.Warn("failed to unmarshal credential_changed event", "error", err)
	case evt.DeviceID == "":
		slog.Warn("credential_changed event missing device_id")
	default:
		s.credentialCache.Invalidate(evt.DeviceID)
		slog.Info("credential cache invalidated",
			"device_id", evt.DeviceID,
			"tenant_id", evt.TenantID,
		)
	}
}

View File

@@ -0,0 +1,322 @@
// Package bus provides NATS JetStream publishing for device events.
package bus
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"time"
"github.com/nats-io/nats.go"
"github.com/nats-io/nats.go/jetstream"
"github.com/mikrotik-portal/poller/internal/device"
)
// DeviceStatusEvent is the payload published to NATS JetStream when a device
// is polled. Consumers subscribe to "device.status.>" to receive all events.
//
// Hardware/version fields are omitted from the JSON when empty, so an
// "offline" event typically carries only the identity fields and LastSeen.
type DeviceStatusEvent struct {
	DeviceID        string `json:"device_id"`
	TenantID        string `json:"tenant_id"`
	Status          string `json:"status"` // "online" or "offline"
	RouterOSVersion string `json:"routeros_version,omitempty"`
	MajorVersion    int    `json:"major_version,omitempty"`
	BoardName       string `json:"board_name,omitempty"`
	Architecture    string `json:"architecture,omitempty"`
	Uptime          string `json:"uptime,omitempty"`
	CPULoad         string `json:"cpu_load,omitempty"`
	FreeMemory      string `json:"free_memory,omitempty"`
	TotalMemory     string `json:"total_memory,omitempty"`
	SerialNumber    string `json:"serial_number,omitempty"`
	FirmwareVersion string `json:"firmware_version,omitempty"`
	LastSeen        string `json:"last_seen"` // RFC3339
}
// DeviceMetricsEvent is the payload published to NATS JetStream for metric data
// collected from a RouterOS device on each poll cycle.
//
// Events are published to "device.metrics.{type}.{device_id}" where type is one
// of "health", "interfaces", or "wireless". Only the field matching the type will
// be populated; the others will be omitted from the JSON payload.
type DeviceMetricsEvent struct {
	DeviceID    string                  `json:"device_id"`
	TenantID    string                  `json:"tenant_id"`
	CollectedAt string                  `json:"collected_at"` // RFC3339
	Type        string                  `json:"type"`         // "health", "interfaces", "wireless"
	Health      *device.HealthMetrics   `json:"health,omitempty"`
	Interfaces  []device.InterfaceStats `json:"interfaces,omitempty"`
	Wireless    []device.WirelessStats  `json:"wireless,omitempty"`
}
// ConfigChangedEvent is published when a device's config changes out-of-band.
// Published to "config.changed.{tenant_id}.{device_id}" (see PublishConfigChanged).
type ConfigChangedEvent struct {
	DeviceID     string `json:"device_id"`
	TenantID     string `json:"tenant_id"`
	OldTimestamp string `json:"old_timestamp"` // config timestamp seen on the previous poll
	NewTimestamp string `json:"new_timestamp"` // config timestamp seen on the current poll
}

// PushRollbackEvent triggers automatic rollback for template pushes.
// Published to "config.push.rollback.{tenant_id}.{device_id}" (see PublishPushRollback).
type PushRollbackEvent struct {
	DeviceID         string `json:"device_id"`
	TenantID         string `json:"tenant_id"`
	PushOperationID  string `json:"push_operation_id"`
	PrePushCommitSHA string `json:"pre_push_commit_sha"` // commit to roll the device config back to
}

// PushAlertEvent triggers an alert for editor pushes (one-click rollback).
// Published to "config.push.alert.{tenant_id}.{device_id}" (see PublishPushAlert).
type PushAlertEvent struct {
	DeviceID string `json:"device_id"`
	TenantID string `json:"tenant_id"`
	PushType string `json:"push_type"`
}
// Publisher wraps a NATS JetStream connection for publishing device events.
//
// Construct with NewPublisher; the zero value is not usable.
type Publisher struct {
	nc *nats.Conn          // underlying core connection (exposed via Conn)
	js jetstream.JetStream // JetStream context used for all publishes
}
// NewPublisher connects to NATS and ensures the DEVICE_EVENTS stream exists.
//
// The DEVICE_EVENTS stream covers device.status.>, device.metrics.>, and
// device.firmware.> subjects. These are explicit to avoid capturing
// device.cmd.* (used by CmdResponder for request-reply). This allows
// the Python API to subscribe to either family via durable consumers.
//
// The connection uses unlimited reconnects with a 2-second wait between attempts
// so the poller survives transient NATS restarts gracefully.
//
// On any failure after connecting, the connection is closed before returning,
// so a non-nil error never leaks a live connection.
func NewPublisher(natsURL string) (*Publisher, error) {
	nc, err := nats.Connect(natsURL,
		nats.MaxReconnects(-1),
		nats.ReconnectWait(2*time.Second),
		nats.DisconnectErrHandler(func(nc *nats.Conn, err error) {
			slog.Warn("NATS disconnected", "error", err)
		}),
		nats.ReconnectHandler(func(nc *nats.Conn) {
			slog.Info("NATS reconnected", "url", nc.ConnectedUrl())
		}),
	)
	if err != nil {
		return nil, fmt.Errorf("connecting to NATS at %s: %w", natsURL, err)
	}
	js, err := jetstream.New(nc)
	if err != nil {
		nc.Close()
		return nil, fmt.Errorf("creating JetStream context: %w", err)
	}
	// Ensure the DEVICE_EVENTS stream exists. CreateOrUpdateStream is idempotent.
	// Subjects are explicit (not "device.>") to avoid capturing device.cmd.*
	// which is used by CmdResponder for core NATS request-reply.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	_, err = js.CreateOrUpdateStream(ctx, jetstream.StreamConfig{
		Name: "DEVICE_EVENTS",
		Subjects: []string{
			"device.status.>",
			"device.metrics.>",
			"device.firmware.>",
			"device.credential_changed.>",
			"config.changed.>",
			"config.push.rollback.>",
			"config.push.alert.>",
		},
		// Events older than a day have no consumer value; cap stream growth.
		MaxAge: 24 * time.Hour,
	})
	if err != nil {
		nc.Close()
		return nil, fmt.Errorf("ensuring DEVICE_EVENTS stream: %w", err)
	}
	slog.Info("NATS JetStream DEVICE_EVENTS stream ready")
	return &Publisher{nc: nc, js: js}, nil
}
// PublishStatus publishes a device status event to NATS JetStream.
//
// Events are published to "device.status.{DeviceID}" so consumers can subscribe
// to individual devices or all events via "device.status.>".
func (p *Publisher) PublishStatus(ctx context.Context, event DeviceStatusEvent) error {
	subject := fmt.Sprintf("device.status.%s", event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshalling event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publishing to %s: %w", subject, err)
	}
	slog.Debug("published device status event",
		"device_id", event.DeviceID,
		"status", event.Status,
		"subject", subject,
	)
	return nil
}
// PublishMetrics publishes a device metrics event to NATS JetStream.
//
// Events are published to "device.metrics.{type}.{device_id}" so consumers can
// subscribe to all metrics via "device.metrics.>" or filter by type.
func (p *Publisher) PublishMetrics(ctx context.Context, event DeviceMetricsEvent) error {
	subject := fmt.Sprintf("device.metrics.%s.%s", event.Type, event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshalling metrics event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publishing to %s: %w", subject, err)
	}
	slog.Debug("published device metrics event",
		"device_id", event.DeviceID,
		"type", event.Type,
		"subject", subject,
	)
	return nil
}
// DeviceFirmwareEvent is the payload published to NATS JetStream when the poller
// checks a device's firmware update status (rate-limited to once per day per device).
type DeviceFirmwareEvent struct {
	DeviceID         string `json:"device_id"`
	TenantID         string `json:"tenant_id"`
	InstalledVersion string `json:"installed_version"`
	LatestVersion    string `json:"latest_version,omitempty"` // omitted when no newer version is known
	Channel          string `json:"channel,omitempty"`
	Status           string `json:"status"`
	Architecture     string `json:"architecture"`
}
// PublishFirmware publishes a device firmware status event to NATS JetStream.
//
// Events are published to "device.firmware.{DeviceID}" so the Python firmware
// subscriber can process them and update the firmware_versions table.
func (p *Publisher) PublishFirmware(ctx context.Context, event DeviceFirmwareEvent) error {
	subject := fmt.Sprintf("device.firmware.%s", event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshalling firmware event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publishing to %s: %w", subject, err)
	}
	slog.Debug("published device firmware event",
		"device_id", event.DeviceID,
		"installed", event.InstalledVersion,
		"latest", event.LatestVersion,
		"subject", subject,
	)
	return nil
}
// PublishConfigChanged publishes a config change event for a device.
//
// Events are published to "config.changed.{TenantID}.{DeviceID}" so the Python
// backend can trigger event-driven backups when out-of-band changes are detected.
func (p *Publisher) PublishConfigChanged(ctx context.Context, event ConfigChangedEvent) error {
	subject := fmt.Sprintf("config.changed.%s.%s", event.TenantID, event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshal config changed event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publish config changed: %w", err)
	}
	slog.Debug("published config changed event",
		"device_id", event.DeviceID,
		"tenant_id", event.TenantID,
		"old_timestamp", event.OldTimestamp,
		"new_timestamp", event.NewTimestamp,
		"subject", subject,
	)
	return nil
}
// PublishPushRollback publishes a push rollback event when a device goes offline
// after a template or restore config push, triggering automatic rollback.
func (p *Publisher) PublishPushRollback(ctx context.Context, event PushRollbackEvent) error {
	subject := fmt.Sprintf("config.push.rollback.%s.%s", event.TenantID, event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshal push rollback event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publishing to %s: %w", subject, err)
	}
	// Info level: rollbacks are rare and operationally significant.
	slog.Info("published push rollback event",
		"device_id", event.DeviceID,
		"tenant_id", event.TenantID,
		"push_operation_id", event.PushOperationID,
		"subject", subject,
	)
	return nil
}
// PublishPushAlert publishes a push alert event when a device goes offline
// after an editor config push, enabling one-click rollback in the UI.
func (p *Publisher) PublishPushAlert(ctx context.Context, event PushAlertEvent) error {
	subject := fmt.Sprintf("config.push.alert.%s.%s", event.TenantID, event.DeviceID)
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshal push alert event: %w", err)
	}
	if _, err := p.js.Publish(ctx, subject, payload); err != nil {
		return fmt.Errorf("publishing to %s: %w", subject, err)
	}
	// Info level: alerts are rare and operationally significant.
	slog.Info("published push alert event",
		"device_id", event.DeviceID,
		"tenant_id", event.TenantID,
		"push_type", event.PushType,
		"subject", subject,
	)
	return nil
}
// Conn returns the raw NATS connection for use by other components
// (e.g., CmdResponder for request-reply subscriptions).
func (p *Publisher) Conn() *nats.Conn { return p.nc }
// Close drains the NATS connection, flushing pending messages before closing.
// Safe to call on a Publisher whose connection was never established.
func (p *Publisher) Close() {
	if p.nc == nil {
		return
	}
	if err := p.nc.Drain(); err != nil {
		slog.Warn("error draining NATS connection", "error", err)
	}
}

View File

@@ -0,0 +1,232 @@
package bus_test
import (
"context"
"encoding/json"
"testing"
"time"
"github.com/nats-io/nats.go"
"github.com/nats-io/nats.go/jetstream"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/mikrotik-portal/poller/internal/bus"
"github.com/mikrotik-portal/poller/internal/testutil"
)
// TestPublisher_PublishStatus_Integration verifies that a status event lands
// on the DEVICE_EVENTS stream and round-trips through JSON intact.
func TestPublisher_PublishStatus_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	natsURL, cleanup := testutil.SetupNATS(t)
	defer cleanup()
	pub, err := bus.NewPublisher(natsURL)
	require.NoError(t, err)
	defer pub.Close()
	// Create a direct NATS consumer to receive messages.
	nc, err := nats.Connect(natsURL)
	require.NoError(t, err)
	defer nc.Close()
	js, err := jetstream.New(nc)
	require.NoError(t, err)
	ctx := context.Background()
	// Create a consumer on the DEVICE_EVENTS stream.
	cons, err := js.CreateOrUpdateConsumer(ctx, "DEVICE_EVENTS", jetstream.ConsumerConfig{
		FilterSubject: "device.status.>",
		AckPolicy:     jetstream.AckNonePolicy,
	})
	require.NoError(t, err)
	// Publish a status event.
	event := bus.DeviceStatusEvent{
		DeviceID: "dev-abc-123",
		TenantID: "tenant-xyz",
		Status:   "online",
		LastSeen: time.Now().UTC().Format(time.RFC3339),
	}
	err = pub.PublishStatus(ctx, event)
	require.NoError(t, err)
	// Consume the message with timeout. jetstream.Msg is an interface, so hold
	// it directly rather than taking a pointer to the loop variable (the
	// original *jetstream.Msg was a pointer-to-interface anti-pattern).
	msgBatch, err := cons.Fetch(1, jetstream.FetchMaxWait(5*time.Second))
	require.NoError(t, err)
	var received jetstream.Msg
	for msg := range msgBatch.Messages() {
		received = msg
		break
	}
	// Surface any batch-level error (e.g. timeout) the channel drain hid.
	require.NoError(t, msgBatch.Error())
	require.NotNil(t, received, "should receive a message within 5 seconds")
	var got bus.DeviceStatusEvent
	err = json.Unmarshal(received.Data(), &got)
	require.NoError(t, err)
	assert.Equal(t, event.DeviceID, got.DeviceID)
	assert.Equal(t, event.TenantID, got.TenantID)
	assert.Equal(t, event.Status, got.Status)
}
// TestPublisher_PublishMetrics_Integration verifies that a metrics event is
// published to the typed subject "device.metrics.{type}.{device_id}" and
// round-trips through JSON intact.
func TestPublisher_PublishMetrics_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	natsURL, cleanup := testutil.SetupNATS(t)
	defer cleanup()
	pub, err := bus.NewPublisher(natsURL)
	require.NoError(t, err)
	defer pub.Close()
	nc, err := nats.Connect(natsURL)
	require.NoError(t, err)
	defer nc.Close()
	js, err := jetstream.New(nc)
	require.NoError(t, err)
	ctx := context.Background()
	// Create a consumer filtering on metrics subjects.
	cons, err := js.CreateOrUpdateConsumer(ctx, "DEVICE_EVENTS", jetstream.ConsumerConfig{
		FilterSubject: "device.metrics.>",
		AckPolicy:     jetstream.AckNonePolicy,
	})
	require.NoError(t, err)
	// Publish a metrics event.
	event := bus.DeviceMetricsEvent{
		DeviceID:    "dev-metrics-456",
		TenantID:    "tenant-xyz",
		CollectedAt: time.Now().UTC().Format(time.RFC3339),
		Type:        "health",
	}
	err = pub.PublishMetrics(ctx, event)
	require.NoError(t, err)
	// Consume the message. jetstream.Msg is an interface, so hold it directly
	// instead of the original pointer-to-interface.
	msgBatch, err := cons.Fetch(1, jetstream.FetchMaxWait(5*time.Second))
	require.NoError(t, err)
	var received jetstream.Msg
	for msg := range msgBatch.Messages() {
		received = msg
		break
	}
	require.NoError(t, msgBatch.Error())
	require.NotNil(t, received, "should receive metrics message within 5 seconds")
	// Verify the subject includes the type and device_id.
	assert.Equal(t, "device.metrics.health.dev-metrics-456", received.Subject())
	var got bus.DeviceMetricsEvent
	err = json.Unmarshal(received.Data(), &got)
	require.NoError(t, err)
	assert.Equal(t, event.DeviceID, got.DeviceID)
	assert.Equal(t, event.TenantID, got.TenantID)
	assert.Equal(t, event.Type, got.Type)
}
// TestPublisher_PublishFirmware_Integration verifies that a firmware event is
// published to "device.firmware.{device_id}" and round-trips intact.
func TestPublisher_PublishFirmware_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	natsURL, cleanup := testutil.SetupNATS(t)
	defer cleanup()
	pub, err := bus.NewPublisher(natsURL)
	require.NoError(t, err)
	defer pub.Close()
	nc, err := nats.Connect(natsURL)
	require.NoError(t, err)
	defer nc.Close()
	js, err := jetstream.New(nc)
	require.NoError(t, err)
	ctx := context.Background()
	cons, err := js.CreateOrUpdateConsumer(ctx, "DEVICE_EVENTS", jetstream.ConsumerConfig{
		FilterSubject: "device.firmware.>",
		AckPolicy:     jetstream.AckNonePolicy,
	})
	require.NoError(t, err)
	event := bus.DeviceFirmwareEvent{
		DeviceID:         "dev-fw-789",
		TenantID:         "tenant-xyz",
		InstalledVersion: "7.15",
		LatestVersion:    "7.16",
		Channel:          "stable",
		Status:           "update_available",
		Architecture:     "arm64",
	}
	err = pub.PublishFirmware(ctx, event)
	require.NoError(t, err)
	// jetstream.Msg is an interface, so hold it directly instead of the
	// original pointer-to-interface.
	msgBatch, err := cons.Fetch(1, jetstream.FetchMaxWait(5*time.Second))
	require.NoError(t, err)
	var received jetstream.Msg
	for msg := range msgBatch.Messages() {
		received = msg
		break
	}
	require.NoError(t, msgBatch.Error())
	require.NotNil(t, received, "should receive firmware message within 5 seconds")
	assert.Equal(t, "device.firmware.dev-fw-789", received.Subject())
	var got bus.DeviceFirmwareEvent
	err = json.Unmarshal(received.Data(), &got)
	require.NoError(t, err)
	assert.Equal(t, event.DeviceID, got.DeviceID)
	assert.Equal(t, event.InstalledVersion, got.InstalledVersion)
	assert.Equal(t, event.LatestVersion, got.LatestVersion)
	assert.Equal(t, event.Status, got.Status)
}
// TestPublisher_NewPublisher_StreamCreation_Integration checks that
// constructing a Publisher creates the DEVICE_EVENTS stream with the
// expected name and subject coverage.
func TestPublisher_NewPublisher_StreamCreation_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	natsURL, cleanup := testutil.SetupNATS(t)
	defer cleanup()
	pub, err := bus.NewPublisher(natsURL)
	require.NoError(t, err)
	defer pub.Close()
	// Inspect the stream through an independent connection.
	nc, err := nats.Connect(natsURL)
	require.NoError(t, err)
	defer nc.Close()
	js, err := jetstream.New(nc)
	require.NoError(t, err)
	ctx := context.Background()
	stream, err := js.Stream(ctx, "DEVICE_EVENTS")
	require.NoError(t, err, "DEVICE_EVENTS stream should exist")
	info, err := stream.Info(ctx)
	require.NoError(t, err)
	assert.Equal(t, "DEVICE_EVENTS", info.Config.Name)
	for _, subject := range []string{"device.status.>", "device.metrics.>", "device.firmware.>"} {
		assert.Contains(t, info.Config.Subjects, subject,
			"stream should cover %s subjects", subject)
	}
}

View File

@@ -0,0 +1,160 @@
// Package config loads poller configuration from environment variables.
package config
import (
"encoding/base64"
"fmt"
"log/slog"
"os"
"strconv"
)
// Config holds all runtime configuration for the poller service.
// Populated exclusively by Load from environment variables.
type Config struct {
	// Environment is the deployment environment (dev, staging, production).
	// Controls startup validation of security-sensitive defaults.
	// Default: "dev".
	Environment string
	// DatabaseURL is the PostgreSQL connection string for the poller_user role.
	// Example: postgres://poller_user:poller_password@localhost:5432/mikrotik
	// Required; Load fails without it.
	DatabaseURL string
	// RedisURL is the Redis connection URL. Default: redis://localhost:6379/0.
	RedisURL string
	// NatsURL is the NATS server URL. Default: nats://localhost:4222.
	NatsURL string
	// CredentialEncryptionKey is the 32-byte AES key decoded from base64.
	// MUST match the Python backend CREDENTIAL_ENCRYPTION_KEY environment variable.
	// OPTIONAL when OpenBao Transit is configured (OPENBAO_ADDR set);
	// nil in that case.
	CredentialEncryptionKey []byte
	// OpenBaoAddr is the OpenBao server address for Transit API calls.
	// Example: http://openbao:8200
	OpenBaoAddr string
	// OpenBaoToken is the authentication token for OpenBao API calls.
	// Required whenever OpenBaoAddr is set.
	OpenBaoToken string
	// PollIntervalSeconds is how often each device is polled. Default: 60.
	PollIntervalSeconds int
	// DeviceRefreshSeconds is how often the DB is queried for new/removed devices.
	// Default: 60.
	DeviceRefreshSeconds int
	// ConnectionTimeoutSeconds is the TLS connection timeout per device.
	// Default: 10.
	ConnectionTimeoutSeconds int
	// LogLevel controls log verbosity (debug, info, warn, error). Default: "info".
	LogLevel string
	// CircuitBreakerMaxFailures is the number of consecutive connection failures
	// before the circuit breaker enters backoff mode for a device. Default: 5.
	CircuitBreakerMaxFailures int
	// CircuitBreakerBaseBackoffSeconds is the base backoff duration in seconds.
	// Actual backoff is exponential: base * 2^(failures-1), capped at max.
	// Default: 30.
	CircuitBreakerBaseBackoffSeconds int
	// CircuitBreakerMaxBackoffSeconds is the maximum backoff duration in seconds.
	// Default: 900 (15 minutes).
	CircuitBreakerMaxBackoffSeconds int
	// CommandTimeoutSeconds is the per-command timeout for RouterOS API calls.
	// Each API call (DetectVersion, CollectInterfaces, etc.) is wrapped with
	// this timeout to prevent indefinite blocking on unresponsive devices.
	// Default: 30.
	CommandTimeoutSeconds int
}
// knownInsecureEncryptionKey is the base64-encoded dev default encryption key.
// Production environments MUST NOT use this value; Load rejects it (along with
// the "CHANGE_ME_IN_PRODUCTION" placeholder) in any non-dev environment.
const knownInsecureEncryptionKey = "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w="
// Load reads configuration from environment variables, applying defaults where appropriate.
// Returns an error if any required variable is missing or invalid.
//
// Validation order matters: insecure-default detection runs BEFORE base64
// decoding so placeholder values fail with a security message rather than a
// confusing encoding error. Do not reorder the checks below.
func Load() (*Config, error) {
	cfg := &Config{
		Environment:                      getEnv("ENVIRONMENT", "dev"),
		DatabaseURL:                      getEnv("DATABASE_URL", ""),
		RedisURL:                         getEnv("REDIS_URL", "redis://localhost:6379/0"),
		NatsURL:                          getEnv("NATS_URL", "nats://localhost:4222"),
		LogLevel:                         getEnv("LOG_LEVEL", "info"),
		PollIntervalSeconds:              getEnvInt("POLL_INTERVAL_SECONDS", 60),
		DeviceRefreshSeconds:             getEnvInt("DEVICE_REFRESH_SECONDS", 60),
		ConnectionTimeoutSeconds:         getEnvInt("CONNECTION_TIMEOUT_SECONDS", 10),
		CircuitBreakerMaxFailures:        getEnvInt("CIRCUIT_BREAKER_MAX_FAILURES", 5),
		CircuitBreakerBaseBackoffSeconds: getEnvInt("CIRCUIT_BREAKER_BASE_BACKOFF_SECONDS", 30),
		CircuitBreakerMaxBackoffSeconds:  getEnvInt("CIRCUIT_BREAKER_MAX_BACKOFF_SECONDS", 900),
		CommandTimeoutSeconds:            getEnvInt("COMMAND_TIMEOUT_SECONDS", 30),
	}
	if cfg.DatabaseURL == "" {
		return nil, fmt.Errorf("DATABASE_URL environment variable is required")
	}
	// OpenBao Transit configuration (optional -- required for Phase 29+ envelope encryption)
	cfg.OpenBaoAddr = getEnv("OPENBAO_ADDR", "")
	cfg.OpenBaoToken = getEnv("OPENBAO_TOKEN", "")
	if cfg.OpenBaoAddr != "" && cfg.OpenBaoToken == "" {
		return nil, fmt.Errorf("OPENBAO_TOKEN is required when OPENBAO_ADDR is set")
	}
	// Decode the AES-256-GCM encryption key from base64.
	// Must use StdEncoding (NOT URLEncoding) to match Python's base64.b64encode output.
	// OPTIONAL when OpenBao Transit is configured (OPENBAO_ADDR set).
	keyB64 := getEnv("CREDENTIAL_ENCRYPTION_KEY", "")
	if keyB64 == "" {
		if cfg.OpenBaoAddr == "" {
			return nil, fmt.Errorf("CREDENTIAL_ENCRYPTION_KEY environment variable is required (or configure OPENBAO_ADDR for Transit encryption)")
		}
		// OpenBao configured without legacy key -- OK for post-migration
		slog.Info("CREDENTIAL_ENCRYPTION_KEY not set; OpenBao Transit will handle all credential decryption")
	} else {
		// Validate production safety BEFORE decode: reject known insecure defaults in non-dev environments.
		// This runs first so placeholder values like "CHANGE_ME_IN_PRODUCTION" get a clear security
		// error instead of a confusing "not valid base64" error.
		if cfg.Environment != "dev" {
			if keyB64 == knownInsecureEncryptionKey || keyB64 == "CHANGE_ME_IN_PRODUCTION" {
				return nil, fmt.Errorf(
					"FATAL: CREDENTIAL_ENCRYPTION_KEY uses a known insecure default in '%s' environment. "+
						"Generate a secure key for production: "+
						"python -c \"import secrets, base64; print(base64.b64encode(secrets.token_bytes(32)).decode())\"",
					cfg.Environment,
				)
			}
		}
		key, err := base64.StdEncoding.DecodeString(keyB64)
		if err != nil {
			return nil, fmt.Errorf("CREDENTIAL_ENCRYPTION_KEY is not valid base64: %w", err)
		}
		// 32 bytes selects AES-256.
		if len(key) != 32 {
			return nil, fmt.Errorf("CREDENTIAL_ENCRYPTION_KEY must decode to exactly 32 bytes, got %d", len(key))
		}
		cfg.CredentialEncryptionKey = key
	}
	return cfg, nil
}
// getEnv returns the value of environment variable key, falling back to
// defaultValue when the variable is unset or empty.
func getEnv(key, defaultValue string) string {
	v := os.Getenv(key)
	if v == "" {
		return defaultValue
	}
	return v
}
// getEnvInt returns the integer value of environment variable key. The
// defaultValue is returned when the variable is unset, empty, or does not
// parse as an integer (invalid values are silently ignored).
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	if parsed, err := strconv.Atoi(raw); err == nil {
		return parsed
	}
	return defaultValue
}

View File

@@ -0,0 +1,79 @@
package config
import (
"os"
"strings"
"testing"
)
func TestProductionValidationRejectsInsecureKey(t *testing.T) {
// Save and restore env
origEnv := os.Getenv("ENVIRONMENT")
origDB := os.Getenv("DATABASE_URL")
origKey := os.Getenv("CREDENTIAL_ENCRYPTION_KEY")
defer func() {
os.Setenv("ENVIRONMENT", origEnv)
os.Setenv("DATABASE_URL", origDB)
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", origKey)
}()
os.Setenv("DATABASE_URL", "postgres://test:test@localhost:5432/test")
// Test: production with known insecure default key should fail
os.Setenv("ENVIRONMENT", "production")
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w=")
_, err := Load()
if err == nil {
t.Fatal("expected error for insecure key in production, got nil")
}
if !strings.Contains(err.Error(), "FATAL") {
t.Fatalf("expected FATAL in error message, got: %s", err.Error())
}
}
func TestProductionValidationRejectsPlaceholder(t *testing.T) {
origEnv := os.Getenv("ENVIRONMENT")
origDB := os.Getenv("DATABASE_URL")
origKey := os.Getenv("CREDENTIAL_ENCRYPTION_KEY")
defer func() {
os.Setenv("ENVIRONMENT", origEnv)
os.Setenv("DATABASE_URL", origDB)
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", origKey)
}()
os.Setenv("DATABASE_URL", "postgres://test:test@localhost:5432/test")
os.Setenv("ENVIRONMENT", "production")
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", "CHANGE_ME_IN_PRODUCTION")
_, err := Load()
if err == nil {
t.Fatal("expected error for CHANGE_ME_IN_PRODUCTION in production, got nil")
}
if !strings.Contains(err.Error(), "FATAL") {
t.Fatalf("expected FATAL in error message for placeholder, got: %s", err.Error())
}
}
func TestDevModeAcceptsInsecureDefaults(t *testing.T) {
origEnv := os.Getenv("ENVIRONMENT")
origDB := os.Getenv("DATABASE_URL")
origKey := os.Getenv("CREDENTIAL_ENCRYPTION_KEY")
defer func() {
os.Setenv("ENVIRONMENT", origEnv)
os.Setenv("DATABASE_URL", origDB)
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", origKey)
}()
os.Setenv("ENVIRONMENT", "dev")
os.Setenv("DATABASE_URL", "postgres://test:test@localhost:5432/test")
os.Setenv("CREDENTIAL_ENCRYPTION_KEY", "LLLjnfBZTSycvL2U07HDSxUeTtLxb9cZzryQl0R9E4w=")
cfg, err := Load()
if err != nil {
t.Fatalf("dev mode should accept insecure defaults, got: %s", err.Error())
}
if cfg.Environment != "dev" {
t.Fatalf("expected Environment=dev, got %s", cfg.Environment)
}
}

View File

@@ -0,0 +1,104 @@
package config
import (
"encoding/base64"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestLoad_RequiredDatabaseURL verifies Load fails fast when DATABASE_URL is empty.
func TestLoad_RequiredDatabaseURL(t *testing.T) {
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", base64.StdEncoding.EncodeToString(make([]byte, 32)))
	// An empty DATABASE_URL must trigger the required-field error.
	t.Setenv("DATABASE_URL", "")

	_, loadErr := Load()
	require.Error(t, loadErr)
	assert.Contains(t, loadErr.Error(), "DATABASE_URL")
}
// TestLoad_RequiredEncryptionKey verifies Load fails when no encryption key
// (and no OpenBao Transit address) is configured.
func TestLoad_RequiredEncryptionKey(t *testing.T) {
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", "")
	t.Setenv("DATABASE_URL", "postgres://user:pass@localhost/db")

	_, loadErr := Load()
	require.Error(t, loadErr)
	assert.Contains(t, loadErr.Error(), "CREDENTIAL_ENCRYPTION_KEY")
}
// TestLoad_InvalidBase64Key verifies Load rejects a key that is not valid base64.
func TestLoad_InvalidBase64Key(t *testing.T) {
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", "not-valid-base64!!!")
	t.Setenv("DATABASE_URL", "postgres://user:pass@localhost/db")

	_, loadErr := Load()
	require.Error(t, loadErr)
	assert.Contains(t, loadErr.Error(), "base64")
}
// TestLoad_WrongKeyLength verifies Load rejects a key that decodes to a
// length other than the required 32 bytes (here: 16 bytes).
func TestLoad_WrongKeyLength(t *testing.T) {
	shortKey := base64.StdEncoding.EncodeToString(make([]byte, 16))
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", shortKey)
	t.Setenv("DATABASE_URL", "postgres://user:pass@localhost/db")

	_, loadErr := Load()
	require.Error(t, loadErr)
	assert.Contains(t, loadErr.Error(), "32 bytes")
}
// TestLoad_DefaultValues clears every optional variable and verifies Load
// falls back to the documented defaults.
func TestLoad_DefaultValues(t *testing.T) {
	t.Setenv("DATABASE_URL", "postgres://user:pass@localhost/db")
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", base64.StdEncoding.EncodeToString(make([]byte, 32)))
	for _, optional := range []string{
		"REDIS_URL",
		"NATS_URL",
		"LOG_LEVEL",
		"POLL_INTERVAL_SECONDS",
		"DEVICE_REFRESH_SECONDS",
		"CONNECTION_TIMEOUT_SECONDS",
	} {
		t.Setenv(optional, "")
	}

	cfg, loadErr := Load()
	require.NoError(t, loadErr)
	assert.Equal(t, "redis://localhost:6379/0", cfg.RedisURL)
	assert.Equal(t, "nats://localhost:4222", cfg.NatsURL)
	assert.Equal(t, "info", cfg.LogLevel)
	assert.Equal(t, 60, cfg.PollIntervalSeconds)
	assert.Equal(t, 60, cfg.DeviceRefreshSeconds)
	assert.Equal(t, 10, cfg.ConnectionTimeoutSeconds)
}
// TestLoad_CustomValues verifies that explicitly-set environment variables
// override every default.
func TestLoad_CustomValues(t *testing.T) {
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", base64.StdEncoding.EncodeToString(make([]byte, 32)))
	t.Setenv("DATABASE_URL", "postgres://custom:pass@db:5432/mydb")
	t.Setenv("REDIS_URL", "redis://custom-redis:6380/1")
	t.Setenv("NATS_URL", "nats://custom-nats:4223")
	t.Setenv("LOG_LEVEL", "debug")
	t.Setenv("POLL_INTERVAL_SECONDS", "30")
	t.Setenv("DEVICE_REFRESH_SECONDS", "120")
	t.Setenv("CONNECTION_TIMEOUT_SECONDS", "5")

	cfg, loadErr := Load()
	require.NoError(t, loadErr)
	assert.Equal(t, "postgres://custom:pass@db:5432/mydb", cfg.DatabaseURL)
	assert.Equal(t, "redis://custom-redis:6380/1", cfg.RedisURL)
	assert.Equal(t, "nats://custom-nats:4223", cfg.NatsURL)
	assert.Equal(t, "debug", cfg.LogLevel)
	assert.Equal(t, 30, cfg.PollIntervalSeconds)
	assert.Equal(t, 120, cfg.DeviceRefreshSeconds)
	assert.Equal(t, 5, cfg.ConnectionTimeoutSeconds)
}
// TestLoad_ValidEncryptionKey verifies the decoded key bytes land unchanged
// in cfg.CredentialEncryptionKey.
func TestLoad_ValidEncryptionKey(t *testing.T) {
	// Deterministic 32-byte test key: 0x00, 0x01, ... 0x1f.
	rawKey := make([]byte, 32)
	for i := range rawKey {
		rawKey[i] = byte(i)
	}
	t.Setenv("DATABASE_URL", "postgres://user:pass@localhost/db")
	t.Setenv("CREDENTIAL_ENCRYPTION_KEY", base64.StdEncoding.EncodeToString(rawKey))

	cfg, loadErr := Load()
	require.NoError(t, loadErr)
	assert.Equal(t, rawKey, cfg.CredentialEncryptionKey)
}

View File

@@ -0,0 +1,122 @@
// Package device provides the full certificate deployment flow for RouterOS devices.
//
// The deployment follows these steps:
// 1. Upload cert.pem and key.pem via SFTP
// 2. Import the certificate via RouterOS API (/certificate/import)
// 3. Import the private key via RouterOS API (/certificate/import)
// 4. Determine the certificate name on device
// 5. Assign the certificate to the api-ssl service (/ip/service/set)
// 6. Clean up uploaded PEM files from device filesystem (/file/remove)
package device
import (
"fmt"
"log/slog"
routeros "github.com/go-routeros/routeros/v3"
"golang.org/x/crypto/ssh"
)
// CertDeployRequest is the NATS request payload for certificate deployment.
// Fields must match the JSON emitted by the Python backend.
type CertDeployRequest struct {
	// DeviceID identifies the target device; used for logging only here.
	DeviceID string `json:"device_id"`
	// CertPEM is the PEM-encoded certificate to upload and import.
	CertPEM string `json:"cert_pem"`
	// KeyPEM is the PEM-encoded private key to upload and import.
	KeyPEM string `json:"key_pem"`
	// CertName is the base name used for the uploaded files, e.g. "portal-device-cert".
	CertName string `json:"cert_name"`
	// SSHPort is the device's SSH/SFTP port for the file uploads.
	SSHPort int `json:"ssh_port"`
}
// CertDeployResponse is the NATS reply payload for a certificate deployment.
type CertDeployResponse struct {
	// Success is true when upload and both imports completed.
	Success bool `json:"success"`
	// CertNameOnDevice is the certificate's name as stored on the device
	// (RouterOS typically appends "_0" to the imported file name).
	CertNameOnDevice string `json:"cert_name_on_device,omitempty"`
	// Error describes the failing step when Success is false.
	Error string `json:"error,omitempty"`
}
// DeployCert performs the full certificate deployment flow:
//  1. Upload cert.pem and key.pem files via SFTP
//  2. Import certificate via RouterOS API
//  3. Import key via RouterOS API
//  4. Assign certificate to api-ssl service
//  5. Clean up uploaded PEM files from device filesystem
//
// Cleanup is registered via defer immediately after each successful upload,
// so the PEM files — in particular the private key — are removed from the
// device filesystem on EVERY exit path, including import/assignment failures.
// (Previously a failed import returned early and left the key PEM on device.)
// File-removal failures are non-fatal and ignored.
func DeployCert(sshClient *ssh.Client, apiClient *routeros.Client, req CertDeployRequest) CertDeployResponse {
	certFile := req.CertName + ".pem"
	keyFile := req.CertName + "-key.pem"

	// Step 1: Upload cert via SFTP.
	slog.Debug("uploading cert file via SFTP", "file", certFile, "device_id", req.DeviceID)
	if err := UploadFile(sshClient, certFile, []byte(req.CertPEM)); err != nil {
		return CertDeployResponse{Success: false, Error: fmt.Sprintf("SFTP cert upload: %s", err)}
	}
	// The cert PEM now exists on the device; guarantee removal on any return.
	defer func() {
		slog.Debug("cleaning up PEM files", "device_id", req.DeviceID)
		ExecuteCommand(apiClient, "/file/remove", []string{"=.id=" + certFile})
	}()

	// Step 2: Upload key via SFTP.
	slog.Debug("uploading key file via SFTP", "file", keyFile, "device_id", req.DeviceID)
	if err := UploadFile(sshClient, keyFile, []byte(req.KeyPEM)); err != nil {
		return CertDeployResponse{Success: false, Error: fmt.Sprintf("SFTP key upload: %s", err)}
	}
	// The private key PEM is the sensitive artifact — guarantee its removal too.
	defer func() {
		ExecuteCommand(apiClient, "/file/remove", []string{"=.id=" + keyFile})
	}()

	// Step 3: Import certificate.
	slog.Debug("importing certificate", "file", certFile, "device_id", req.DeviceID)
	importResult := ExecuteCommand(apiClient, "/certificate/import", []string{
		"=file-name=" + certFile,
	})
	if !importResult.Success {
		return CertDeployResponse{Success: false, Error: fmt.Sprintf("cert import: %s", importResult.Error)}
	}

	// Step 4: Import private key.
	slog.Debug("importing private key", "file", keyFile, "device_id", req.DeviceID)
	keyImportResult := ExecuteCommand(apiClient, "/certificate/import", []string{
		"=file-name=" + keyFile,
	})
	if !keyImportResult.Success {
		return CertDeployResponse{Success: false, Error: fmt.Sprintf("key import: %s", keyImportResult.Error)}
	}

	// Determine the certificate name on device.
	// RouterOS names imported certs as <filename>_0 by convention; confirm by
	// querying and preferring the last listed cert that carries a private key
	// (the one whose key import just completed).
	certNameOnDevice := certFile + "_0"
	printResult := ExecuteCommand(apiClient, "/certificate/print", []string{
		"=.proplist=name,common-name,private-key",
	})
	if printResult.Success && len(printResult.Data) > 0 {
		for _, entry := range printResult.Data {
			if name, ok := entry["name"]; ok {
				if pk, hasPK := entry["private-key"]; hasPK && pk == "true" {
					certNameOnDevice = name
				}
			}
		}
	}

	// Step 5: Assign to api-ssl service.
	slog.Debug("assigning certificate to api-ssl", "cert_name", certNameOnDevice, "device_id", req.DeviceID)
	assignResult := ExecuteCommand(apiClient, "/ip/service/set", []string{
		"=numbers=api-ssl",
		"=certificate=" + certNameOnDevice,
	})
	if !assignResult.Success {
		// Don't fail entirely — the cert is imported and assignment can be retried.
		slog.Warn("api-ssl assignment failed (cert still imported)",
			"device_id", req.DeviceID,
			"error", assignResult.Error,
		)
	}

	slog.Info("certificate deployed successfully",
		"device_id", req.DeviceID,
		"cert_name", certNameOnDevice,
	)
	return CertDeployResponse{
		Success:          true,
		CertNameOnDevice: certNameOnDevice,
	}
}

View File

@@ -0,0 +1,115 @@
// Package device handles RouterOS device connections and queries.
package device
import (
"crypto/tls"
"crypto/x509"
"fmt"
"log/slog"
"time"
routeros "github.com/go-routeros/routeros/v3"
)
// buildTLSConfig creates a TLS config that verifies the device certificate
// against the portal CA cert. When caCertPEM is empty, or cannot be parsed
// (a warning is logged in that case), it falls back to InsecureSkipVerify.
func buildTLSConfig(caCertPEM []byte) *tls.Config {
	pool := x509.NewCertPool()
	if len(caCertPEM) > 0 && pool.AppendCertsFromPEM(caCertPEM) {
		return &tls.Config{RootCAs: pool}
	}
	if len(caCertPEM) > 0 {
		// PEM was supplied but unparseable — warn so operators notice.
		slog.Warn("failed to parse CA cert PEM, falling back to insecure TLS")
	}
	return &tls.Config{InsecureSkipVerify: true} //nolint:gosec // no usable CA cert
}
// ConnectDevice establishes a connection to a RouterOS device.
//
// Connection strategy is governed by tlsMode:
//
//   - "auto" (default): Try CA-verified TLS (if caCertPEM provided) ->
//     InsecureSkipVerify -> STOP. No plain-text fallback.
//   - "portal_ca": Try CA-verified TLS only (strict).
//   - "insecure": Skip directly to InsecureSkipVerify TLS (no CA check).
//   - "plain": Explicit opt-in for plain-text API connection.
//
// Callers must call CloseDevice when done.
func ConnectDevice(ip string, sslPort, plainPort int, username, password string, timeout time.Duration, caCertPEM []byte, tlsMode string) (*routeros.Client, error) {
	tlsAddr := fmt.Sprintf("%s:%d", ip, sslPort)

	switch tlsMode {
	case "plain":
		// Plain-text API, only ever used when explicitly requested.
		addr := fmt.Sprintf("%s:%d", ip, plainPort)
		slog.Debug("connecting to RouterOS device (plain — explicit opt-in)", "address", addr)
		conn, dialErr := routeros.DialTimeout(addr, username, password, timeout)
		if dialErr != nil {
			return nil, fmt.Errorf("plain-text connection to %s failed: %w", addr, dialErr)
		}
		slog.Debug("connected to RouterOS device (plain — explicit opt-in)", "address", addr)
		return conn, nil

	case "insecure":
		// No CA verification at all; TLS transport only.
		cfg := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // insecure mode requested
		slog.Debug("connecting to RouterOS device (insecure TLS)", "address", tlsAddr)
		conn, dialErr := routeros.DialTLSTimeout(tlsAddr, username, password, cfg, timeout)
		if dialErr != nil {
			return nil, fmt.Errorf("insecure TLS connection to %s failed: %w", tlsAddr, dialErr)
		}
		slog.Debug("connected with insecure TLS", "address", tlsAddr)
		return conn, nil

	case "portal_ca":
		// Strict: the portal CA must verify the device cert, no fallback.
		strictCfg := buildTLSConfig(caCertPEM)
		if strictCfg.RootCAs == nil {
			return nil, fmt.Errorf("portal_ca mode requires a valid CA cert but none available for %s", tlsAddr)
		}
		slog.Debug("connecting to RouterOS device (CA-verified TLS)", "address", tlsAddr)
		conn, dialErr := routeros.DialTLSTimeout(tlsAddr, username, password, strictCfg, timeout)
		if dialErr != nil {
			return nil, fmt.Errorf("CA-verified TLS connection to %s failed: %w", tlsAddr, dialErr)
		}
		slog.Debug("connected with CA-verified TLS", "address", tlsAddr)
		return conn, nil

	default:
		// "auto": tier 1 is CA-verified TLS when a parseable CA cert exists.
		if len(caCertPEM) > 0 {
			verified := buildTLSConfig(caCertPEM)
			if verified.RootCAs != nil { // skip tier 1 entirely if PEM was unusable
				slog.Debug("connecting to RouterOS device (CA-verified TLS)", "address", tlsAddr)
				conn, dialErr := routeros.DialTLSTimeout(tlsAddr, username, password, verified, timeout)
				if dialErr == nil {
					slog.Debug("connected with CA-verified TLS", "address", tlsAddr)
					return conn, nil
				}
				slog.Debug("CA-verified TLS failed, trying insecure TLS", "address", tlsAddr, "error", dialErr)
			}
		}
		// Tier 2: InsecureSkipVerify TLS. There is deliberately NO tier 3 —
		// auto mode never downgrades to plain text.
		fallbackCfg := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // fallback for unprovisioned devices
		slog.Debug("connecting to RouterOS device (insecure TLS)", "address", tlsAddr)
		conn, dialErr := routeros.DialTLSTimeout(tlsAddr, username, password, fallbackCfg, timeout)
		if dialErr != nil {
			return nil, fmt.Errorf("TLS connection to %s failed (auto mode — no plain-text fallback): %w", tlsAddr, dialErr)
		}
		slog.Debug("connected with insecure TLS", "address", tlsAddr)
		return conn, nil
	}
}
// CloseDevice closes a RouterOS client connection. Safe to call on a nil client.
func CloseDevice(c *routeros.Client) {
	if c != nil {
		c.Close()
	}
}

View File

@@ -0,0 +1,50 @@
package device
import (
"errors"
"strings"
routeros "github.com/go-routeros/routeros/v3"
)
// CommandRequest is the JSON payload received from the Python backend via NATS.
type CommandRequest struct {
	// DeviceID identifies which device the command targets.
	DeviceID string `json:"device_id"`
	// Command is the full RouterOS API path, e.g. "/ip/address/print".
	Command string `json:"command"`
	// Args are optional RouterOS API words, e.g. "=.proplist=.id,address".
	Args []string `json:"args"`
}
// CommandResponse is the JSON payload returned to the Python backend via NATS.
type CommandResponse struct {
	// Success reports whether the RouterOS command completed without error.
	Success bool `json:"success"`
	// Data holds one map per reply sentence (row) from the device.
	Data []map[string]string `json:"data"`
	// Error carries the RouterOS error text when Success is false.
	Error string `json:"error,omitempty"`
}
// ExecuteCommand runs an arbitrary RouterOS API command on a connected device.
// The command string is the full path (e.g., "/ip/address/print") and args are
// optional RouterOS API words (e.g., "=.proplist=.id,address").
func ExecuteCommand(client *routeros.Client, command string, args []string) CommandResponse {
	words := append(make([]string, 0, 1+len(args)), command)
	words = append(words, args...)

	reply, err := client.Run(words...)
	if err != nil {
		// RouterOS 7.x answers some empty results (e.g., no firewall rules)
		// with a "!empty" word that go-routeros/v3 does not recognize and
		// surfaces as an UnknownReplyError. Map that case to an empty success.
		var unknown *routeros.UnknownReplyError
		if errors.As(err, &unknown) && strings.TrimPrefix(unknown.Sentence.Word, "!") == "empty" {
			return CommandResponse{Success: true, Data: []map[string]string{}}
		}
		return CommandResponse{Success: false, Data: nil, Error: err.Error()}
	}

	rows := make([]map[string]string, 0, len(reply.Re))
	for _, sentence := range reply.Re {
		rows = append(rows, sentence.Map)
	}
	return CommandResponse{Success: true, Data: rows}
}

View File

@@ -0,0 +1,61 @@
package device
import (
"crypto/aes"
"crypto/cipher"
"encoding/json"
"fmt"
)
// credentialsJSON is the JSON structure stored inside encrypted device
// credentials. The field names must match the Python backend's encryption
// format exactly, since the plaintext is produced there.
type credentialsJSON struct {
	// Username for the RouterOS API login.
	Username string `json:"username"`
	// Password for the RouterOS API login.
	Password string `json:"password"`
}
// DecryptCredentials decrypts AES-256-GCM encrypted credentials and returns the
// username and password stored within.
//
// The ciphertext format MUST match what Python's cryptography.hazmat.primitives.ciphers.aead.AESGCM
// produces when called as: nonce + AESGCM.encrypt(nonce, plaintext, None)
//
// Layout:
//   - bytes [0:12]  — 12-byte random nonce (GCM standard)
//   - bytes [12:]   — ciphertext with the 16-byte GCM tag appended
//
// Go's cipher.AEAD.Open expects the tag appended to the ciphertext, which is
// exactly how Python's cryptography library stores it, so the two formats are
// directly compatible.
func DecryptCredentials(ciphertext []byte, key []byte) (username, password string, err error) {
	const (
		nonceLen = 12
		tagLen   = 16
	)
	if len(key) != 32 {
		return "", "", fmt.Errorf("encryption key must be 32 bytes, got %d", len(key))
	}
	if len(ciphertext) < nonceLen+tagLen {
		return "", "", fmt.Errorf("ciphertext too short: need at least 28 bytes (12 nonce + 16 tag), got %d", len(ciphertext))
	}

	blockCipher, err := aes.NewCipher(key)
	if err != nil {
		return "", "", fmt.Errorf("creating AES cipher: %w", err)
	}
	aead, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return "", "", fmt.Errorf("creating GCM cipher: %w", err)
	}

	plaintext, err := aead.Open(nil, ciphertext[:nonceLen], ciphertext[nonceLen:], nil)
	if err != nil {
		return "", "", fmt.Errorf("decrypting credentials (wrong key or tampered data): %w", err)
	}

	var parsed credentialsJSON
	if err := json.Unmarshal(plaintext, &parsed); err != nil {
		return "", "", fmt.Errorf("unmarshalling decrypted credentials JSON: %w", err)
	}
	return parsed.Username, parsed.Password, nil
}

View File

@@ -0,0 +1,91 @@
package device
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// encrypt is a test helper that produces ciphertext in the same nonce-prefixed
// layout Python's AESGCM emits (nonce || ciphertext || tag), so Go-side
// decryption can be verified against Python-compatible input.
func encrypt(t *testing.T, plaintext []byte, key []byte) []byte {
	t.Helper()
	blockCipher, err := aes.NewCipher(key)
	require.NoError(t, err)
	aead, err := cipher.NewGCM(blockCipher)
	require.NoError(t, err)
	nonce := make([]byte, 12)
	_, err = rand.Read(nonce)
	require.NoError(t, err)
	// Sealing with the nonce as dst yields nonce || ciphertext || tag.
	return aead.Seal(nonce, nonce, plaintext, nil)
}
// TestDecryptCredentials_RoundTrip encrypts known credentials and verifies
// they decrypt back to the same username and password.
func TestDecryptCredentials_RoundTrip(t *testing.T) {
	key := make([]byte, 32)
	_, err := rand.Read(key)
	require.NoError(t, err)

	payload, err := json.Marshal(credentialsJSON{Username: "admin", Password: "secret123"})
	require.NoError(t, err)

	user, pass, err := DecryptCredentials(encrypt(t, payload, key), key)
	require.NoError(t, err)
	assert.Equal(t, "admin", user)
	assert.Equal(t, "secret123", pass)
}
// TestDecryptCredentials_WrongKey verifies decryption with a different key
// fails GCM authentication.
func TestDecryptCredentials_WrongKey(t *testing.T) {
	encKey := make([]byte, 32)
	decKey := make([]byte, 32)
	_, _ = rand.Read(encKey)
	_, _ = rand.Read(decKey)

	payload, _ := json.Marshal(credentialsJSON{Username: "admin", Password: "secret"})
	_, _, err := DecryptCredentials(encrypt(t, payload, encKey), decKey)
	assert.Error(t, err)
	assert.Contains(t, err.Error(), "wrong key or tampered")
}
// TestDecryptCredentials_ShortCiphertext verifies inputs shorter than
// nonce+tag (28 bytes) are rejected before any crypto runs.
func TestDecryptCredentials_ShortCiphertext(t *testing.T) {
	validKey := make([]byte, 32)
	_, _ = rand.Read(validKey)

	_, _, err := DecryptCredentials([]byte("short"), validKey)
	assert.Error(t, err)
	assert.Contains(t, err.Error(), "too short")
}
// TestDecryptCredentials_WrongKeyLength verifies that a non-32-byte key is
// rejected with an explanatory error.
func TestDecryptCredentials_WrongKeyLength(t *testing.T) {
	_, _, err := DecryptCredentials(make([]byte, 50), make([]byte, 16))
	assert.Error(t, err)
	assert.Contains(t, err.Error(), "32 bytes")
}
// TestDecryptCredentials_TamperedCiphertext flips one byte of the encrypted
// payload and verifies GCM authentication rejects it.
func TestDecryptCredentials_TamperedCiphertext(t *testing.T) {
	key := make([]byte, 32)
	_, _ = rand.Read(key)

	payload, _ := json.Marshal(credentialsJSON{Username: "admin", Password: "secret"})
	sealed := encrypt(t, payload, key)

	// Corrupt a byte inside the encrypted portion (past the 12-byte nonce).
	tampered := append([]byte(nil), sealed...)
	tampered[15] ^= 0xFF

	_, _, err := DecryptCredentials(tampered, key)
	assert.Error(t, err)
}

View File

@@ -0,0 +1,99 @@
package device
import (
"log/slog"
routeros "github.com/go-routeros/routeros/v3"
)
// FirmwareInfo holds firmware update status collected from a RouterOS device.
type FirmwareInfo struct {
	// InstalledVersion is the running RouterOS version (from the update
	// check's installed-version, with /system/resource version as fallback).
	InstalledVersion string `json:"installed_version"`
	// LatestVersion is the newest version reported by MikroTik's servers;
	// empty when the check could not run.
	LatestVersion string `json:"latest_version,omitempty"`
	// Channel is the device's configured update channel.
	Channel string `json:"channel,omitempty"`
	// Status is the raw RouterOS status string, e.g. "New version is available",
	// "System is already up to date", or the local sentinel "check-failed".
	Status string `json:"status"`
	// Architecture is the CPU architecture (e.g., "arm", "arm64", "mipsbe").
	Architecture string `json:"architecture"`
}
// CheckFirmwareUpdate queries a RouterOS device for firmware update status.
//
// It performs these API calls:
//  1. /system/resource/print — architecture and installed version.
//  2. /system/package/update/check-for-updates — asks the device to contact
//     MikroTik's servers (outbound HTTP from the device).
//  3. /system/package/update/print — reads the check's result.
//
// If the device cannot reach MikroTik's servers (no internet), the function
// returns what it knows (installed version, architecture) with status
// "check-failed". This is non-fatal — the device may simply be offline-only.
func CheckFirmwareUpdate(c *routeros.Client) (FirmwareInfo, error) {
	resReply, err := c.Run("/system/resource/print")
	if err != nil {
		return FirmwareInfo{}, err
	}
	var arch, installedVer string
	if len(resReply.Re) > 0 {
		row := resReply.Re[0].Map
		arch = row["architecture-name"]
		installedVer = row["version"]
	}

	// Result returned whenever the update servers cannot be queried: keep the
	// locally-known facts and mark the check itself as failed.
	fallback := FirmwareInfo{
		InstalledVersion: installedVer,
		Architecture:     arch,
		Status:           "check-failed",
	}

	if _, err := c.Run("/system/package/update/check-for-updates"); err != nil {
		slog.Debug("firmware update check failed (device may lack internet)",
			"error", err,
			"architecture", arch,
		)
		return fallback, nil
	}

	reply, err := c.Run("/system/package/update/print")
	if err != nil || len(reply.Re) == 0 {
		return fallback, nil
	}

	row := reply.Re[0].Map
	info := FirmwareInfo{
		InstalledVersion: row["installed-version"],
		LatestVersion:    row["latest-version"],
		Channel:          row["channel"],
		Status:           row["status"],
		Architecture:     arch,
	}
	// Use the resource-detected version when the update row omits it.
	if info.InstalledVersion == "" {
		info.InstalledVersion = installedVer
	}

	slog.Debug("firmware update check complete",
		"installed", info.InstalledVersion,
		"latest", info.LatestVersion,
		"channel", info.Channel,
		"status", info.Status,
		"architecture", info.Architecture,
	)
	return info, nil
}

View File

@@ -0,0 +1,110 @@
package device
import (
"fmt"
"log/slog"
routeros "github.com/go-routeros/routeros/v3"
)
// HealthMetrics holds system resource metrics collected from a RouterOS device.
// String fields carry the raw RouterOS API values so the subscriber can parse
// and validate them before inserting into TimescaleDB.
type HealthMetrics struct {
	// CPULoad is the raw cpu-load value copied from DeviceInfo.
	CPULoad string `json:"cpu_load"`
	// FreeMemory / TotalMemory are raw byte counts copied from DeviceInfo.
	FreeMemory  string `json:"free_memory"`
	TotalMemory string `json:"total_memory"`
	// FreeDisk / TotalDisk are free-hdd-space / total-hdd-space from
	// /system/resource/print; empty when the query fails.
	FreeDisk  string `json:"free_disk"`
	TotalDisk string `json:"total_disk"`
	// Temperature is the sensor reading; empty string if the device has no sensor.
	Temperature string `json:"temperature"`
}
// CollectHealth gathers system health metrics for a RouterOS device.
//
// CPU and memory figures are copied from the already-collected DeviceInfo;
// disk stats come from an explicit /system/resource/print query (they are not
// in the default proplist used by DetectVersion), and temperature comes from
// /system/health/print via collectTemperature.
//
// Disk and temperature failures are non-fatal: the corresponding fields stay
// empty and the function still returns a nil error.
func CollectHealth(client *routeros.Client, info DeviceInfo) (HealthMetrics, error) {
	metrics := HealthMetrics{
		CPULoad:     info.CPULoad,
		FreeMemory:  info.FreeMemory,
		TotalMemory: info.TotalMemory,
	}

	// Disk stats require their own query with an explicit proplist.
	diskReply, err := client.Run(
		"/system/resource/print",
		"=.proplist=free-hdd-space,total-hdd-space",
	)
	switch {
	case err != nil:
		slog.Warn("could not collect disk stats", "error", err)
	case len(diskReply.Re) > 0:
		row := diskReply.Re[0].Map
		metrics.FreeDisk = row["free-hdd-space"]
		metrics.TotalDisk = row["total-hdd-space"]
	}

	// /system/health/print may not exist on all devices; errors are non-fatal.
	metrics.Temperature = collectTemperature(client, info.MajorVersion)
	return metrics, nil
}
// collectTemperature queries /system/health/print and extracts the temperature
// reading. Returns an empty string if the device has no temperature sensor or
// the command is not supported.
//
//   - RouterOS v7: rows with name/value columns; "cpu-temperature" preferred,
//     "board-temperature" used as fallback.
//   - RouterOS v6 (or unknown version): a single flat map keyed directly.
func collectTemperature(client *routeros.Client, majorVersion int) string {
	reply, err := client.Run("/system/health/print")
	if err != nil {
		slog.Debug("temperature collection not available", "error", err)
		return ""
	}
	if len(reply.Re) == 0 {
		return ""
	}

	if majorVersion < 7 {
		// v6-style flat map: probe the known keys directly.
		row := reply.Re[0].Map
		if temp, ok := row["cpu-temperature"]; ok {
			return temp
		}
		if temp, ok := row["board-temperature"]; ok {
			return temp
		}
		return ""
	}

	// v7-style name/value rows: return cpu-temperature as soon as it is seen,
	// remembering board-temperature as a fallback.
	board := ""
	for _, row := range reply.Re {
		switch row.Map["name"] {
		case "cpu-temperature":
			return row.Map["value"]
		case "board-temperature":
			board = row.Map["value"]
		}
	}
	return board
}
// collectHealthError wraps err with context for CollectHealth callers when the
// primary resource query fails completely.
// NOTE(review): no caller within this file uses it — presumably referenced
// elsewhere in the package; confirm before removing.
func collectHealthError(err error) error {
	return fmt.Errorf("collecting health metrics: %w", err)
}

View File

@@ -0,0 +1,61 @@
// Package device provides RouterOS metric collectors for the poller.
package device
import (
"fmt"
"log/slog"
"strconv"
routeros "github.com/go-routeros/routeros/v3"
)
// InterfaceStats holds the traffic counters for a single RouterOS interface,
// as reported by /interface/print.
type InterfaceStats struct {
	// Name is the interface name, e.g. "ether1".
	Name string `json:"name"`
	// RxBytes / TxBytes are cumulative byte counters (0 when unparseable).
	RxBytes int64 `json:"rx_bytes"`
	TxBytes int64 `json:"tx_bytes"`
	// Running reports whether RouterOS considers the link up ("running" == "true").
	Running bool `json:"running"`
	// Type is the RouterOS interface type, e.g. "ether", "bridge".
	Type string `json:"type"`
}
// CollectInterfaces queries the RouterOS device for per-interface traffic
// counters via /interface/print.
//
// Returns a slice of InterfaceStats. On error, returns nil and the error —
// the caller decides whether to skip the device or log a warning. Counters
// that fail to parse are logged and recorded as 0 rather than failing the
// whole collection.
func CollectInterfaces(client *routeros.Client) ([]InterfaceStats, error) {
	reply, err := client.Run(
		"/interface/print",
		"=.proplist=name,rx-byte,tx-byte,running,type",
	)
	if err != nil {
		return nil, fmt.Errorf("running /interface/print: %w", err)
	}

	// parseCounter converts one raw counter field, warning and returning 0 on
	// malformed input so a single bad row never aborts collection.
	parseCounter := func(ifName, field, raw string) int64 {
		v, parseErr := strconv.ParseInt(raw, 10, 64)
		if parseErr != nil {
			slog.Warn("could not parse "+field+" for interface", "interface", ifName, "value", raw)
			return 0
		}
		return v
	}

	stats := make([]InterfaceStats, 0, len(reply.Re))
	for _, row := range reply.Re {
		m := row.Map
		stats = append(stats, InterfaceStats{
			Name:    m["name"],
			RxBytes: parseCounter(m["name"], "rx-byte", m["rx-byte"]),
			TxBytes: parseCounter(m["name"], "tx-byte", m["tx-byte"]),
			Running: m["running"] == "true",
			Type:    m["type"],
		})
	}
	return stats, nil
}

View File

@@ -0,0 +1,53 @@
// Package device provides SFTP file upload helpers for RouterOS devices.
//
// RouterOS has a built-in SSH/SFTP server (port 22) that accepts the same
// credentials as the API. Since the RouterOS binary API cannot upload files,
// SFTP is used to push certificate PEM files before importing them.
package device
import (
"fmt"
"time"
"github.com/pkg/sftp"
"golang.org/x/crypto/ssh"
)
// NewSSHClient creates an SSH connection to a RouterOS device using password
// authentication (the same credentials as API access). Host key verification
// is intentionally skipped because RouterOS ships self-signed SSH host keys.
func NewSSHClient(ip string, port int, username, password string, timeout time.Duration) (*ssh.Client, error) {
	addr := fmt.Sprintf("%s:%d", ip, port)
	conf := &ssh.ClientConfig{
		User:            username,
		Auth:            []ssh.AuthMethod{ssh.Password(password)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(), //nolint:gosec // RouterOS self-signed SSH
		Timeout:         timeout,
	}
	conn, err := ssh.Dial("tcp", addr, conf)
	if err != nil {
		return nil, fmt.Errorf("SSH dial to %s: %w", addr, err)
	}
	return conn, nil
}
// UploadFile uploads data to a file on the RouterOS device via SFTP.
//
// The remote file's Close error is checked explicitly rather than deferred:
// SFTP servers may only flush/commit written data at close, so ignoring that
// error (as `defer f.Close()` did) could report a truncated upload as success.
func UploadFile(sshClient *ssh.Client, remotePath string, data []byte) error {
	client, err := sftp.NewClient(sshClient)
	if err != nil {
		return fmt.Errorf("creating SFTP client: %w", err)
	}
	defer client.Close()
	f, err := client.Create(remotePath)
	if err != nil {
		return fmt.Errorf("creating remote file %s: %w", remotePath, err)
	}
	if _, err := f.Write(data); err != nil {
		f.Close() //nolint:errcheck // the write error takes precedence
		return fmt.Errorf("writing to %s: %w", remotePath, err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("closing remote file %s: %w", remotePath, err)
	}
	return nil
}

View File

@@ -0,0 +1,86 @@
package device
import (
"fmt"
"log/slog"
routeros "github.com/go-routeros/routeros/v3"
)
// DeviceInfo holds metadata collected from /system/resource/print and
// /system/routerboard/print. All string fields are stored exactly as the
// device reports them (RouterOS-formatted, not normalized).
type DeviceInfo struct {
	Version          string // full version string, e.g. "6.49.10" or "7.12"
	MajorVersion     int    // leading digit of Version; 0 when the format is unexpected
	BoardName        string // from /system/resource board-name
	Architecture     string // from /system/resource architecture-name
	Uptime           string // device uptime string as reported by RouterOS
	CPULoad          string // CPU load as reported by RouterOS
	FreeMemory       string // free memory as reported by RouterOS
	TotalMemory      string // total memory as reported by RouterOS
	SerialNumber     string // from /system/routerboard serial-number
	FirmwareVersion  string // from /system/routerboard current-firmware
	LastConfigChange string // from /system/resource last-config-change (RouterOS 7.x)
}
// DetectVersion queries the RouterOS device for system resource information.
//
// Runs /system/resource/print and parses the response into DeviceInfo, then
// best-effort augments it from /system/routerboard/print (serial number and
// firmware). The major version is parsed from ALL leading digits of the
// version string (e.g. "6.49.10" -> 6, "7.12" -> 7), so a future two-digit
// major such as "10.1" parses as 10 rather than 1.
func DetectVersion(c *routeros.Client) (DeviceInfo, error) {
	reply, err := c.Run("/system/resource/print")
	if err != nil {
		return DeviceInfo{}, fmt.Errorf("running /system/resource/print: %w", err)
	}
	if len(reply.Re) == 0 {
		return DeviceInfo{}, fmt.Errorf("/system/resource/print returned no sentences")
	}
	m := reply.Re[0].Map
	info := DeviceInfo{
		Version:          m["version"],
		BoardName:        m["board-name"],
		Architecture:     m["architecture-name"],
		Uptime:           m["uptime"],
		CPULoad:          m["cpu-load"],
		FreeMemory:       m["free-memory"],
		TotalMemory:      m["total-memory"],
		LastConfigChange: m["last-config-change"],
	}
	// Accumulate all leading digits as the major version. MajorVersion stays 0
	// (with a warning) when the string does not start with a digit.
	if len(info.Version) > 0 {
		digits := 0
		for digits < len(info.Version) && info.Version[digits] >= '0' && info.Version[digits] <= '9' {
			info.MajorVersion = info.MajorVersion*10 + int(info.Version[digits]-'0')
			digits++
		}
		if digits == 0 {
			slog.Warn("unexpected RouterOS version format", "version", info.Version)
			info.MajorVersion = 0
		}
	}
	// Query routerboard info for serial number and firmware version.
	// Non-fatal: CHR and x86 devices don't have a routerboard.
	rbReply, rbErr := c.Run("/system/routerboard/print")
	if rbErr == nil && len(rbReply.Re) > 0 {
		rb := rbReply.Re[0].Map
		info.SerialNumber = rb["serial-number"]
		info.FirmwareVersion = rb["current-firmware"]
	} else if rbErr != nil {
		slog.Debug("routerboard query failed (normal for CHR/x86)", "error", rbErr)
	}
	slog.Debug("detected RouterOS version",
		"version", info.Version,
		"major_version", info.MajorVersion,
		"board_name", info.BoardName,
		"serial", info.SerialNumber,
		"firmware", info.FirmwareVersion,
	)
	return info, nil
}

View File

@@ -0,0 +1,145 @@
package device
import (
"log/slog"
"strconv"
routeros "github.com/go-routeros/routeros/v3"
)
// WirelessStats holds aggregated wireless metrics for a single wireless interface.
// Metrics are aggregated across all registered clients on that interface.
type WirelessStats struct {
	Interface   string `json:"interface"`    // wireless interface name
	ClientCount int    `json:"client_count"` // number of registered clients
	AvgSignal   int    `json:"avg_signal"`   // dBm (negative), e.g. -67; average over clients
	CCQ         int    `json:"ccq"`          // 0-100 percentage; 0 if not available (v7)
	Frequency   int    `json:"frequency"`    // MHz
}
// CollectWireless queries the RouterOS device for wireless registration-table
// entries and aggregates them per interface.
//
// Version routing:
//   - majorVersion >= 7: tries /interface/wifi/registration-table/print first;
//     falls back to /interface/wireless/registration-table/print if that fails.
//   - majorVersion < 7 (including 0 for unknown): uses the classic wireless path.
//
// Returns an empty slice (not an error) when the device has no wireless interfaces.
func CollectWireless(client *routeros.Client, majorVersion int) ([]WirelessStats, error) {
	var registrations []map[string]string
	var useV7WiFi bool
	if majorVersion >= 7 {
		// Try the v7 WiFi API first.
		regs, err := runRegistrationTable(client, "/interface/wifi/registration-table/print")
		if err == nil {
			useV7WiFi = true
			registrations = regs
		} else {
			slog.Debug("v7 wifi registration-table not available, falling back to wireless", "error", err)
			// Fall back to classic wireless path.
			regs, err = runRegistrationTable(client, "/interface/wireless/registration-table/print")
			if err != nil {
				slog.Debug("device has no wireless interfaces", "error", err)
				return nil, nil
			}
			registrations = regs
		}
	} else {
		regs, err := runRegistrationTable(client, "/interface/wireless/registration-table/print")
		if err != nil {
			slog.Debug("device has no wireless interfaces", "error", err)
			return nil, nil
		}
		registrations = regs
	}
	if len(registrations) == 0 {
		return nil, nil
	}
	// Collect frequency per interface so we can include it in the stats.
	frequencies := collectWirelessFrequencies(client, majorVersion, useV7WiFi)
	// Aggregate registration-table rows per interface: count clients and sum
	// signal/CCQ so we can average below.
	type ifaceAgg struct {
		count  int
		signal int
		ccq    int
	}
	agg := make(map[string]*ifaceAgg)
	for _, r := range registrations {
		iface := r["interface"]
		if iface == "" {
			continue
		}
		if _, ok := agg[iface]; !ok {
			agg[iface] = &ifaceAgg{}
		}
		a := agg[iface]
		a.count++
		// Unparseable values simply don't contribute to the sums.
		if sig, err := strconv.Atoi(r["signal-strength"]); err == nil {
			a.signal += sig
		}
		if ccq, err := strconv.Atoi(r["tx-ccq"]); err == nil {
			a.ccq += ccq
		}
	}
	result := make([]WirelessStats, 0, len(agg))
	for iface, a := range agg {
		avgSignal := 0
		avgCCQ := 0
		if a.count > 0 {
			avgSignal = a.signal / a.count
			avgCCQ = a.ccq / a.count
		}
		result = append(result, WirelessStats{
			Interface:   iface,
			ClientCount: a.count,
			AvgSignal:   avgSignal,
			CCQ:         avgCCQ,
			Frequency:   frequencies[iface],
		})
	}
	return result, nil
}

// runRegistrationTable runs the given registration-table print command and
// returns each reply sentence's attribute map. Extracted to remove the
// three identical run-and-copy blocks CollectWireless previously contained.
func runRegistrationTable(client *routeros.Client, cmd string) ([]map[string]string, error) {
	reply, err := client.Run(cmd)
	if err != nil {
		return nil, err
	}
	rows := make([]map[string]string, 0, len(reply.Re))
	for _, s := range reply.Re {
		rows = append(rows, s.Map)
	}
	return rows, nil
}
// collectWirelessFrequencies returns a map of interface name → frequency (MHz).
// Uses the v7 WiFi API or the classic wireless API based on the useV7WiFi flag.
// (majorVersion is currently unused but kept for signature stability with the
// caller.) Failures are non-fatal: an empty map is returned on any error.
func collectWirelessFrequencies(client *routeros.Client, majorVersion int, useV7WiFi bool) map[string]int {
	cmd := "/interface/wireless/print"
	if useV7WiFi {
		cmd = "/interface/wifi/print"
	}
	freqs := make(map[string]int)
	reply, err := client.Run(cmd, "=.proplist=name,frequency")
	if err != nil {
		slog.Debug("could not collect wireless frequencies", "command", cmd, "error", err)
		return freqs
	}
	for _, sentence := range reply.Re {
		attrs := sentence.Map
		// Skip entries whose frequency is absent or non-numeric.
		if mhz, convErr := strconv.Atoi(attrs["frequency"]); convErr == nil {
			freqs[attrs["name"]] = mhz
		}
	}
	return freqs
}

View File

@@ -0,0 +1,60 @@
// Package observability provides Prometheus metrics and health endpoints for the poller.
package observability
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// PollDuration tracks the duration of individual device poll cycles.
// Registered on the default Prometheus registry at package init via promauto.
var PollDuration = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "mikrotik_poll_duration_seconds",
	Help:    "Duration of a single device poll cycle in seconds.",
	Buckets: []float64{0.5, 1, 2, 5, 10, 30, 60},
})

// PollTotal counts the total number of poll cycles by status.
// Status labels: "success", "error", "skipped".
var PollTotal = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "mikrotik_poll_total",
	Help: "Total number of poll cycles.",
}, []string{"status"})

// DevicesActive tracks the number of devices currently being polled.
// Set by the scheduler after each reconciliation pass.
var DevicesActive = promauto.NewGauge(prometheus.GaugeOpts{
	Name: "mikrotik_devices_active",
	Help: "Number of devices currently being polled.",
})

// DeviceConnectionErrors counts total device connection failures.
var DeviceConnectionErrors = promauto.NewCounter(prometheus.CounterOpts{
	Name: "mikrotik_device_connection_errors_total",
	Help: "Total device connection failures.",
})

// NATSPublishTotal counts NATS publish operations by subject and status.
// Subject labels: "status", "metrics", "firmware".
// Status labels: "success", "error".
var NATSPublishTotal = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "mikrotik_nats_publish_total",
	Help: "Total NATS publish operations.",
}, []string{"subject", "status"})

// RedisLockTotal counts Redis lock operations by status.
// Status labels: "obtained", "not_obtained", "error".
var RedisLockTotal = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "mikrotik_redis_lock_total",
	Help: "Total Redis lock operations.",
}, []string{"status"})

// CircuitBreakerSkips counts polls skipped due to circuit breaker backoff.
var CircuitBreakerSkips = promauto.NewCounter(prometheus.CounterOpts{
	Name: "mikrotik_circuit_breaker_skips_total",
	Help: "Total polls skipped because the device is in circuit breaker backoff.",
})

// CircuitBreakerResets counts circuit breaker resets (device recovered after failures).
var CircuitBreakerResets = promauto.NewCounter(prometheus.CounterOpts{
	Name: "mikrotik_circuit_breaker_resets_total",
	Help: "Total circuit breaker resets when a device recovers.",
})

View File

@@ -0,0 +1,59 @@
package observability
import (
"context"
"log/slog"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// StartServer starts an HTTP server for Prometheus metrics and health checks.
//
// The server exposes:
// - GET /metrics — Prometheus metrics endpoint
// - GET /health — Liveness probe (returns 200 with {"status":"ok"})
//
// The server shuts down gracefully when ctx is cancelled. It runs in a
// goroutine and does not block the caller.
func StartServer(ctx context.Context, addr string) *http.Server {
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
mux.HandleFunc("/health", healthHandler)
srv := &http.Server{
Addr: addr,
Handler: mux,
ReadHeaderTimeout: 5 * time.Second,
}
// Start serving in a goroutine.
go func() {
slog.Info("observability server starting", "addr", addr)
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
slog.Error("observability server error", "error", err)
}
}()
// Graceful shutdown when context is cancelled.
go func() {
<-ctx.Done()
slog.Info("observability server shutting down")
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := srv.Shutdown(shutdownCtx); err != nil {
slog.Error("observability server shutdown error", "error", err)
}
slog.Info("observability server stopped")
}()
return srv
}
// healthHandler returns a simple liveness response.
func healthHandler(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte(`{"status":"ok"}`))
}

View File

@@ -0,0 +1,195 @@
package poller_test
import (
"context"
"encoding/json"
"testing"
"time"
"github.com/bsm/redislock"
"github.com/nats-io/nats.go"
"github.com/nats-io/nats.go/jetstream"
goredis "github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/mikrotik-portal/poller/internal/bus"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/testutil"
)
// TestPollPublishConsumeCycle_Integration verifies the complete pipeline:
//
//  1. DeviceStore reads devices from real PostgreSQL
//  2. Publisher sends status events through real NATS JetStream
//  3. A NATS consumer receives the events with correct data
//  4. Redis distributed lock can be obtained and released
//
// The actual PollDevice function requires a real RouterOS device, so we test
// the integration seams individually and verify they compose correctly.
func TestPollPublishConsumeCycle_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	ctx := context.Background()
	tenantID := "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
	dummyCreds := []byte("dummy-encrypted-credentials")
	// --- Phase 1: PostgreSQL + DeviceStore ---
	connStr, pgCleanup := testutil.SetupPostgres(t)
	defer pgCleanup()
	v7 := "7.16"
	major7 := 7
	deviceID := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenantID,
		IPAddress:            "10.0.0.1",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: dummyCreds,
		RouterOSVersion:      &v7,
		MajorVersion:         &major7,
	})
	ds, err := store.NewDeviceStore(ctx, connStr)
	require.NoError(t, err)
	defer ds.Close()
	devices, err := ds.FetchDevices(ctx)
	require.NoError(t, err)
	require.Len(t, devices, 1)
	assert.Equal(t, deviceID, devices[0].ID)
	assert.Equal(t, tenantID, devices[0].TenantID)
	// --- Phase 2: NATS + Publisher ---
	natsURL, natsCleanup := testutil.SetupNATS(t)
	defer natsCleanup()
	pub, err := bus.NewPublisher(natsURL)
	require.NoError(t, err)
	defer pub.Close()
	// Create a consumer to verify events.
	nc, err := nats.Connect(natsURL)
	require.NoError(t, err)
	defer nc.Close()
	js, err := jetstream.New(nc)
	require.NoError(t, err)
	cons, err := js.CreateOrUpdateConsumer(ctx, "DEVICE_EVENTS", jetstream.ConsumerConfig{
		FilterSubject: "device.status.>",
		AckPolicy:     jetstream.AckNonePolicy,
	})
	require.NoError(t, err)
	// Simulate what PollDevice does after connecting to a device:
	// publish a status event with data from the fetched device.
	dev := devices[0]
	statusEvent := bus.DeviceStatusEvent{
		DeviceID: dev.ID,
		TenantID: dev.TenantID,
		Status:   "online",
		LastSeen: time.Now().UTC().Format(time.RFC3339),
	}
	err = pub.PublishStatus(ctx, statusEvent)
	require.NoError(t, err)
	// Verify the consumer receives the event. jetstream.Msg is an interface,
	// so hold it directly — taking a pointer to the loop variable
	// (pointer-to-interface) is an antipattern and forced an awkward deref.
	msgBatch, err := cons.Fetch(1, jetstream.FetchMaxWait(5*time.Second))
	require.NoError(t, err)
	var received jetstream.Msg
	for msg := range msgBatch.Messages() {
		received = msg
		break
	}
	require.NotNil(t, received, "consumer should receive the status event")
	var got bus.DeviceStatusEvent
	err = json.Unmarshal(received.Data(), &got)
	require.NoError(t, err)
	assert.Equal(t, dev.ID, got.DeviceID)
	assert.Equal(t, dev.TenantID, got.TenantID)
	assert.Equal(t, "online", got.Status)
	// --- Phase 3: Redis distributed lock ---
	redisAddr, redisCleanup := testutil.SetupRedis(t)
	defer redisCleanup()
	rdb := goredis.NewClient(&goredis.Options{Addr: redisAddr})
	defer rdb.Close()
	locker := redislock.New(rdb)
	lockKey := "poll:device:" + dev.ID
	lock, err := locker.Obtain(ctx, lockKey, 10*time.Second, nil)
	require.NoError(t, err, "should obtain Redis distributed lock")
	// A second attempt should fail (lock held).
	_, err = locker.Obtain(ctx, lockKey, 10*time.Second, nil)
	assert.ErrorIs(t, err, redislock.ErrNotObtained, "second lock attempt should fail")
	// Release and re-obtain.
	err = lock.Release(ctx)
	require.NoError(t, err, "should release lock")
	lock2, err := locker.Obtain(ctx, lockKey, 10*time.Second, nil)
	require.NoError(t, err, "should re-obtain lock after release")
	_ = lock2.Release(ctx)
}
// TestSchedulerReconcile_WithRealDB_Integration verifies the DeviceStore
// integration seam the Scheduler's reconciliation loop depends on: devices
// inserted into a real PostgreSQL database are returned by FetchDevices.
//
// NOTE(review): despite the name, this test does not run the Scheduler loop
// itself — PollDevice requires real RouterOS hardware, so only the
// FetchDevices path that reconcileDevices consumes is exercised here.
func TestSchedulerReconcile_WithRealDB_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	ctx := context.Background()
	tenantID := "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
	dummyCreds := []byte("dummy-encrypted-credentials")
	connStr, pgCleanup := testutil.SetupPostgres(t)
	defer pgCleanup()
	// Insert 2 devices.
	id1 := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenantID,
		IPAddress:            "10.0.0.1",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: dummyCreds,
	})
	id2 := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenantID,
		IPAddress:            "10.0.0.2",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: dummyCreds,
	})
	ds, err := store.NewDeviceStore(ctx, connStr)
	require.NoError(t, err)
	defer ds.Close()
	// Verify DeviceStore returns both devices (integration seam check).
	devices, err := ds.FetchDevices(ctx)
	require.NoError(t, err)
	require.Len(t, devices, 2)
	returnedIDs := make(map[string]bool)
	for _, d := range devices {
		returnedIDs[d.ID] = true
	}
	assert.True(t, returnedIDs[id1], "device 1 should be fetched from real DB")
	assert.True(t, returnedIDs[id2], "device 2 should be fetched from real DB")
}

View File

@@ -0,0 +1,14 @@
package poller
import (
"context"
"github.com/mikrotik-portal/poller/internal/store"
)
// DeviceFetcher is the subset of store.DeviceStore that the Scheduler needs.
// Defined here (consumer-side) following Go interface best practices.
// The concrete *store.DeviceStore automatically satisfies this interface,
// and tests substitute a mock implementation.
type DeviceFetcher interface {
	// FetchDevices returns the current set of devices to poll.
	FetchDevices(ctx context.Context) ([]store.Device, error)
}

View File

@@ -0,0 +1,264 @@
package poller
import (
	"context"
	"errors"
	"log/slog"
	"sync"
	"time"

	"github.com/bsm/redislock"

	"github.com/mikrotik-portal/poller/internal/bus"
	"github.com/mikrotik-portal/poller/internal/observability"
	"github.com/mikrotik-portal/poller/internal/store"
	"github.com/mikrotik-portal/poller/internal/vault"
)
// deviceState tracks per-device circuit breaker and lifecycle state.
// It is created under Scheduler.mu; after creation the breaker fields are
// only touched by that device's own polling goroutine.
type deviceState struct {
	cancel              context.CancelFunc // stops this device's polling goroutine
	consecutiveFailures int                // poll failures since the last success
	backoffUntil        time.Time          // polls are skipped until this instant (zero = no backoff)
}
// Scheduler manages the lifecycle of per-device polling goroutines.
//
// It periodically re-queries the database to discover new devices (starting goroutines)
// and detect removed devices (stopping goroutines). Each device has exactly one
// polling goroutine running at a time.
//
// Circuit breaker: after consecutive connection failures, a device enters exponential
// backoff. The device loop skips poll ticks during backoff. On successful poll, the
// circuit breaker resets and the device resumes normal polling.
type Scheduler struct {
	store           DeviceFetcher          // source of the device list (DB-backed in production)
	locker          *redislock.Client      // distributed lock client (one poll per device across pods)
	publisher       *bus.Publisher         // NATS JetStream event publisher
	credentialCache *vault.CredentialCache // decrypts and caches device credentials
	pollInterval    time.Duration          // cadence of per-device polls
	connTimeout     time.Duration          // RouterOS connection timeout
	cmdTimeout      time.Duration          // per-command timeout for RouterOS API calls
	refreshPeriod   time.Duration          // how often the device list is re-fetched
	// Circuit breaker configuration.
	maxFailures int           // consecutive failures before backoff starts
	baseBackoff time.Duration // first backoff duration
	maxBackoff  time.Duration // backoff ceiling
	// activeDevices maps device ID to per-device state.
	// mu guards activeDevices (touched by Run's shutdown path and reconcile).
	mu            sync.Mutex
	activeDevices map[string]*deviceState
}
// NewScheduler creates a Scheduler with the provided dependencies.
//
// pollInterval is the per-device polling cadence; connTimeout and cmdTimeout
// bound the RouterOS connection attempt and individual API commands;
// refreshPeriod is how often the device list is re-read from the database.
// maxFailures, baseBackoff and maxBackoff configure the circuit breaker:
// after maxFailures consecutive poll errors a device backs off exponentially
// starting at baseBackoff, capped at maxBackoff.
func NewScheduler(
	store DeviceFetcher,
	locker *redislock.Client,
	publisher *bus.Publisher,
	credentialCache *vault.CredentialCache,
	pollInterval time.Duration,
	connTimeout time.Duration,
	cmdTimeout time.Duration,
	refreshPeriod time.Duration,
	maxFailures int,
	baseBackoff time.Duration,
	maxBackoff time.Duration,
) *Scheduler {
	return &Scheduler{
		store:           store,
		locker:          locker,
		publisher:       publisher,
		credentialCache: credentialCache,
		pollInterval:    pollInterval,
		connTimeout:     connTimeout,
		cmdTimeout:      cmdTimeout,
		refreshPeriod:   refreshPeriod,
		maxFailures:     maxFailures,
		baseBackoff:     baseBackoff,
		maxBackoff:      maxBackoff,
		activeDevices:   make(map[string]*deviceState),
	}
}
// Run is the main scheduler loop. It:
//  1. Fetches devices from the database.
//  2. Starts goroutines for newly-discovered devices.
//  3. Stops goroutines for devices no longer in the database.
//  4. Sleeps for refreshPeriod, then repeats.
//  5. Cancels all goroutines when ctx is cancelled (graceful shutdown).
//
// Run blocks until ctx is cancelled, then waits for all goroutines to finish.
// It always returns nil: transient DB errors are logged and retried on the
// next cycle rather than propagated to the caller.
func (s *Scheduler) Run(ctx context.Context) error {
	var wg sync.WaitGroup
	defer func() {
		// On shutdown, cancel all active device goroutines and wait for them.
		// The mutex is released before wg.Wait() so goroutines are never
		// blocked on it while we wait; entries are not deleted because the
		// whole Scheduler is going away.
		s.mu.Lock()
		for id, ds := range s.activeDevices {
			slog.Info("stopping device goroutine", "device_id", id)
			ds.cancel()
		}
		s.mu.Unlock()
		wg.Wait()
		slog.Info("scheduler shutdown complete")
	}()
	for {
		if err := s.reconcileDevices(ctx, &wg); err != nil {
			slog.Error("device reconciliation failed", "error", err)
			// Continue — a transient DB error should not crash the scheduler.
		}
		select {
		case <-ctx.Done():
			slog.Info("scheduler context cancelled — shutting down")
			return nil
		case <-time.After(s.refreshPeriod):
			// Next reconciliation cycle.
		}
	}
}
// reconcileDevices fetches the current device list from the DB and starts/stops
// goroutines as needed to keep the active set in sync. It is called from Run
// and holds s.mu for the start/stop bookkeeping.
func (s *Scheduler) reconcileDevices(ctx context.Context, wg *sync.WaitGroup) error {
	devices, err := s.store.FetchDevices(ctx)
	if err != nil {
		return err
	}
	// Set of device IDs that should be running after this pass.
	wanted := make(map[string]struct{}, len(devices))
	for _, d := range devices {
		wanted[d.ID] = struct{}{}
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	// Launch a loop for every device that does not already have one.
	for _, dev := range devices {
		if _, running := s.activeDevices[dev.ID]; running {
			continue
		}
		devCopy := dev // capture loop variable
		devCtx, cancel := context.WithCancel(ctx)
		state := &deviceState{cancel: cancel}
		s.activeDevices[dev.ID] = state
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.runDeviceLoop(devCtx, devCopy, state)
		}()
		slog.Info("started polling goroutine", "device_id", dev.ID, "ip", dev.IPAddress)
	}
	// Tear down loops for devices that vanished from the database.
	for id, state := range s.activeDevices {
		if _, keep := wanted[id]; keep {
			continue
		}
		slog.Info("stopping goroutine for removed device", "device_id", id)
		state.cancel()
		delete(s.activeDevices, id)
	}
	// Update Prometheus gauge with current active device count.
	observability.DevicesActive.Set(float64(len(s.activeDevices)))
	slog.Debug("device reconciliation complete",
		"total_devices", len(devices),
		"active_goroutines", len(s.activeDevices),
	)
	return nil
}
// runDeviceLoop is the per-device polling loop. It ticks at pollInterval and
// calls PollDevice synchronously on each tick (not in a sub-goroutine, to avoid
// unbounded goroutine growth if polls are slow).
//
// Circuit breaker: when consecutive failures exceed maxFailures, the device enters
// exponential backoff. Poll ticks during backoff are skipped. On success, the
// circuit breaker resets.
func (s *Scheduler) runDeviceLoop(ctx context.Context, dev store.Device, ds *deviceState) {
	// lockTTL gives the poll cycle time to complete: interval + connection timeout + 15s margin.
	lockTTL := s.pollInterval + s.connTimeout + 15*time.Second
	ticker := time.NewTicker(s.pollInterval)
	defer ticker.Stop()
	slog.Debug("device poll loop started", "device_id", dev.ID, "poll_interval", s.pollInterval)
	for {
		select {
		case <-ctx.Done():
			slog.Debug("device poll loop stopping", "device_id", dev.ID)
			return
		case <-ticker.C:
			// Circuit breaker: skip poll if device is in backoff period.
			if time.Now().Before(ds.backoffUntil) {
				slog.Debug("circuit breaker: skipping poll (in backoff)",
					"device_id", dev.ID,
					"backoff_until", ds.backoffUntil.Format(time.RFC3339),
					"consecutive_failures", ds.consecutiveFailures,
				)
				observability.CircuitBreakerSkips.Inc()
				continue
			}
			err := PollDevice(ctx, dev, s.locker, s.publisher, s.credentialCache, s.connTimeout, s.cmdTimeout, lockTTL)
			if err != nil {
				ds.consecutiveFailures++
				if ds.consecutiveFailures >= s.maxFailures {
					backoff := calculateBackoff(ds.consecutiveFailures, s.baseBackoff, s.maxBackoff)
					ds.backoffUntil = time.Now().Add(backoff)
					slog.Warn("circuit breaker: device entering backoff",
						"device_id", dev.ID,
						"ip", dev.IPAddress,
						"consecutive_failures", ds.consecutiveFailures,
						"backoff_duration", backoff,
						"backoff_until", ds.backoffUntil.Format(time.RFC3339),
					)
				}
				// Only log as error if it's not a device-offline situation.
				// errors.Is (rather than ==) keeps the classification correct
				// even if PollDevice ever wraps ErrDeviceOffline with context.
				if !errors.Is(err, ErrDeviceOffline) {
					slog.Error("poll cycle failed",
						"device_id", dev.ID,
						"ip", dev.IPAddress,
						"error", err,
					)
				}
			} else {
				// Success — reset circuit breaker if it was tripped.
				if ds.consecutiveFailures > 0 {
					slog.Info("circuit breaker: device recovered",
						"device_id", dev.ID,
						"ip", dev.IPAddress,
						"previous_failures", ds.consecutiveFailures,
					)
					observability.CircuitBreakerResets.Inc()
					ds.consecutiveFailures = 0
					ds.backoffUntil = time.Time{}
				}
			}
		}
	}
}
// calculateBackoff computes the exponential backoff duration for the given
// number of consecutive failures: base * 2^(failures-1), capped at maxBackoff.
func calculateBackoff(failures int, baseBackoff, maxBackoff time.Duration) time.Duration {
if failures <= 1 {
return baseBackoff
}
backoff := baseBackoff * time.Duration(1<<uint(failures-1))
if backoff > maxBackoff || backoff < 0 { // negative check guards against overflow
return maxBackoff
}
return backoff
}

View File

@@ -0,0 +1,184 @@
package poller
import (
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
)
// mockDeviceFetcher implements DeviceFetcher for testing.
type mockDeviceFetcher struct {
	devices []store.Device // canned result returned by every FetchDevices call
	err     error          // canned error returned by every FetchDevices call
}

// FetchDevices returns the canned devices/err pair; ctx is never inspected.
func (m *mockDeviceFetcher) FetchDevices(ctx context.Context) ([]store.Device, error) {
	return m.devices, m.err
}
// newTestScheduler creates a Scheduler with a mock DeviceFetcher for testing.
// Uses nil for locker and publisher since reconcileDevices doesn't use them.
// pollInterval is set to 24h so device loops started during a test never
// actually tick before the test cancels them.
func newTestScheduler(fetcher DeviceFetcher) *Scheduler {
	// Create a minimal credential cache for testing (no transit, no legacy key, no db).
	testCache := vault.NewCredentialCache(64, 5*time.Minute, nil, make([]byte, 32), nil)
	return &Scheduler{
		store:           fetcher,
		locker:          nil,
		publisher:       nil,
		credentialCache: testCache,
		pollInterval:    24 * time.Hour, // Never fires during test
		connTimeout:     time.Second,
		cmdTimeout:      time.Second,
		refreshPeriod:   time.Second,
		maxFailures:     5,
		baseBackoff:     30 * time.Second,
		maxBackoff:      15 * time.Minute,
		activeDevices:   make(map[string]*deviceState),
	}
}
func TestReconcileDevices_StartsNewDevices(t *testing.T) {
devices := []store.Device{
{ID: "dev-1", TenantID: "t-1", IPAddress: "192.168.1.1", APISSLPort: 8729},
{ID: "dev-2", TenantID: "t-1", IPAddress: "192.168.1.2", APISSLPort: 8729},
}
fetcher := &mockDeviceFetcher{devices: devices}
sched := newTestScheduler(fetcher)
var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := sched.reconcileDevices(ctx, &wg)
require.NoError(t, err)
sched.mu.Lock()
assert.Len(t, sched.activeDevices, 2)
_, hasDev1 := sched.activeDevices["dev-1"]
_, hasDev2 := sched.activeDevices["dev-2"]
assert.True(t, hasDev1)
assert.True(t, hasDev2)
sched.mu.Unlock()
// Clean up: cancel context and wait for goroutines
cancel()
wg.Wait()
}
// TestReconcileDevices_StopsRemovedDevices verifies that a device present in
// activeDevices but absent from the fetched list is cancelled and removed.
//
// Cancellation is observed via devCtx.Done() directly. The previous version
// had a goroutine flip a plain bool that the test then read — a data race
// under -race; a channel/context observation is race-free and needs no sleep.
func TestReconcileDevices_StopsRemovedDevices(t *testing.T) {
	// Start with an empty database result.
	sched := newTestScheduler(&mockDeviceFetcher{devices: []store.Device{}})
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Manually add a device to activeDevices to simulate it was previously running.
	devCtx, devCancel := context.WithCancel(ctx)
	sched.activeDevices["dev-removed"] = &deviceState{cancel: devCancel}
	var wg sync.WaitGroup
	// FetchDevices returns empty -> dev-removed should be stopped.
	err := sched.reconcileDevices(ctx, &wg)
	require.NoError(t, err)
	sched.mu.Lock()
	assert.Len(t, sched.activeDevices, 0)
	sched.mu.Unlock()
	// reconcileDevices calls cancel synchronously, so Done() must already be
	// closed; the timeout only guards against a regression hanging the test.
	select {
	case <-devCtx.Done():
		// cancelled as expected
	case <-time.After(time.Second):
		t.Fatal("removed device's context was not cancelled")
	}
	cancel()
	wg.Wait()
}
// TestReconcileDevices_PreservesExistingDevices checks that an already-running
// device is left untouched by reconcile while new devices are still started.
func TestReconcileDevices_PreservesExistingDevices(t *testing.T) {
	devs := []store.Device{
		{ID: "dev-existing", TenantID: "t-1", IPAddress: "192.168.1.1", APISSLPort: 8729},
		{ID: "dev-new", TenantID: "t-1", IPAddress: "192.168.1.2", APISSLPort: 8729},
	}
	sched := newTestScheduler(&mockDeviceFetcher{devices: devs})
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Pre-populate dev-existing as if it was already running.
	_, keepCancel := context.WithCancel(ctx)
	sched.activeDevices["dev-existing"] = &deviceState{cancel: keepCancel}
	var wg sync.WaitGroup
	require.NoError(t, sched.reconcileDevices(ctx, &wg))
	sched.mu.Lock()
	assert.Len(t, sched.activeDevices, 2)
	// dev-existing should still have its ORIGINAL cancel function (not replaced).
	assert.Equal(t, fmt.Sprintf("%p", keepCancel), fmt.Sprintf("%p", sched.activeDevices["dev-existing"].cancel))
	_, hasNew := sched.activeDevices["dev-new"]
	assert.True(t, hasNew)
	sched.mu.Unlock()
	cancel()
	wg.Wait()
}
func TestReconcileDevices_HandlesEmptyDatabase(t *testing.T) {
fetcher := &mockDeviceFetcher{devices: []store.Device{}}
sched := newTestScheduler(fetcher)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var wg sync.WaitGroup
err := sched.reconcileDevices(ctx, &wg)
require.NoError(t, err)
sched.mu.Lock()
assert.Len(t, sched.activeDevices, 0)
sched.mu.Unlock()
cancel()
wg.Wait()
}
func TestReconcileDevices_FetchError(t *testing.T) {
fetcher := &mockDeviceFetcher{err: fmt.Errorf("connection refused")}
sched := newTestScheduler(fetcher)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Pre-populate a device
devCancel := func() {}
sched.activeDevices["dev-1"] = &deviceState{cancel: devCancel}
var wg sync.WaitGroup
err := sched.reconcileDevices(ctx, &wg)
assert.Error(t, err)
assert.Contains(t, err.Error(), "connection refused")
// Active devices should be unchanged (no side effects on error)
sched.mu.Lock()
assert.Len(t, sched.activeDevices, 1)
sched.mu.Unlock()
cancel()
wg.Wait()
}

View File

@@ -0,0 +1,409 @@
// Package poller implements the polling logic for individual devices.
package poller
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"time"
"github.com/bsm/redislock"
"github.com/redis/go-redis/v9"
"github.com/mikrotik-portal/poller/internal/bus"
"github.com/mikrotik-portal/poller/internal/device"
"github.com/mikrotik-portal/poller/internal/observability"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
)
// ErrDeviceOffline is returned by PollDevice when a device cannot be reached.
// The scheduler uses this to drive the circuit breaker — consecutive offline
// events trigger exponential backoff without logging as a hard error.
// It is a sentinel; callers should compare with errors.Is.
var ErrDeviceOffline = errors.New("device offline")
// redisClientForFirmware is a module-level Redis client reference used
// for firmware check rate limiting. Set by the scheduler before starting polls.
//
// NOTE(review): package-level mutable state with no synchronization — this is
// only safe if SetRedisClient is called once before any polling goroutine
// starts; confirm callers respect that ordering.
var redisClientForFirmware *redis.Client

// SetRedisClient sets the Redis client used for firmware rate limiting.
// It performs a plain pointer write (no atomic/mutex), so it must be called
// before polling begins, not concurrently with active polls.
func SetRedisClient(c *redis.Client) {
	redisClientForFirmware = c
}
// withTimeout runs fn in a goroutine and returns its result, or a timeout error
// if ctx expires first. This wraps RouterOS API calls that don't accept a context
// parameter, enforcing per-command timeouts to prevent indefinite blocking.
func withTimeout[T any](ctx context.Context, fn func() (T, error)) (T, error) {
type result struct {
val T
err error
}
ch := make(chan result, 1)
go func() {
v, e := fn()
ch <- result{v, e}
}()
select {
case r := <-ch:
return r.val, r.err
case <-ctx.Done():
var zero T
return zero, fmt.Errorf("command timed out: %w", ctx.Err())
}
}
// PollDevice performs a single poll cycle for one device:
//  1. Acquire distributed Redis lock to prevent duplicate polls across pods.
//  2. Decrypt device credentials.
//  3. Attempt TLS connection to the RouterOS binary API.
//  4. On failure: publish offline event, return ErrDeviceOffline.
//  5. On success: run /system/resource/print, publish online event with metadata.
//  6. Collect interface, health, and wireless metrics; publish as separate events.
//  7. Release lock and close connection via deferred calls.
//
// lockTTL should be longer than the expected poll duration to prevent the lock
// from expiring while the poll is still in progress.
//
// cmdTimeout is the per-command timeout for individual RouterOS API calls.
//
// Returns nil when the poll succeeded or was skipped (lock held by another
// pod), ErrDeviceOffline when the device is unreachable, and a wrapped error
// for internal failures (lock, credentials, publish).
func PollDevice(
	ctx context.Context,
	dev store.Device,
	locker *redislock.Client,
	pub *bus.Publisher,
	credentialCache *vault.CredentialCache,
	connTimeout time.Duration,
	cmdTimeout time.Duration,
	lockTTL time.Duration,
) error {
	startTime := time.Now()
	// pollStatus is mutated on internal-failure paths below and read by the
	// deferred metrics recorder; it must be declared before that defer runs.
	pollStatus := "success"
	lockKey := fmt.Sprintf("poll:device:%s", dev.ID)
	// Acquire per-device lock. If another pod already holds the lock, skip this cycle.
	lock, err := locker.Obtain(ctx, lockKey, lockTTL, nil)
	if err == redislock.ErrNotObtained {
		slog.Debug("skipping poll — lock held by another pod", "device_id", dev.ID)
		observability.PollTotal.WithLabelValues("skipped").Inc()
		observability.RedisLockTotal.WithLabelValues("not_obtained").Inc()
		return nil
	}
	if err != nil {
		observability.RedisLockTotal.WithLabelValues("error").Inc()
		return fmt.Errorf("obtaining Redis lock for device %s: %w", dev.ID, err)
	}
	observability.RedisLockTotal.WithLabelValues("obtained").Inc()
	defer func() {
		// ErrLockNotHeld means the TTL elapsed before the poll finished; the
		// lock is already gone, so only other release errors are logged.
		if releaseErr := lock.Release(ctx); releaseErr != nil && releaseErr != redislock.ErrLockNotHeld {
			slog.Warn("failed to release Redis lock", "device_id", dev.ID, "error", releaseErr)
		}
	}()
	// Deferred metric recording — captures poll duration and status at exit.
	defer func() {
		observability.PollDuration.Observe(time.Since(startTime).Seconds())
		observability.PollTotal.WithLabelValues(pollStatus).Inc()
	}()
	// Decrypt device credentials via credential cache (Transit preferred, legacy fallback).
	username, password, err := credentialCache.GetCredentials(
		dev.ID,
		dev.TenantID,
		dev.EncryptedCredentialsTransit,
		dev.EncryptedCredentials,
	)
	if err != nil {
		pollStatus = "error"
		return fmt.Errorf("decrypting credentials for device %s: %w", dev.ID, err)
	}
	// Prepare CA cert PEM for TLS verification (only populated for portal_ca devices).
	var caCertPEM []byte
	if dev.CACertPEM != nil {
		caCertPEM = []byte(*dev.CACertPEM)
	}
	// Attempt connection. On failure, publish offline event and return ErrDeviceOffline.
	// Note: an unreachable device is an expected condition — pollStatus stays
	// "success", so the "error" metric label is reserved for internal failures.
	client, err := device.ConnectDevice(dev.IPAddress, dev.APISSLPort, dev.APIPort, username, password, connTimeout, caCertPEM, dev.TLSMode)
	if err != nil {
		slog.Info("device offline", "device_id", dev.ID, "ip", dev.IPAddress, "error", err)
		observability.DeviceConnectionErrors.Inc()
		offlineEvent := bus.DeviceStatusEvent{
			DeviceID: dev.ID,
			TenantID: dev.TenantID,
			Status:   "offline",
			LastSeen: time.Now().UTC().Format(time.RFC3339),
		}
		if pubErr := pub.PublishStatus(ctx, offlineEvent); pubErr != nil {
			slog.Warn("failed to publish offline event", "device_id", dev.ID, "error", pubErr)
			observability.NATSPublishTotal.WithLabelValues("status", "error").Inc()
		} else {
			observability.NATSPublishTotal.WithLabelValues("status", "success").Inc()
		}
		// Check for recent config push — trigger rollback or alert if device
		// went offline shortly after a push (Redis key set by push_tracker).
		if redisClientForFirmware != nil {
			pushKey := fmt.Sprintf("push:recent:%s", dev.ID)
			pushData, pushErr := redisClientForFirmware.Get(ctx, pushKey).Result()
			// A Get error (including redis.Nil for "no recent push") simply
			// means no rollback/alert is triggered for this cycle.
			if pushErr == nil && pushData != "" {
				var pushInfo struct {
					DeviceID         string `json:"device_id"`
					TenantID         string `json:"tenant_id"`
					PushType         string `json:"push_type"`
					PushOperationID  string `json:"push_operation_id"`
					PrePushCommitSHA string `json:"pre_push_commit_sha"`
				}
				if unmarshalErr := json.Unmarshal([]byte(pushData), &pushInfo); unmarshalErr == nil {
					slog.Warn("device went offline after recent config push",
						"device_id", dev.ID,
						"push_type", pushInfo.PushType,
					)
					if pushInfo.PushType == "template" || pushInfo.PushType == "restore" {
						// Auto-rollback for template/restore pushes
						if rollbackErr := pub.PublishPushRollback(ctx, bus.PushRollbackEvent{
							DeviceID:         pushInfo.DeviceID,
							TenantID:         pushInfo.TenantID,
							PushOperationID:  pushInfo.PushOperationID,
							PrePushCommitSHA: pushInfo.PrePushCommitSHA,
						}); rollbackErr != nil {
							slog.Error("failed to publish push rollback event", "device_id", dev.ID, "error", rollbackErr)
						}
					} else {
						// Alert only for editor pushes (one-click rollback in UI)
						if alertErr := pub.PublishPushAlert(ctx, bus.PushAlertEvent{
							DeviceID: pushInfo.DeviceID,
							TenantID: pushInfo.TenantID,
							PushType: pushInfo.PushType,
						}); alertErr != nil {
							slog.Error("failed to publish push alert event", "device_id", dev.ID, "error", alertErr)
						}
					}
				}
			}
		}
		return ErrDeviceOffline
	}
	defer device.CloseDevice(client)
	// Query device resources (version, uptime, CPU, memory) with per-command timeout.
	cmdCtx, cmdCancel := context.WithTimeout(ctx, cmdTimeout)
	info, err := withTimeout[device.DeviceInfo](cmdCtx, func() (device.DeviceInfo, error) {
		return device.DetectVersion(client)
	})
	cmdCancel()
	if err != nil {
		slog.Warn("failed to detect version", "device_id", dev.ID, "error", err)
		// Still publish an online event even if version detection fails.
		// In that case info is the zero DeviceInfo, so the event carries
		// empty/zero metadata fields.
	}
	onlineEvent := bus.DeviceStatusEvent{
		DeviceID:        dev.ID,
		TenantID:        dev.TenantID,
		Status:          "online",
		RouterOSVersion: info.Version,
		MajorVersion:    info.MajorVersion,
		BoardName:       info.BoardName,
		Architecture:    info.Architecture,
		Uptime:          info.Uptime,
		CPULoad:         info.CPULoad,
		FreeMemory:      info.FreeMemory,
		TotalMemory:     info.TotalMemory,
		SerialNumber:    info.SerialNumber,
		FirmwareVersion: info.FirmwareVersion,
		LastSeen:        time.Now().UTC().Format(time.RFC3339),
	}
	if pubErr := pub.PublishStatus(ctx, onlineEvent); pubErr != nil {
		observability.NATSPublishTotal.WithLabelValues("status", "error").Inc()
		pollStatus = "error"
		return fmt.Errorf("publishing online event for device %s: %w", dev.ID, pubErr)
	}
	observability.NATSPublishTotal.WithLabelValues("status", "success").Inc()
	// =========================================================================
	// CONFIG CHANGE DETECTION
	// Compare last-config-change from /system/resource/print against the
	// previous value stored in Redis. If it changed (and we have a previous
	// value — skip first poll), publish a ConfigChangedEvent so the backend
	// can trigger an event-driven backup.
	// =========================================================================
	if info.LastConfigChange != "" && redisClientForFirmware != nil {
		redisKey := fmt.Sprintf("device:%s:last_config_change", dev.ID)
		prev, redisErr := redisClientForFirmware.Get(ctx, redisKey).Result()
		if redisErr != nil && redisErr != redis.Nil {
			slog.Warn("Redis GET last_config_change error", "device_id", dev.ID, "error", redisErr)
		}
		// On a Redis error, prev is "" — treated like a first poll below, so
		// a transient Redis outage suppresses rather than fabricates events.
		if prev != info.LastConfigChange {
			if prev != "" { // Skip first poll — no previous value to compare
				slog.Info("config change detected on device",
					"device_id", dev.ID,
					"old_timestamp", prev,
					"new_timestamp", info.LastConfigChange,
				)
				if pubErr := pub.PublishConfigChanged(ctx, bus.ConfigChangedEvent{
					DeviceID:     dev.ID,
					TenantID:     dev.TenantID,
					OldTimestamp: prev,
					NewTimestamp: info.LastConfigChange,
				}); pubErr != nil {
					slog.Warn("failed to publish config.changed", "device_id", dev.ID, "error", pubErr)
					observability.NATSPublishTotal.WithLabelValues("config_changed", "error").Inc()
				} else {
					observability.NATSPublishTotal.WithLabelValues("config_changed", "success").Inc()
				}
			}
			// Update Redis with current value (24h TTL). Best-effort: a failed
			// SET only means the same change may be re-detected next poll.
			redisClientForFirmware.Set(ctx, redisKey, info.LastConfigChange, 24*time.Hour)
		}
	}
	slog.Info("device polled successfully",
		"device_id", dev.ID,
		"ip", dev.IPAddress,
		"status", "online",
		"version", info.Version,
	)
	// =========================================================================
	// METRICS COLLECTION
	// Errors are non-fatal — a metric collection failure should not fail the
	// poll cycle. Publish failures are also non-fatal for the same reason.
	// Each collection call is wrapped with a per-command timeout.
	// =========================================================================
	collectedAt := time.Now().UTC().Format(time.RFC3339)
	// Interface traffic counters. Published even when collection failed
	// (interfaces is then nil), keeping the event cadence regular.
	cmdCtx, cmdCancel = context.WithTimeout(ctx, cmdTimeout)
	interfaces, err := withTimeout[[]device.InterfaceStats](cmdCtx, func() ([]device.InterfaceStats, error) {
		return device.CollectInterfaces(client)
	})
	cmdCancel()
	if err != nil {
		slog.Warn("failed to collect interface metrics", "device_id", dev.ID, "error", err)
	}
	if pubErr := pub.PublishMetrics(ctx, bus.DeviceMetricsEvent{
		DeviceID:    dev.ID,
		TenantID:    dev.TenantID,
		CollectedAt: collectedAt,
		Type:        "interfaces",
		Interfaces:  interfaces,
	}); pubErr != nil {
		slog.Warn("failed to publish interface metrics", "device_id", dev.ID, "error", pubErr)
		observability.NATSPublishTotal.WithLabelValues("metrics", "error").Inc()
	} else {
		observability.NATSPublishTotal.WithLabelValues("metrics", "success").Inc()
	}
	// System health (CPU, memory, disk, temperature).
	cmdCtx, cmdCancel = context.WithTimeout(ctx, cmdTimeout)
	health, err := withTimeout[device.HealthMetrics](cmdCtx, func() (device.HealthMetrics, error) {
		return device.CollectHealth(client, info)
	})
	cmdCancel()
	if err != nil {
		slog.Warn("failed to collect health metrics", "device_id", dev.ID, "error", err)
	}
	if pubErr := pub.PublishMetrics(ctx, bus.DeviceMetricsEvent{
		DeviceID:    dev.ID,
		TenantID:    dev.TenantID,
		CollectedAt: collectedAt,
		Type:        "health",
		Health:      &health,
	}); pubErr != nil {
		slog.Warn("failed to publish health metrics", "device_id", dev.ID, "error", pubErr)
		observability.NATSPublishTotal.WithLabelValues("metrics", "error").Inc()
	} else {
		observability.NATSPublishTotal.WithLabelValues("metrics", "success").Inc()
	}
	// Wireless client stats (only publish if the device has wireless interfaces).
	cmdCtx, cmdCancel = context.WithTimeout(ctx, cmdTimeout)
	wireless, err := withTimeout[[]device.WirelessStats](cmdCtx, func() ([]device.WirelessStats, error) {
		return device.CollectWireless(client, info.MajorVersion)
	})
	cmdCancel()
	if err != nil {
		slog.Warn("failed to collect wireless metrics", "device_id", dev.ID, "error", err)
	}
	if len(wireless) > 0 {
		if pubErr := pub.PublishMetrics(ctx, bus.DeviceMetricsEvent{
			DeviceID:    dev.ID,
			TenantID:    dev.TenantID,
			CollectedAt: collectedAt,
			Type:        "wireless",
			Wireless:    wireless,
		}); pubErr != nil {
			slog.Warn("failed to publish wireless metrics", "device_id", dev.ID, "error", pubErr)
			observability.NATSPublishTotal.WithLabelValues("metrics", "error").Inc()
		} else {
			observability.NATSPublishTotal.WithLabelValues("metrics", "success").Inc()
		}
	}
	// =========================================================================
	// FIRMWARE CHECK (rate-limited to once per day per device)
	// Checks if a firmware update is available and publishes the result.
	// Uses a Redis key with 24h TTL to ensure we don't hammer devices every 60s.
	// =========================================================================
	if redisClientForFirmware != nil {
		fwCacheKey := fmt.Sprintf("firmware:checked:%s", dev.ID)
		// EXISTS error deliberately ignored: worst case a Redis hiccup makes
		// us run the firmware check one cycle early.
		exists, _ := redisClientForFirmware.Exists(ctx, fwCacheKey).Result()
		if exists == 0 {
			cmdCtx, cmdCancel = context.WithTimeout(ctx, cmdTimeout)
			fwInfo, fwErr := withTimeout[device.FirmwareInfo](cmdCtx, func() (device.FirmwareInfo, error) {
				return device.CheckFirmwareUpdate(client)
			})
			cmdCancel()
			if fwErr != nil {
				slog.Warn("firmware check failed", "device_id", dev.ID, "error", fwErr)
				// Set cooldown on failure too, but shorter (6h) so we retry sooner than success (24h).
				// Prevents hammering devices that can't reach MikroTik update servers every poll cycle.
				fwFailKey := fmt.Sprintf("firmware:check-failed:%s", dev.ID)
				redisClientForFirmware.Set(ctx, fwFailKey, "1", 6*time.Hour)
				// Also set the main checked key to prevent the success path from re-checking.
				redisClientForFirmware.Set(ctx, fwCacheKey, "1", 6*time.Hour)
			} else {
				fwEvent := bus.DeviceFirmwareEvent{
					DeviceID:         dev.ID,
					TenantID:         dev.TenantID,
					InstalledVersion: fwInfo.InstalledVersion,
					LatestVersion:    fwInfo.LatestVersion,
					Channel:          fwInfo.Channel,
					Status:           fwInfo.Status,
					Architecture:     fwInfo.Architecture,
				}
				if pubErr := pub.PublishFirmware(ctx, fwEvent); pubErr != nil {
					slog.Warn("failed to publish firmware event", "device_id", dev.ID, "error", pubErr)
					observability.NATSPublishTotal.WithLabelValues("firmware", "error").Inc()
				} else {
					observability.NATSPublishTotal.WithLabelValues("firmware", "success").Inc()
					// Set Redis key with 24h TTL — firmware checked for today.
					// If the check succeeded but status is "check-failed",
					// use shorter cooldown since the device couldn't reach update servers.
					if fwInfo.Status == "check-failed" {
						redisClientForFirmware.Set(ctx, fwCacheKey, "1", 6*time.Hour)
					} else {
						redisClientForFirmware.Set(ctx, fwCacheKey, "1", 24*time.Hour)
					}
					slog.Info("firmware check published",
						"device_id", dev.ID,
						"installed", fwInfo.InstalledVersion,
						"latest", fwInfo.LatestVersion,
						"channel", fwInfo.Channel,
					)
				}
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,161 @@
// Package store provides database access for the poller service.
package store
import (
"context"
"fmt"
"github.com/jackc/pgx/v5/pgxpool"
)
// Device represents a device row fetched from the devices table.
// The poller reads ALL devices across all tenants (no RLS applied to poller_user).
// Pointer fields map to nullable columns.
type Device struct {
	ID                          string
	TenantID                    string
	IPAddress                   string
	APIPort                     int     // plaintext RouterOS API port (schema default 8728)
	APISSLPort                  int     // TLS RouterOS API port (schema default 8729)
	EncryptedCredentials        []byte  // legacy AES-256-GCM BYTEA
	EncryptedCredentialsTransit *string // OpenBao Transit ciphertext (TEXT, nullable)
	RouterOSVersion             *string // nullable in the DB schema
	MajorVersion                *int    // nullable in the DB schema
	TLSMode                     string  // "insecure" or "portal_ca"
	CACertPEM                   *string // PEM-encoded CA cert (only populated when TLSMode = "portal_ca")
}
// DeviceStore manages PostgreSQL connections for device data access.
// It wraps a pgx connection pool; create with NewDeviceStore and release
// with Close.
type DeviceStore struct {
	pool *pgxpool.Pool
}
// NewDeviceStore opens a pgx connection pool against databaseURL, verifies it
// with a ping, and wraps it in a DeviceStore.
//
// The databaseURL should use the poller_user role which has SELECT-only access
// to the devices table and is not subject to RLS policies.
func NewDeviceStore(ctx context.Context, databaseURL string) (*DeviceStore, error) {
	p, err := pgxpool.New(ctx, databaseURL)
	if err != nil {
		return nil, fmt.Errorf("creating pgx pool: %w", err)
	}
	// Fail fast on bad credentials / unreachable server rather than on the
	// first query.
	if pingErr := p.Ping(ctx); pingErr != nil {
		p.Close()
		return nil, fmt.Errorf("pinging database: %w", pingErr)
	}
	return &DeviceStore{pool: p}, nil
}
// FetchDevices returns all devices that have credentials stored (legacy or
// Transit), joined with the tenant CA certificate for portal_ca devices.
//
// The query reads across all tenants intentionally — the poller_user role has
// SELECT-only access without RLS so it can poll all devices.
func (s *DeviceStore) FetchDevices(ctx context.Context) ([]Device, error) {
	const query = `
		SELECT
			d.id::text,
			d.tenant_id::text,
			d.ip_address,
			d.api_port,
			d.api_ssl_port,
			d.encrypted_credentials,
			d.encrypted_credentials_transit,
			d.routeros_version,
			d.routeros_major_version,
			d.tls_mode,
			ca.cert_pem
		FROM devices d
		LEFT JOIN certificate_authorities ca
			ON d.tenant_id = ca.tenant_id
			AND d.tls_mode = 'portal_ca'
		WHERE d.encrypted_credentials IS NOT NULL
			OR d.encrypted_credentials_transit IS NOT NULL
	`
	rows, err := s.pool.Query(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("querying devices: %w", err)
	}
	defer rows.Close()

	var result []Device
	for rows.Next() {
		var dev Device
		scanErr := rows.Scan(
			&dev.ID,
			&dev.TenantID,
			&dev.IPAddress,
			&dev.APIPort,
			&dev.APISSLPort,
			&dev.EncryptedCredentials,
			&dev.EncryptedCredentialsTransit,
			&dev.RouterOSVersion,
			&dev.MajorVersion,
			&dev.TLSMode,
			&dev.CACertPEM,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scanning device row: %w", scanErr)
		}
		result = append(result, dev)
	}
	// rows.Err surfaces any error that terminated iteration early.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating device rows: %w", err)
	}
	return result, nil
}
// GetDevice returns a single device by ID for interactive command execution.
// Unlike FetchDevices it does not filter on stored credentials; a device
// without credentials is still returned. An unknown ID yields a wrapped
// pgx "no rows" error.
func (s *DeviceStore) GetDevice(ctx context.Context, deviceID string) (Device, error) {
	const query = `
		SELECT
			d.id::text,
			d.tenant_id::text,
			d.ip_address,
			d.api_port,
			d.api_ssl_port,
			d.encrypted_credentials,
			d.encrypted_credentials_transit,
			d.routeros_version,
			d.routeros_major_version,
			d.tls_mode,
			ca.cert_pem
		FROM devices d
		LEFT JOIN certificate_authorities ca
			ON d.tenant_id = ca.tenant_id
			AND d.tls_mode = 'portal_ca'
		WHERE d.id = $1
	`
	var dev Device
	dest := []any{
		&dev.ID,
		&dev.TenantID,
		&dev.IPAddress,
		&dev.APIPort,
		&dev.APISSLPort,
		&dev.EncryptedCredentials,
		&dev.EncryptedCredentialsTransit,
		&dev.RouterOSVersion,
		&dev.MajorVersion,
		&dev.TLSMode,
		&dev.CACertPEM,
	}
	if err := s.pool.QueryRow(ctx, query, deviceID).Scan(dest...); err != nil {
		return Device{}, fmt.Errorf("querying device %s: %w", deviceID, err)
	}
	return dev, nil
}
// Pool returns the underlying pgxpool.Pool for shared use by other subsystems
// (e.g., credential cache key_access_log inserts). The caller must not Close
// the returned pool; DeviceStore.Close owns its lifecycle.
func (s *DeviceStore) Pool() *pgxpool.Pool {
	return s.pool
}
// Close closes the pgx connection pool. The store must not be used afterwards.
func (s *DeviceStore) Close() {
	s.pool.Close()
}

View File

@@ -0,0 +1,150 @@
package store_test
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/testutil"
)
// TestDeviceStore_FetchDevices_Integration verifies that FetchDevices returns
// only devices with stored credentials and preserves their column values.
func TestDeviceStore_FetchDevices_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	connStr, cleanup := testutil.SetupPostgres(t)
	defer cleanup()

	ctx := context.Background()
	const tenant = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
	creds := []byte("dummy-encrypted-credentials")
	version := "7.16"
	major := 7

	// Three devices WITH encrypted_credentials — all must come back.
	withVersion := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenant,
		IPAddress:            "192.168.1.1",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: creds,
		RouterOSVersion:      &version,
		MajorVersion:         &major,
	})
	second := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenant,
		IPAddress:            "192.168.1.2",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: creds,
	})
	third := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenant,
		IPAddress:            "192.168.1.3",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: creds,
	})
	// One device WITHOUT credentials — must be filtered out by the WHERE clause.
	_ = testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:  tenant,
		IPAddress: "192.168.1.99",
		APIPort:   8728,
	})

	ds, err := store.NewDeviceStore(ctx, connStr)
	require.NoError(t, err)
	defer ds.Close()

	devices, err := ds.FetchDevices(ctx)
	require.NoError(t, err)
	assert.Len(t, devices, 3, "should return only devices with encrypted_credentials")

	seen := make(map[string]bool, len(devices))
	for _, d := range devices {
		seen[d.ID] = true
	}
	assert.True(t, seen[withVersion], "device 1 should be returned")
	assert.True(t, seen[second], "device 2 should be returned")
	assert.True(t, seen[third], "device 3 should be returned")

	// Spot-check every column on the device that carried version info.
	for _, d := range devices {
		if d.ID != withVersion {
			continue
		}
		assert.Equal(t, tenant, d.TenantID)
		assert.Equal(t, "192.168.1.1", d.IPAddress)
		assert.Equal(t, 8728, d.APIPort)
		assert.Equal(t, 8729, d.APISSLPort)
		assert.Equal(t, creds, d.EncryptedCredentials)
		require.NotNil(t, d.RouterOSVersion)
		assert.Equal(t, "7.16", *d.RouterOSVersion)
		require.NotNil(t, d.MajorVersion)
		assert.Equal(t, 7, *d.MajorVersion)
	}
}
// TestDeviceStore_GetDevice_Integration covers GetDevice for both an existing
// and a nonexistent device ID.
func TestDeviceStore_GetDevice_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	connStr, cleanup := testutil.SetupPostgres(t)
	defer cleanup()

	ctx := context.Background()
	const tenant = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
	creds := []byte("dummy-encrypted-credentials")

	deviceID := testutil.InsertTestDevice(t, connStr, store.Device{
		TenantID:             tenant,
		IPAddress:            "10.0.0.1",
		APIPort:              8728,
		APISSLPort:           8729,
		EncryptedCredentials: creds,
	})

	ds, err := store.NewDeviceStore(ctx, connStr)
	require.NoError(t, err)
	defer ds.Close()

	// Happy path: existing device.
	got, err := ds.GetDevice(ctx, deviceID)
	require.NoError(t, err)
	assert.Equal(t, deviceID, got.ID)
	assert.Equal(t, tenant, got.TenantID)
	assert.Equal(t, "10.0.0.1", got.IPAddress)
	assert.Equal(t, creds, got.EncryptedCredentials)

	// Sad path: nonexistent device.
	_, err = ds.GetDevice(ctx, "00000000-0000-0000-0000-000000000000")
	assert.Error(t, err)
}
// TestDeviceStore_FetchDevices_Empty_Integration checks that an empty table
// yields an empty (nil) result without error.
func TestDeviceStore_FetchDevices_Empty_Integration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	connStr, cleanup := testutil.SetupPostgres(t)
	defer cleanup()

	ds, err := store.NewDeviceStore(context.Background(), connStr)
	require.NoError(t, err)
	defer ds.Close()

	got, err := ds.FetchDevices(context.Background())
	require.NoError(t, err)
	// FetchDevices returns a nil slice when no rows exist (append on nil);
	// this is acceptable Go behavior. The important thing is no error.
	assert.Empty(t, got, "should return empty result for empty database")
}

View File

@@ -0,0 +1,241 @@
// Package testutil provides shared testcontainer helpers for integration tests.
//
// All helpers start real infrastructure containers (PostgreSQL, Redis, NATS) via
// testcontainers-go and return connection strings plus cleanup functions. Tests
// using these helpers require a running Docker daemon and are skipped automatically
// when `go test -short` is used.
package testutil
import (
	"cmp"
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/testcontainers/testcontainers-go"
	tcnats "github.com/testcontainers/testcontainers-go/modules/nats"
	"github.com/testcontainers/testcontainers-go/modules/postgres"
	"github.com/testcontainers/testcontainers-go/modules/redis"
	"github.com/testcontainers/testcontainers-go/wait"

	"github.com/mikrotik-portal/poller/internal/store"
)
// devicesSchema is the minimal DDL needed for integration tests against the
// devices table. It mirrors the production schema but omits RLS policies and
// other tables the poller doesn't read (note: certificate_authorities and the
// encrypted_credentials_transit column are also absent here, so tests that
// exercise those paths need additional DDL).
// pgcrypto provides gen_random_uuid() for the id default.
const devicesSchema = `
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
CREATE TABLE IF NOT EXISTS devices (
	id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
	tenant_id UUID NOT NULL,
	hostname VARCHAR(255) NOT NULL,
	ip_address VARCHAR(45) NOT NULL,
	api_port INTEGER NOT NULL DEFAULT 8728,
	api_ssl_port INTEGER NOT NULL DEFAULT 8729,
	model VARCHAR(255),
	serial_number VARCHAR(255),
	firmware_version VARCHAR(100),
	routeros_version VARCHAR(100),
	routeros_major_version INTEGER,
	uptime_seconds INTEGER,
	last_seen TIMESTAMPTZ,
	encrypted_credentials BYTEA,
	status VARCHAR(20) NOT NULL DEFAULT 'unknown',
	created_at TIMESTAMPTZ DEFAULT now(),
	updated_at TIMESTAMPTZ DEFAULT now()
);
`
// SetupPostgres starts a PostgreSQL container and applies the devices table
// schema. Returns the connection string and a cleanup function that
// terminates the container.
func SetupPostgres(t *testing.T) (connStr string, cleanup func()) {
	t.Helper()
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	ctx := context.Background()

	container, err := postgres.Run(ctx,
		"postgres:17-alpine",
		postgres.WithDatabase("mikrotik_test"),
		postgres.WithUsername("postgres"),
		postgres.WithPassword("test"),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(60*time.Second),
		),
	)
	if err != nil {
		t.Fatalf("starting PostgreSQL container: %v", err)
	}
	// fail terminates the container before aborting the test, so a setup
	// error never leaks a running container.
	fail := func(format string, args ...any) {
		_ = container.Terminate(ctx)
		t.Fatalf(format, args...)
	}

	connStr, err = container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		fail("getting PostgreSQL connection string: %v", err)
	}

	// Apply schema using pgx directly.
	conn, err := pgx.Connect(ctx, connStr)
	if err != nil {
		fail("connecting to PostgreSQL to apply schema: %v", err)
	}
	defer conn.Close(ctx)
	if _, err := conn.Exec(ctx, devicesSchema); err != nil {
		fail("applying devices schema: %v", err)
	}

	cleanup = func() {
		if err := container.Terminate(ctx); err != nil {
			t.Logf("warning: terminating PostgreSQL container: %v", err)
		}
	}
	return connStr, cleanup
}
// SetupRedis starts a Redis container and returns its address (host:port)
// plus a cleanup function that terminates the container.
func SetupRedis(t *testing.T) (addr string, cleanup func()) {
	t.Helper()
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	ctx := context.Background()

	container, err := redis.Run(ctx,
		"redis:7-alpine",
		testcontainers.WithWaitStrategy(
			wait.ForLog("Ready to accept connections").
				WithStartupTimeout(30*time.Second),
		),
	)
	if err != nil {
		t.Fatalf("starting Redis container: %v", err)
	}

	host, err := container.Host(ctx)
	if err != nil {
		_ = container.Terminate(ctx)
		t.Fatalf("getting Redis host: %v", err)
	}
	port, err := container.MappedPort(ctx, "6379")
	if err != nil {
		_ = container.Terminate(ctx)
		t.Fatalf("getting Redis mapped port: %v", err)
	}

	cleanup = func() {
		if err := container.Terminate(ctx); err != nil {
			t.Logf("warning: terminating Redis container: %v", err)
		}
	}
	return fmt.Sprintf("%s:%s", host, port.Port()), cleanup
}
// SetupNATS starts a NATS container with JetStream enabled and returns the
// NATS URL (nats://host:port) plus a cleanup function that terminates the
// container.
func SetupNATS(t *testing.T) (url string, cleanup func()) {
	t.Helper()
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	ctx := context.Background()

	container, err := tcnats.Run(ctx,
		"nats:2-alpine",
		testcontainers.WithCmd("--jetstream"),
		testcontainers.WithWaitStrategy(
			wait.ForLog("Server is ready").
				WithStartupTimeout(30*time.Second),
		),
	)
	if err != nil {
		t.Fatalf("starting NATS container: %v", err)
	}

	host, err := container.Host(ctx)
	if err != nil {
		_ = container.Terminate(ctx)
		t.Fatalf("getting NATS host: %v", err)
	}
	port, err := container.MappedPort(ctx, "4222")
	if err != nil {
		_ = container.Terminate(ctx)
		t.Fatalf("getting NATS mapped port: %v", err)
	}

	cleanup = func() {
		if err := container.Terminate(ctx); err != nil {
			t.Logf("warning: terminating NATS container: %v", err)
		}
	}
	return fmt.Sprintf("nats://%s:%s", host, port.Port()), cleanup
}
// InsertTestDevice inserts a device row into the database and returns the
// generated UUID. The caller provides a store.Device with fields to populate;
// fields left at zero values fall back to sensible test defaults.
func InsertTestDevice(t *testing.T, connStr string, dev store.Device) string {
	t.Helper()
	ctx := context.Background()

	conn, err := pgx.Connect(ctx, connStr)
	if err != nil {
		t.Fatalf("connecting to PostgreSQL for InsertTestDevice: %v", err)
	}
	defer conn.Close(ctx)

	args := []any{
		dev.TenantID,
		coalesce(dev.IPAddress, "test-device"), // hostname defaults to ip if not set
		dev.IPAddress,
		coalesceInt(dev.APIPort, 8728),
		coalesceInt(dev.APISSLPort, 8729),
		dev.EncryptedCredentials,
		dev.RouterOSVersion,
		dev.MajorVersion,
	}
	var id string
	err = conn.QueryRow(ctx,
		`INSERT INTO devices (tenant_id, hostname, ip_address, api_port, api_ssl_port,
		                      encrypted_credentials, routeros_version, routeros_major_version)
		 VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
		 RETURNING id::text`,
		args...,
	).Scan(&id)
	if err != nil {
		t.Fatalf("inserting test device: %v", err)
	}
	return id
}
func coalesce(s, fallback string) string {
if s == "" {
return fallback
}
return s
}
func coalesceInt(v, fallback int) int {
if v == 0 {
return fallback
}
return v
}

View File

@@ -0,0 +1,173 @@
package vault
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"strings"
"time"
"github.com/google/uuid"
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mikrotik-portal/poller/internal/device"
)
// CachedCreds holds decrypted device credentials as stored in the LRU cache.
// Values are kept in plain text in process memory for the cache TTL.
type CachedCreds struct {
	Username string
	Password string
}
// Prometheus metrics for credential cache and OpenBao Transit observability.
// All four are registered with the default registry at package init via
// promauto.
var (
	CacheHits = promauto.NewCounter(prometheus.CounterOpts{
		Name: "poller_credential_cache_hits_total",
		Help: "Number of credential cache hits (no OpenBao call)",
	})
	CacheMisses = promauto.NewCounter(prometheus.CounterOpts{
		Name: "poller_credential_cache_misses_total",
		Help: "Number of credential cache misses (OpenBao decrypt call)",
	})
	OpenBaoLatency = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "poller_openbao_decrypt_duration_seconds",
		Help:    "Latency of OpenBao Transit decrypt calls",
		Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0},
	})
	LegacyDecrypts = promauto.NewCounter(prometheus.CounterOpts{
		Name: "poller_credential_legacy_decrypts_total",
		Help: "Number of credentials decrypted using legacy AES key (not yet migrated)",
	})
)
// CredentialCache provides cached credential decryption with dual-read support.
// It uses an LRU cache with TTL to avoid redundant OpenBao calls and falls back
// to legacy AES-256-GCM decryption for credentials not yet migrated to Transit.
// Construct with NewCredentialCache; any of transit/legacy/db may be nil (see
// the field comments for the resulting behavior).
type CredentialCache struct {
	cache   *expirable.LRU[string, *CachedCreds] // keyed by device ID
	transit *TransitClient                       // nil disables the Transit path
	legacy  []byte                               // legacy AES-256-GCM key (nil if not available)
	db      *pgxpool.Pool                        // for key_access_log inserts (nil disables audit logging)
}
// NewCredentialCache builds a CredentialCache backed by a bounded LRU with the
// given entry count and TTL. transit may be nil if OpenBao is not configured,
// legacyKey may be nil if no legacy AES key is available, and db may be nil
// when key access logging is not needed.
func NewCredentialCache(size int, ttl time.Duration, transit *TransitClient, legacyKey []byte, db *pgxpool.Pool) *CredentialCache {
	return &CredentialCache{
		cache:   expirable.NewLRU[string, *CachedCreds](size, nil, ttl),
		transit: transit,
		legacy:  legacyKey,
		db:      db,
	}
}
// GetCredentials returns decrypted credentials for a device, consulting the
// LRU cache before doing any decryption work.
//
// Decryption sources, in priority order:
//  1. OpenBao Transit — used when transitCiphertext carries the "vault:v"
//     prefix that Transit ciphertexts always have.
//  2. Legacy AES-256-GCM — used when legacyCiphertext is present.
//
// transitCiphertext is the Transit-encrypted string (nullable), legacyCiphertext
// is the legacy BYTEA (nullable). Returns (username, password, error).
func (c *CredentialCache) GetCredentials(
	deviceID, tenantID string,
	transitCiphertext *string,
	legacyCiphertext []byte,
) (string, string, error) {
	// Cache hit: no OpenBao or AES work at all.
	if cached, ok := c.cache.Get(deviceID); ok {
		CacheHits.Inc()
		return cached.Username, cached.Password, nil
	}
	CacheMisses.Inc()

	var username, password string
	// source records which decryption path actually ran. Previously the debug
	// log below claimed "transit" whenever the transit column was non-empty,
	// even when the legacy path was used because the ciphertext lacked the
	// "vault:v" prefix.
	var source string

	switch {
	// Prefer Transit ciphertext if available. HasPrefix("vault:v") implies
	// non-nil, non-empty, so no separate emptiness check is needed.
	case transitCiphertext != nil && strings.HasPrefix(*transitCiphertext, "vault:v"):
		if c.transit == nil {
			return "", "", fmt.Errorf("transit ciphertext present but OpenBao client not configured")
		}
		start := time.Now()
		plaintext, err := c.transit.Decrypt(tenantID, *transitCiphertext)
		OpenBaoLatency.Observe(time.Since(start).Seconds())
		if err != nil {
			return "", "", fmt.Errorf("transit decrypt for device %s: %w", deviceID, err)
		}
		var creds struct {
			Username string `json:"username"`
			Password string `json:"password"`
		}
		if err := json.Unmarshal(plaintext, &creds); err != nil {
			return "", "", fmt.Errorf("unmarshal transit-decrypted credentials: %w", err)
		}
		username, password = creds.Username, creds.Password
		source = "transit"
		// Fire-and-forget key access log INSERT for audit trail.
		if c.db != nil {
			go c.logKeyAccess(deviceID, tenantID, "decrypt_credentials", "poller_poll")
		}
	case len(legacyCiphertext) > 0:
		// Fall back to legacy AES-256-GCM decryption for credentials not yet
		// migrated to Transit (also covers transit values without the prefix).
		if c.legacy == nil {
			return "", "", fmt.Errorf("legacy ciphertext present but encryption key not configured")
		}
		var err error
		username, password, err = device.DecryptCredentials(legacyCiphertext, c.legacy)
		if err != nil {
			return "", "", fmt.Errorf("legacy decrypt for device %s: %w", deviceID, err)
		}
		LegacyDecrypts.Inc()
		source = "legacy"
	default:
		return "", "", fmt.Errorf("no credentials available for device %s", deviceID)
	}

	// Cache the result for the LRU's TTL.
	c.cache.Add(deviceID, &CachedCreds{Username: username, Password: password})
	slog.Debug("credential decrypted and cached",
		"device_id", deviceID,
		"source", source,
	)
	return username, password, nil
}
// Invalidate removes a device's cached credentials (e.g., after credential
// rotation), forcing the next GetCredentials call to decrypt again.
func (c *CredentialCache) Invalidate(deviceID string) {
	c.cache.Remove(deviceID)
}
// Len returns the current number of cached credential entries.
func (c *CredentialCache) Len() int {
	return c.cache.Len()
}
// logKeyAccess writes one immutable audit row recording a credential
// decryption event. It is invoked as a fire-and-forget goroutine so the poll
// cycle never waits on the database; failures are only logged.
func (c *CredentialCache) logKeyAccess(deviceID, tenantID, action, justification string) {
	// Bound the insert so a slow database cannot hold the goroutine for long.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	if _, err := c.db.Exec(ctx,
		`INSERT INTO key_access_log (tenant_id, device_id, action, resource_type, justification, correlation_id)
		 VALUES ($1::uuid, $2::uuid, $3, 'device_credentials', $4, $5)`,
		tenantID, deviceID, action, justification, uuid.New().String(),
	); err != nil {
		slog.Warn("failed to log key access", "error", err, "device_id", deviceID)
	}
}

View File

@@ -0,0 +1,127 @@
// Package vault provides OpenBao Transit integration for credential encryption/decryption.
//
// The TransitClient communicates with the OpenBao Transit secrets engine via HTTP,
// enabling per-tenant encryption keys managed by OpenBao rather than a static
// application-level AES key.
package vault
import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"time"
)
// TransitClient communicates with OpenBao Transit secrets engine via HTTP.
//
// It is a thin wrapper over net/http: each operation issues a single request
// to a Transit encrypt/decrypt endpoint, authenticated with a static token.
// The zero value is not usable; construct instances with NewTransitClient.
type TransitClient struct {
	httpClient *http.Client // request client; NewTransitClient sets a 5s timeout
	addr       string       // OpenBao base URL; endpoint paths ("/v1/transit/...") are appended to it
	token      string       // sent as the X-Vault-Token header on every request
}
// NewTransitClient returns a TransitClient that talks to the OpenBao server
// at addr, authenticating every request with token. The embedded HTTP client
// carries a 5-second timeout so a slow or unreachable OpenBao instance cannot
// stall callers indefinitely.
func NewTransitClient(addr, token string) *TransitClient {
	client := &http.Client{Timeout: 5 * time.Second}
	return &TransitClient{
		httpClient: client,
		addr:       addr,
		token:      token,
	}
}
// transitDecryptResponse is the JSON response from Transit decrypt endpoint.
//
// Only the fields this package reads are modeled. Plaintext arrives
// base64-encoded from OpenBao; Decrypt decodes it before returning.
type transitDecryptResponse struct {
	Data struct {
		Plaintext string `json:"plaintext"` // base64-encoded plaintext
	} `json:"data"`
	Errors []string `json:"errors,omitempty"` // NOTE(review): standard OpenBao error envelope; parsed but never inspected — non-200 handling uses the raw body instead
}
// Decrypt decrypts a Transit ciphertext (vault:v1:...) with the tenant's
// per-tenant key ("tenant_<tenantID>") and returns the raw plaintext bytes.
//
// The request is a POST to <addr>/v1/transit/decrypt/tenant_<tenantID>,
// authenticated via the X-Vault-Token header. OpenBao returns the plaintext
// base64-encoded; it is decoded before returning. Transport failures,
// non-200 statuses, and malformed responses all yield wrapped errors.
func (c *TransitClient) Decrypt(tenantID, ciphertext string) ([]byte, error) {
	payload, err := json.Marshal(map[string]string{"ciphertext": ciphertext})
	if err != nil {
		return nil, fmt.Errorf("marshal decrypt request: %w", err)
	}
	// Path-escape the tenant ID: it is interpolated into a path segment, and
	// a stray '/', '?' or '#' must not redirect the request to a different
	// Transit endpoint. For well-formed UUID tenant IDs this is a no-op.
	endpoint := fmt.Sprintf("%s/v1/transit/decrypt/tenant_%s", c.addr, url.PathEscape(tenantID))
	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("create decrypt request: %w", err)
	}
	req.Header.Set("X-Vault-Token", c.token)
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("openbao transit decrypt: %w", err)
	}
	defer resp.Body.Close()
	// Read the whole body in every case: it carries either the JSON result or
	// OpenBao's error text, and draining it lets the transport reuse the
	// underlying connection.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read decrypt response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("openbao transit decrypt failed (status %d): %s", resp.StatusCode, string(body))
	}
	var result transitDecryptResponse
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("unmarshal decrypt response: %w", err)
	}
	// A 200 with no plaintext means the response shape was not what Transit
	// produces (e.g. an intercepting proxy); surface that instead of silently
	// returning empty credential bytes.
	if result.Data.Plaintext == "" {
		return nil, fmt.Errorf("openbao transit decrypt: response missing plaintext")
	}
	plaintext, err := base64.StdEncoding.DecodeString(result.Data.Plaintext)
	if err != nil {
		return nil, fmt.Errorf("decode plaintext base64: %w", err)
	}
	return plaintext, nil
}
// Encrypt encrypts plaintext bytes with the tenant's per-tenant Transit key
// ("tenant_<tenantID>") and returns the opaque ciphertext string
// ("vault:vN:..."), suitable for storage and later Decrypt calls.
//
// The plaintext is base64-encoded per the Transit API contract and POSTed to
// <addr>/v1/transit/encrypt/tenant_<tenantID> with the X-Vault-Token header.
func (c *TransitClient) Encrypt(tenantID string, plaintext []byte) (string, error) {
	payload, err := json.Marshal(map[string]string{
		"plaintext": base64.StdEncoding.EncodeToString(plaintext),
	})
	if err != nil {
		return "", fmt.Errorf("marshal encrypt request: %w", err)
	}
	// Path-escape the tenant ID: it becomes a path segment of the key name,
	// and unexpected characters must not alter the request target. For UUID
	// tenant IDs this is a no-op.
	endpoint := fmt.Sprintf("%s/v1/transit/encrypt/tenant_%s", c.addr, url.PathEscape(tenantID))
	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("create encrypt request: %w", err)
	}
	req.Header.Set("X-Vault-Token", c.token)
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("openbao transit encrypt: %w", err)
	}
	defer resp.Body.Close()
	// Read the whole body in every case: it holds either the JSON result or
	// OpenBao's error text, and draining it enables connection reuse.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read encrypt response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("openbao transit encrypt failed (status %d): %s", resp.StatusCode, string(body))
	}
	var result struct {
		Data struct {
			Ciphertext string `json:"ciphertext"`
		} `json:"data"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return "", fmt.Errorf("unmarshal encrypt response: %w", err)
	}
	// Guard against a 200 whose body lacks the ciphertext field: returning an
	// empty string here would later be stored as a "valid" credential blob.
	if result.Data.Ciphertext == "" {
		return "", fmt.Errorf("openbao transit encrypt: response missing ciphertext")
	}
	return result.Data.Ciphertext, nil
}