feat: The Other Dude v9.0.1 — full-featured email system
ci: add GitHub Pages deployment workflow for docs site Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
322
poller/internal/bus/publisher.go
Normal file
322
poller/internal/bus/publisher.go
Normal file
@@ -0,0 +1,322 @@
|
||||
// Package bus provides NATS JetStream publishing for device events.
|
||||
package bus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/nats-io/nats.go"
|
||||
"github.com/nats-io/nats.go/jetstream"
|
||||
|
||||
"github.com/mikrotik-portal/poller/internal/device"
|
||||
)
|
||||
|
||||
// DeviceStatusEvent is the payload published to NATS JetStream when a device
|
||||
// is polled. Consumers subscribe to "device.status.>" to receive all events.
|
||||
type DeviceStatusEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
Status string `json:"status"` // "online" or "offline"
|
||||
RouterOSVersion string `json:"routeros_version,omitempty"`
|
||||
MajorVersion int `json:"major_version,omitempty"`
|
||||
BoardName string `json:"board_name,omitempty"`
|
||||
Architecture string `json:"architecture,omitempty"`
|
||||
Uptime string `json:"uptime,omitempty"`
|
||||
CPULoad string `json:"cpu_load,omitempty"`
|
||||
FreeMemory string `json:"free_memory,omitempty"`
|
||||
TotalMemory string `json:"total_memory,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
FirmwareVersion string `json:"firmware_version,omitempty"`
|
||||
LastSeen string `json:"last_seen"` // RFC3339
|
||||
}
|
||||
|
||||
// DeviceMetricsEvent is the payload published to NATS JetStream for metric data
|
||||
// collected from a RouterOS device on each poll cycle.
|
||||
//
|
||||
// Events are published to "device.metrics.{type}.{device_id}" where type is one
|
||||
// of "health", "interfaces", or "wireless". Only the field matching the type will
|
||||
// be populated; the others will be omitted from the JSON payload.
|
||||
type DeviceMetricsEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
CollectedAt string `json:"collected_at"` // RFC3339
|
||||
Type string `json:"type"` // "health", "interfaces", "wireless"
|
||||
Health *device.HealthMetrics `json:"health,omitempty"`
|
||||
Interfaces []device.InterfaceStats `json:"interfaces,omitempty"`
|
||||
Wireless []device.WirelessStats `json:"wireless,omitempty"`
|
||||
}
|
||||
|
||||
// ConfigChangedEvent is published when a device's config changes out-of-band.
|
||||
type ConfigChangedEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
OldTimestamp string `json:"old_timestamp"`
|
||||
NewTimestamp string `json:"new_timestamp"`
|
||||
}
|
||||
|
||||
// PushRollbackEvent triggers automatic rollback for template pushes.
|
||||
type PushRollbackEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
PushOperationID string `json:"push_operation_id"`
|
||||
PrePushCommitSHA string `json:"pre_push_commit_sha"`
|
||||
}
|
||||
|
||||
// PushAlertEvent triggers an alert for editor pushes (one-click rollback).
|
||||
type PushAlertEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
PushType string `json:"push_type"`
|
||||
}
|
||||
|
||||
// Publisher wraps a NATS JetStream connection for publishing device events.
|
||||
type Publisher struct {
|
||||
nc *nats.Conn
|
||||
js jetstream.JetStream
|
||||
}
|
||||
|
||||
// NewPublisher connects to NATS and ensures the DEVICE_EVENTS stream exists.
|
||||
//
|
||||
// The DEVICE_EVENTS stream covers device.status.>, device.metrics.>, and
|
||||
// device.firmware.> subjects. These are explicit to avoid capturing
|
||||
// device.cmd.* (used by CmdResponder for request-reply). This allows
|
||||
// the Python API to subscribe to either family via durable consumers.
|
||||
//
|
||||
// The connection uses unlimited reconnects with a 2-second wait between attempts
|
||||
// so the poller survives transient NATS restarts gracefully.
|
||||
func NewPublisher(natsURL string) (*Publisher, error) {
|
||||
nc, err := nats.Connect(natsURL,
|
||||
nats.MaxReconnects(-1),
|
||||
nats.ReconnectWait(2*time.Second),
|
||||
nats.DisconnectErrHandler(func(nc *nats.Conn, err error) {
|
||||
slog.Warn("NATS disconnected", "error", err)
|
||||
}),
|
||||
nats.ReconnectHandler(func(nc *nats.Conn) {
|
||||
slog.Info("NATS reconnected", "url", nc.ConnectedUrl())
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("connecting to NATS at %s: %w", natsURL, err)
|
||||
}
|
||||
|
||||
js, err := jetstream.New(nc)
|
||||
if err != nil {
|
||||
nc.Close()
|
||||
return nil, fmt.Errorf("creating JetStream context: %w", err)
|
||||
}
|
||||
|
||||
// Ensure the DEVICE_EVENTS stream exists. CreateOrUpdateStream is idempotent.
|
||||
// Subjects are explicit (not "device.>") to avoid capturing device.cmd.*
|
||||
// which is used by CmdResponder for core NATS request-reply.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, err = js.CreateOrUpdateStream(ctx, jetstream.StreamConfig{
|
||||
Name: "DEVICE_EVENTS",
|
||||
Subjects: []string{
|
||||
"device.status.>",
|
||||
"device.metrics.>",
|
||||
"device.firmware.>",
|
||||
"device.credential_changed.>",
|
||||
"config.changed.>",
|
||||
"config.push.rollback.>",
|
||||
"config.push.alert.>",
|
||||
},
|
||||
MaxAge: 24 * time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
nc.Close()
|
||||
return nil, fmt.Errorf("ensuring DEVICE_EVENTS stream: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("NATS JetStream DEVICE_EVENTS stream ready")
|
||||
|
||||
return &Publisher{nc: nc, js: js}, nil
|
||||
}
|
||||
|
||||
// PublishStatus publishes a device status event to NATS JetStream.
|
||||
//
|
||||
// Events are published to "device.status.{DeviceID}" so consumers can subscribe
|
||||
// to individual devices or all events via "device.status.>".
|
||||
func (p *Publisher) PublishStatus(ctx context.Context, event DeviceStatusEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshalling event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("device.status.%s", event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Debug("published device status event",
|
||||
"device_id", event.DeviceID,
|
||||
"status", event.Status,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PublishMetrics publishes a device metrics event to NATS JetStream.
|
||||
//
|
||||
// Events are published to "device.metrics.{type}.{device_id}" so consumers can
|
||||
// subscribe to all metrics via "device.metrics.>" or filter by type.
|
||||
func (p *Publisher) PublishMetrics(ctx context.Context, event DeviceMetricsEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshalling metrics event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("device.metrics.%s.%s", event.Type, event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Debug("published device metrics event",
|
||||
"device_id", event.DeviceID,
|
||||
"type", event.Type,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeviceFirmwareEvent is the payload published to NATS JetStream when the poller
|
||||
// checks a device's firmware update status (rate-limited to once per day per device).
|
||||
type DeviceFirmwareEvent struct {
|
||||
DeviceID string `json:"device_id"`
|
||||
TenantID string `json:"tenant_id"`
|
||||
InstalledVersion string `json:"installed_version"`
|
||||
LatestVersion string `json:"latest_version,omitempty"`
|
||||
Channel string `json:"channel,omitempty"`
|
||||
Status string `json:"status"`
|
||||
Architecture string `json:"architecture"`
|
||||
}
|
||||
|
||||
// PublishFirmware publishes a device firmware status event to NATS JetStream.
|
||||
//
|
||||
// Events are published to "device.firmware.{DeviceID}" so the Python firmware
|
||||
// subscriber can process them and update the firmware_versions table.
|
||||
func (p *Publisher) PublishFirmware(ctx context.Context, event DeviceFirmwareEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshalling firmware event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("device.firmware.%s", event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Debug("published device firmware event",
|
||||
"device_id", event.DeviceID,
|
||||
"installed", event.InstalledVersion,
|
||||
"latest", event.LatestVersion,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PublishConfigChanged publishes a config change event for a device.
|
||||
//
|
||||
// Events are published to "config.changed.{TenantID}.{DeviceID}" so the Python
|
||||
// backend can trigger event-driven backups when out-of-band changes are detected.
|
||||
func (p *Publisher) PublishConfigChanged(ctx context.Context, event ConfigChangedEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal config changed event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("config.changed.%s.%s", event.TenantID, event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publish config changed: %w", err)
|
||||
}
|
||||
|
||||
slog.Debug("published config changed event",
|
||||
"device_id", event.DeviceID,
|
||||
"tenant_id", event.TenantID,
|
||||
"old_timestamp", event.OldTimestamp,
|
||||
"new_timestamp", event.NewTimestamp,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PublishPushRollback publishes a push rollback event when a device goes offline
|
||||
// after a template or restore config push, triggering automatic rollback.
|
||||
func (p *Publisher) PublishPushRollback(ctx context.Context, event PushRollbackEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal push rollback event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("config.push.rollback.%s.%s", event.TenantID, event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Info("published push rollback event",
|
||||
"device_id", event.DeviceID,
|
||||
"tenant_id", event.TenantID,
|
||||
"push_operation_id", event.PushOperationID,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PublishPushAlert publishes a push alert event when a device goes offline
|
||||
// after an editor config push, enabling one-click rollback in the UI.
|
||||
func (p *Publisher) PublishPushAlert(ctx context.Context, event PushAlertEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal push alert event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("config.push.alert.%s.%s", event.TenantID, event.DeviceID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Info("published push alert event",
|
||||
"device_id", event.DeviceID,
|
||||
"tenant_id", event.TenantID,
|
||||
"push_type", event.PushType,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Conn returns the raw NATS connection for use by other components
|
||||
// (e.g., CmdResponder for request-reply subscriptions).
|
||||
func (p *Publisher) Conn() *nats.Conn {
|
||||
return p.nc
|
||||
}
|
||||
|
||||
// Close drains the NATS connection, flushing pending messages before closing.
|
||||
func (p *Publisher) Close() {
|
||||
if p.nc != nil {
|
||||
if err := p.nc.Drain(); err != nil {
|
||||
slog.Warn("error draining NATS connection", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user