feat(02-01): add config backup env vars, NATS event, device SSH fields, migration, metrics

- Config: CONFIG_BACKUP_INTERVAL (21600s), CONFIG_BACKUP_MAX_CONCURRENT (10), CONFIG_BACKUP_COMMAND_TIMEOUT (60s)
- NATS: ConfigSnapshotEvent type, PublishConfigSnapshot method, config.snapshot.> stream subject
- Device: SSHPort/SSHHostKeyFingerprint fields, UpdateSSHHostKey method, updated queries/scans
- Migration 028: ssh_port, ssh_host_key_fingerprint, timestamp columns with poller_user grants
- Metrics: ConfigBackupTotal (counter), ConfigBackupDuration (histogram), ConfigBackupActive (gauge)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-12 20:48:12 -05:00
parent f1abb75cab
commit 4ae39d2cb3
5 changed files with 162 additions and 4 deletions

View File

@@ -65,6 +65,19 @@ type PushRollbackEvent struct {
PrePushCommitSHA string `json:"pre_push_commit_sha"`
}
// ConfigSnapshotEvent is the payload published to NATS JetStream when a config
// backup is successfully collected from a device. The backend subscribes to
// "config.snapshot.>" to store snapshots and compute diffs.
//
// Each field is serialised to JSON under the key named in its struct tag.
type ConfigSnapshotEvent struct {
// DeviceID identifies the device the snapshot came from. PublishConfigSnapshot
// appends it as the last token of the NATS subject.
DeviceID string `json:"device_id"`
// TenantID is presumably the tenant that owns the device — confirm with the producer.
TenantID string `json:"tenant_id"`
// RouterOSVersion is left out of the JSON payload when it is empty (omitempty).
RouterOSVersion string `json:"routeros_version,omitempty"`
// CollectedAt is a timestamp carried as a string rather than a time.Time.
CollectedAt string `json:"collected_at"` // RFC3339
// SHA256Hash is presumably the SHA-256 digest of ConfigText; whether it is taken
// before or after normalization is not visible here — TODO confirm.
SHA256Hash string `json:"sha256_hash"`
// ConfigText carries the collected configuration text itself.
ConfigText string `json:"config_text"`
// NormalizationVersion presumably records which revision of the normalization
// rules was applied to ConfigText — verify against the collector.
NormalizationVersion int `json:"normalization_version"`
}
// PushAlertEvent triggers an alert for editor pushes (one-click rollback).
type PushAlertEvent struct {
DeviceID string `json:"device_id"`
@@ -122,6 +135,7 @@ func NewPublisher(natsURL string) (*Publisher, error) {
"device.firmware.>",
"device.credential_changed.>",
"config.changed.>",
"config.snapshot.>",
"config.push.rollback.>",
"config.push.alert.>",
"audit.session.end.>",
@@ -257,6 +271,33 @@ func (p *Publisher) PublishConfigChanged(ctx context.Context, event ConfigChange
return nil
}
// PublishConfigSnapshot encodes event as JSON and publishes it to NATS
// JetStream on the subject "config.snapshot.create.{DeviceID}".
//
// The Python backend consumes these events to store the snapshot and compute
// a diff against the previous one.
//
// It returns a wrapped error when the event cannot be marshalled or when the
// JetStream publish fails; on success it emits a debug log line and returns nil.
func (p *Publisher) PublishConfigSnapshot(ctx context.Context, event ConfigSnapshotEvent) error {
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("marshalling config snapshot event: %w", err)
	}

	// One subject per device, all under the config.snapshot.create prefix.
	subject := "config.snapshot.create." + event.DeviceID

	// The publish acknowledgement is not needed; only the error matters.
	if _, pubErr := p.js.Publish(ctx, subject, payload); pubErr != nil {
		return fmt.Errorf("publishing to %s: %w", subject, pubErr)
	}

	slog.Debug(
		"published config snapshot event",
		"device_id", event.DeviceID,
		"tenant_id", event.TenantID,
		"sha256_hash", event.SHA256Hash,
		"subject", subject,
	)

	return nil
}
// PublishPushRollback publishes a push rollback event when a device goes offline
// after a template or restore config push, triggering automatic rollback.
func (p *Publisher) PublishPushRollback(ctx context.Context, event PushRollbackEvent) error {