feat: add audit.session.end NATS pipeline for SSH session tracking

Poller publishes session end events via JetStream when SSH sessions
close (normal disconnect or idle timeout). Backend subscribes with a
durable consumer and writes ssh_session_end audit log entries with
duration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-12 16:07:10 -05:00
parent 7aaaeaa1d1
commit acf1790bed
5 changed files with 276 additions and 3 deletions

View File

@@ -220,7 +220,7 @@ func main() {
// -----------------------------------------------------------------------
// Initialize SSH relay server and HTTP listener
// -----------------------------------------------------------------------
sshServer := sshrelay.NewServer(redisClient, credentialCache, deviceStore, sshrelay.Config{
sshServer := sshrelay.NewServer(redisClient, credentialCache, deviceStore, publisher, sshrelay.Config{
IdleTimeout: time.Duration(cfg.SSHIdleTimeout) * time.Second,
MaxSessions: cfg.SSHMaxSessions,
MaxPerUser: cfg.SSHMaxPerUser,

View File

@@ -124,6 +124,7 @@ func NewPublisher(natsURL string) (*Publisher, error) {
"config.changed.>",
"config.push.rollback.>",
"config.push.alert.>",
"audit.session.end.>",
},
MaxAge: 24 * time.Hour,
})
@@ -306,6 +307,43 @@ func (p *Publisher) PublishPushAlert(ctx context.Context, event PushAlertEvent)
return nil
}
// SessionEndEvent describes a finished SSH relay session. It is serialized
// to JSON and published to NATS JetStream; the backend subscribes to
// audit.session.end.> and records an audit log entry carrying the session
// duration.
type SessionEndEvent struct {
	// SessionID identifies the relay session that ended.
	SessionID string `json:"session_id"`

	// UserID is the identifier of the user the session belonged to.
	UserID string `json:"user_id"`

	// TenantID is the identifier of the tenant the session belonged to.
	TenantID string `json:"tenant_id"`

	// DeviceID is the identifier of the device the session was relayed to.
	DeviceID string `json:"device_id"`

	// StartTime is the session start timestamp in RFC3339 format.
	StartTime string `json:"start_time"`

	// EndTime is the session end timestamp in RFC3339 format.
	EndTime string `json:"end_time"`

	// SourceIP is the source IP recorded for the session.
	SourceIP string `json:"source_ip"`

	// Reason states why the session ended: "normal", "idle_timeout" or
	// "shutdown".
	Reason string `json:"reason"`
}
// PublishSessionEnd serializes event as JSON and publishes it to NATS
// JetStream on the subject audit.session.end.<SessionID>.
//
// It returns a wrapped error when marshalling or publishing fails. After a
// successful publish it emits a debug log entry and returns nil.
func (p *Publisher) PublishSessionEnd(ctx context.Context, event SessionEndEvent) error {
	payload, marshalErr := json.Marshal(event)
	if marshalErr != nil {
		return fmt.Errorf("marshalling session end event: %w", marshalErr)
	}

	// One subject per session; the stream's audit.session.end.> filter
	// matches every such subject.
	subj := "audit.session.end." + event.SessionID
	if _, pubErr := p.js.Publish(ctx, subj, payload); pubErr != nil {
		return fmt.Errorf("publishing to %s: %w", subj, pubErr)
	}

	slog.Debug("published session end event",
		"session_id", event.SessionID,
		"device_id", event.DeviceID,
		"subject", subj,
	)
	return nil
}
// Conn returns the raw NATS connection for use by other components
// (e.g., CmdResponder for request-reply subscriptions).
func (p *Publisher) Conn() *nats.Conn {

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/google/uuid"
"github.com/mikrotik-portal/poller/internal/bus"
"github.com/mikrotik-portal/poller/internal/store"
"github.com/mikrotik-portal/poller/internal/vault"
"github.com/redis/go-redis/v9"
@@ -35,6 +36,7 @@ type Server struct {
redis *redis.Client
credCache *vault.CredentialCache
deviceStore *store.DeviceStore
publisher *bus.Publisher
sessions map[string]*Session
mu sync.Mutex
idleTime time.Duration
@@ -53,12 +55,13 @@ type Config struct {
}
// NewServer creates and starts a new SSH relay server.
func NewServer(rc *redis.Client, cc *vault.CredentialCache, ds *store.DeviceStore, cfg Config) *Server {
func NewServer(rc *redis.Client, cc *vault.CredentialCache, ds *store.DeviceStore, pub *bus.Publisher, cfg Config) *Server {
ctx, cancel := context.WithCancel(context.Background())
s := &Server{
redis: rc,
credCache: cc,
deviceStore: ds,
publisher: pub,
sessions: make(map[string]*Session),
idleTime: cfg.IdleTimeout,
maxSessions: cfg.MaxSessions,
@@ -255,12 +258,15 @@ func (s *Server) handleSSH(w http.ResponseWriter, r *http.Request) {
delete(s.sessions, sess.ID)
s.mu.Unlock()
duration := time.Since(sess.StartTime)
endTime := time.Now()
duration := endTime.Sub(sess.StartTime)
slog.Info("ssh session ended",
"session_id", sess.ID,
"device_id", payload.DeviceID,
"duration", duration.String(),
)
s.publishSessionEnd(sess, endTime, "normal")
}
// validateToken performs a Redis GETDEL to atomically consume a single-use token.
@@ -331,6 +337,36 @@ func (s *Server) cleanupIdle() {
for _, sess := range toCancel {
slog.Info("ssh session idle timeout", "session_id", sess.ID)
sess.cancel()
s.publishSessionEnd(sess, time.Now(), "idle_timeout")
}
}
// publishSessionEnd reports the end of sess as an audit.session.end event on
// NATS JetStream, stamping it with endTime and reason. It does nothing when
// the server was built without a publisher. A failed publish is logged and
// otherwise ignored, so callers never receive an error from this step.
func (s *Server) publishSessionEnd(sess *Session, endTime time.Time, reason string) {
	pub := s.publisher
	if pub == nil {
		return
	}

	evt := bus.SessionEndEvent{
		SessionID: sess.ID,
		UserID:    sess.UserID,
		TenantID:  sess.TenantID,
		DeviceID:  sess.DeviceID,
		StartTime: sess.StartTime.Format(time.RFC3339),
		EndTime:   endTime.Format(time.RFC3339),
		SourceIP:  sess.SourceIP,
		Reason:    reason,
	}

	// Use a fresh background context capped at five seconds so the publish
	// attempt is bounded in time.
	pubCtx, release := context.WithTimeout(context.Background(), 5*time.Second)
	defer release()

	err := pub.PublishSessionEnd(pubCtx, evt)
	if err == nil {
		return
	}
	slog.Error("failed to publish session end event",
		"session_id", sess.ID,
		"error", err,
	)
}