feat: add audit.session.end NATS pipeline for SSH session tracking
The poller publishes session-end events via JetStream when an SSH session closes (normal disconnect or idle timeout). The backend subscribes with a durable consumer and writes ssh_session_end audit log entries that include the session duration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -220,7 +220,7 @@ func main() {
|
||||
// -----------------------------------------------------------------------
|
||||
// Initialize SSH relay server and HTTP listener
|
||||
// -----------------------------------------------------------------------
|
||||
sshServer := sshrelay.NewServer(redisClient, credentialCache, deviceStore, sshrelay.Config{
|
||||
sshServer := sshrelay.NewServer(redisClient, credentialCache, deviceStore, publisher, sshrelay.Config{
|
||||
IdleTimeout: time.Duration(cfg.SSHIdleTimeout) * time.Second,
|
||||
MaxSessions: cfg.SSHMaxSessions,
|
||||
MaxPerUser: cfg.SSHMaxPerUser,
|
||||
|
||||
@@ -124,6 +124,7 @@ func NewPublisher(natsURL string) (*Publisher, error) {
|
||||
"config.changed.>",
|
||||
"config.push.rollback.>",
|
||||
"config.push.alert.>",
|
||||
"audit.session.end.>",
|
||||
},
|
||||
MaxAge: 24 * time.Hour,
|
||||
})
|
||||
@@ -306,6 +307,43 @@ func (p *Publisher) PublishPushAlert(ctx context.Context, event PushAlertEvent)
|
||||
return nil
|
||||
}
|
||||
|
||||
// SessionEndEvent describes a finished SSH relay session. It is encoded as
// JSON and published to NATS JetStream; the backend is expected to consume
// audit.session.end.> and record an audit log entry that includes the
// session duration.
type SessionEndEvent struct {
	// SessionID is the ID of the relay session that ended.
	SessionID string `json:"session_id"`
	// UserID, TenantID and DeviceID carry the IDs associated with the session.
	UserID   string `json:"user_id"`
	TenantID string `json:"tenant_id"`
	DeviceID string `json:"device_id"`
	// StartTime and EndTime are RFC3339-formatted timestamps.
	StartTime string `json:"start_time"`
	EndTime   string `json:"end_time"`
	// SourceIP is the source IP recorded for the session.
	SourceIP string `json:"source_ip"`
	// Reason states why the session ended: "normal", "idle_timeout" or
	// "shutdown".
	Reason string `json:"reason"`
}
|
||||
|
||||
// PublishSessionEnd publishes an SSH session end event to NATS JetStream.
|
||||
func (p *Publisher) PublishSessionEnd(ctx context.Context, event SessionEndEvent) error {
|
||||
data, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshalling session end event: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("audit.session.end.%s", event.SessionID)
|
||||
|
||||
_, err = p.js.Publish(ctx, subject, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("publishing to %s: %w", subject, err)
|
||||
}
|
||||
|
||||
slog.Debug("published session end event",
|
||||
"session_id", event.SessionID,
|
||||
"device_id", event.DeviceID,
|
||||
"subject", subject,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Conn returns the raw NATS connection for use by other components
|
||||
// (e.g., CmdResponder for request-reply subscriptions).
|
||||
func (p *Publisher) Conn() *nats.Conn {
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mikrotik-portal/poller/internal/bus"
|
||||
"github.com/mikrotik-portal/poller/internal/store"
|
||||
"github.com/mikrotik-portal/poller/internal/vault"
|
||||
"github.com/redis/go-redis/v9"
|
||||
@@ -35,6 +36,7 @@ type Server struct {
|
||||
redis *redis.Client
|
||||
credCache *vault.CredentialCache
|
||||
deviceStore *store.DeviceStore
|
||||
publisher *bus.Publisher
|
||||
sessions map[string]*Session
|
||||
mu sync.Mutex
|
||||
idleTime time.Duration
|
||||
@@ -53,12 +55,13 @@ type Config struct {
|
||||
}
|
||||
|
||||
// NewServer creates and starts a new SSH relay server.
|
||||
func NewServer(rc *redis.Client, cc *vault.CredentialCache, ds *store.DeviceStore, cfg Config) *Server {
|
||||
func NewServer(rc *redis.Client, cc *vault.CredentialCache, ds *store.DeviceStore, pub *bus.Publisher, cfg Config) *Server {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
s := &Server{
|
||||
redis: rc,
|
||||
credCache: cc,
|
||||
deviceStore: ds,
|
||||
publisher: pub,
|
||||
sessions: make(map[string]*Session),
|
||||
idleTime: cfg.IdleTimeout,
|
||||
maxSessions: cfg.MaxSessions,
|
||||
@@ -255,12 +258,15 @@ func (s *Server) handleSSH(w http.ResponseWriter, r *http.Request) {
|
||||
delete(s.sessions, sess.ID)
|
||||
s.mu.Unlock()
|
||||
|
||||
duration := time.Since(sess.StartTime)
|
||||
endTime := time.Now()
|
||||
duration := endTime.Sub(sess.StartTime)
|
||||
slog.Info("ssh session ended",
|
||||
"session_id", sess.ID,
|
||||
"device_id", payload.DeviceID,
|
||||
"duration", duration.String(),
|
||||
)
|
||||
|
||||
s.publishSessionEnd(sess, endTime, "normal")
|
||||
}
|
||||
|
||||
// validateToken performs a Redis GETDEL to atomically consume a single-use token.
|
||||
@@ -331,6 +337,36 @@ func (s *Server) cleanupIdle() {
|
||||
for _, sess := range toCancel {
|
||||
slog.Info("ssh session idle timeout", "session_id", sess.ID)
|
||||
sess.cancel()
|
||||
s.publishSessionEnd(sess, time.Now(), "idle_timeout")
|
||||
}
|
||||
}
|
||||
|
||||
// publishSessionEnd publishes an audit.session.end event via NATS JetStream.
|
||||
// Errors are logged but never block session cleanup.
|
||||
func (s *Server) publishSessionEnd(sess *Session, endTime time.Time, reason string) {
|
||||
if s.publisher == nil {
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
event := bus.SessionEndEvent{
|
||||
SessionID: sess.ID,
|
||||
UserID: sess.UserID,
|
||||
TenantID: sess.TenantID,
|
||||
DeviceID: sess.DeviceID,
|
||||
StartTime: sess.StartTime.Format(time.RFC3339),
|
||||
EndTime: endTime.Format(time.RFC3339),
|
||||
SourceIP: sess.SourceIP,
|
||||
Reason: reason,
|
||||
}
|
||||
|
||||
if err := s.publisher.PublishSessionEnd(ctx, event); err != nil {
|
||||
slog.Error("failed to publish session end event",
|
||||
"session_id", sess.ID,
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user