package poller
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/alicebob/miniredis/v2"
|
|
"github.com/redis/go-redis/v9"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/staack/the-other-dude/poller/internal/device"
|
|
"github.com/staack/the-other-dude/poller/internal/store"
|
|
)
|
|
|
|
// mockSSHHostKeyUpdater implements SSHHostKeyUpdater for testing.
// It records every fingerprint passed to UpdateSSHHostKey, and can be primed
// with an error (via err) to simulate a failing updater.
type mockSSHHostKeyUpdater struct {
	mu          sync.Mutex        // guards updatedKeys and err
	updatedKeys map[string]string // device_id -> fingerprint
	err         error             // if non-nil, UpdateSSHHostKey returns this and records nothing
}
|
|
|
|
func newMockSSHHostKeyUpdater() *mockSSHHostKeyUpdater {
|
|
return &mockSSHHostKeyUpdater{updatedKeys: make(map[string]string)}
|
|
}
|
|
|
|
func (m *mockSSHHostKeyUpdater) UpdateSSHHostKey(ctx context.Context, deviceID string, fingerprint string) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.err != nil {
|
|
return m.err
|
|
}
|
|
m.updatedKeys[deviceID] = fingerprint
|
|
return nil
|
|
}
|
|
|
|
func TestRandomJitter(t *testing.T) {
|
|
// randomJitter(30, 300) returns value in [30s, 300s] range
|
|
for i := 0; i < 100; i++ {
|
|
j := randomJitter(30, 300)
|
|
assert.GreaterOrEqual(t, j, 30*time.Second, "jitter should be >= 30s")
|
|
assert.LessOrEqual(t, j, 300*time.Second, "jitter should be <= 300s")
|
|
}
|
|
}
|
|
|
|
func TestRandomJitter_MinEqualsMax(t *testing.T) {
|
|
j := randomJitter(60, 60)
|
|
assert.Equal(t, 60*time.Second, j)
|
|
}
|
|
|
|
func TestCalculateBackupBackoff(t *testing.T) {
|
|
// 1 failure: 5 min
|
|
assert.Equal(t, 5*time.Minute, calculateBackupBackoff(1))
|
|
// 2 failures: 15 min
|
|
assert.Equal(t, 15*time.Minute, calculateBackupBackoff(2))
|
|
// 3 failures: 1 hour (cap)
|
|
assert.Equal(t, 1*time.Hour, calculateBackupBackoff(3))
|
|
// 10 failures: still capped at 1 hour
|
|
assert.Equal(t, 1*time.Hour, calculateBackupBackoff(10))
|
|
// 0 failures: 5 min (floor)
|
|
assert.Equal(t, 5*time.Minute, calculateBackupBackoff(0))
|
|
}
|
|
|
|
func TestShouldRetry_AuthFailedBlocks(t *testing.T) {
|
|
state := &backupDeviceState{
|
|
lastErrorKind: device.ErrAuthFailed,
|
|
}
|
|
assert.False(t, shouldRetry(state), "auth failure should block retry")
|
|
}
|
|
|
|
func TestShouldRetry_HostKeyMismatchBlocks(t *testing.T) {
|
|
state := &backupDeviceState{
|
|
lastErrorKind: device.ErrHostKeyMismatch,
|
|
}
|
|
assert.False(t, shouldRetry(state), "host key mismatch should block retry")
|
|
}
|
|
|
|
func TestShouldRetry_TransientErrorAllows(t *testing.T) {
|
|
state := &backupDeviceState{
|
|
lastErrorKind: device.ErrTimeout,
|
|
}
|
|
assert.True(t, shouldRetry(state), "transient errors should allow retry")
|
|
}
|
|
|
|
func TestShouldRetry_NoError(t *testing.T) {
|
|
state := &backupDeviceState{}
|
|
assert.True(t, shouldRetry(state), "no previous error should allow retry")
|
|
}
|
|
|
|
func TestShouldRetry_UnknownErrorAllows(t *testing.T) {
|
|
state := &backupDeviceState{
|
|
lastErrorKind: device.ErrUnknown,
|
|
}
|
|
assert.True(t, shouldRetry(state), "unknown errors should allow retry")
|
|
}
|
|
|
|
func TestBackupScheduler_OnlineOnlyGating(t *testing.T) {
|
|
// Device not in Redis (no status key) -> should be allowed (first poll hasn't happened)
|
|
mr, err := miniredis.Run()
|
|
require.NoError(t, err)
|
|
defer mr.Close()
|
|
|
|
rc := redis.NewClient(&redis.Options{Addr: mr.Addr()})
|
|
defer rc.Close()
|
|
|
|
// No status key set -> isDeviceOnline should return true (assume might be online)
|
|
online := isDeviceOnline(context.Background(), rc, "dev-1")
|
|
assert.True(t, online, "device with no status key should be considered potentially online")
|
|
|
|
// Set status to "online" -> should return true
|
|
mr.Set("device:dev-2:status", "online")
|
|
online = isDeviceOnline(context.Background(), rc, "dev-2")
|
|
assert.True(t, online, "device with online status should be online")
|
|
|
|
// Set status to "offline" -> should return false
|
|
mr.Set("device:dev-3:status", "offline")
|
|
online = isDeviceOnline(context.Background(), rc, "dev-3")
|
|
assert.False(t, online, "device with offline status should not be online")
|
|
}
|
|
|
|
// TestBackupScheduler_ConcurrencySemaphore verifies that a full semaphore
// makes a would-be backup wait for a free slot rather than being dropped.
func TestBackupScheduler_ConcurrencySemaphore(t *testing.T) {
	const maxConcurrent = 2
	sem := make(chan struct{}, maxConcurrent)

	// Occupy every slot.
	for i := 0; i < maxConcurrent; i++ {
		sem <- struct{}{}
	}

	// A third acquire must block until a slot is released.
	acquired := make(chan struct{})
	go func() {
		sem <- struct{}{}
		close(acquired)
	}()

	// While the semaphore is full the goroutine must stay parked.
	select {
	case <-acquired:
		t.Fatal("semaphore should have blocked but didn't")
	case <-time.After(50 * time.Millisecond):
		// Expected: still blocked.
	}

	// Free one slot; the waiter should now get through.
	<-sem
	select {
	case <-acquired:
		// Expected: unblocked after release.
	case <-time.After(time.Second):
		t.Fatal("semaphore should have unblocked after release")
	}

	// Drain the remaining tokens.
	<-sem
	<-sem
}
|
|
|
|
func TestBackupScheduler_ReconcileStartsNewDevices(t *testing.T) {
|
|
mr, err := miniredis.Run()
|
|
require.NoError(t, err)
|
|
defer mr.Close()
|
|
|
|
rc := redis.NewClient(&redis.Options{Addr: mr.Addr()})
|
|
defer rc.Close()
|
|
|
|
devices := []store.Device{
|
|
{ID: "dev-1", TenantID: "t-1", IPAddress: "10.0.0.1", SSHPort: 22},
|
|
{ID: "dev-2", TenantID: "t-1", IPAddress: "10.0.0.2", SSHPort: 22},
|
|
}
|
|
fetcher := &mockDeviceFetcher{devices: devices}
|
|
hostKeyUpdater := newMockSSHHostKeyUpdater()
|
|
|
|
bs := NewBackupScheduler(
|
|
fetcher,
|
|
hostKeyUpdater,
|
|
nil, // locker
|
|
nil, // publisher
|
|
nil, // credentialCache
|
|
rc,
|
|
6*time.Hour,
|
|
60*time.Second,
|
|
60*time.Second,
|
|
10,
|
|
)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
err = bs.reconcileBackupDevices(ctx, &wg)
|
|
require.NoError(t, err)
|
|
|
|
bs.mu.Lock()
|
|
assert.Len(t, bs.activeDevices, 2)
|
|
_, hasDev1 := bs.activeDevices["dev-1"]
|
|
_, hasDev2 := bs.activeDevices["dev-2"]
|
|
assert.True(t, hasDev1)
|
|
assert.True(t, hasDev2)
|
|
bs.mu.Unlock()
|
|
|
|
cancel()
|
|
wg.Wait()
|
|
}
|
|
|
|
func TestBackupScheduler_ReconcileStopsRemovedDevices(t *testing.T) {
|
|
mr, err := miniredis.Run()
|
|
require.NoError(t, err)
|
|
defer mr.Close()
|
|
|
|
rc := redis.NewClient(&redis.Options{Addr: mr.Addr()})
|
|
defer rc.Close()
|
|
|
|
fetcher := &mockDeviceFetcher{devices: []store.Device{}}
|
|
hostKeyUpdater := newMockSSHHostKeyUpdater()
|
|
|
|
bs := NewBackupScheduler(
|
|
fetcher,
|
|
hostKeyUpdater,
|
|
nil, nil, nil,
|
|
rc,
|
|
6*time.Hour, 60*time.Second, 60*time.Second, 10,
|
|
)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
// Pre-populate a device
|
|
devCtx, devCancel := context.WithCancel(ctx)
|
|
_ = devCtx
|
|
bs.activeDevices["dev-removed"] = &backupDeviceState{cancel: devCancel}
|
|
|
|
var wg sync.WaitGroup
|
|
err = bs.reconcileBackupDevices(ctx, &wg)
|
|
require.NoError(t, err)
|
|
|
|
bs.mu.Lock()
|
|
assert.Len(t, bs.activeDevices, 0)
|
|
bs.mu.Unlock()
|
|
|
|
cancel()
|
|
wg.Wait()
|
|
}
|