feat(09-01): implement retention cleanup service with configurable retention period
- Add CONFIG_RETENTION_DAYS setting (default 90) to config.py
- Create retention_service.py with cleanup_expired_snapshots (parameterized SQL via make_interval)
- APScheduler IntervalTrigger runs cleanup every 24h with 1h jitter
- Prometheus counter and histogram for observability
- CASCADE FKs handle diff/change deletion automatically
- All 4 unit tests pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -125,6 +125,9 @@ class Settings(BaseSettings):
|
||||
PASSWORD_RESET_TOKEN_EXPIRE_MINUTES: int = 30
|
||||
APP_BASE_URL: str = "http://localhost:3000"
|
||||
|
||||
# Retention cleanup — delete config snapshots older than N days
|
||||
CONFIG_RETENTION_DAYS: int = 90
|
||||
|
||||
# App settings
|
||||
APP_NAME: str = "TOD - The Other Dude"
|
||||
APP_VERSION: str = "0.1.0"
|
||||
|
||||
85
backend/app/services/retention_service.py
Normal file
85
backend/app/services/retention_service.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Retention cleanup service — deletes config snapshots older than CONFIG_RETENTION_DAYS.
|
||||
|
||||
Runs as an APScheduler IntervalTrigger job (every 24h). CASCADE FK constraints
|
||||
on router_config_diffs and router_config_changes handle associated data automatically.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from prometheus_client import Counter, Histogram
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.config import settings
|
||||
from app.database import AdminAsyncSessionLocal
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Handle to the scheduler managed by start_retention_scheduler() and
# stop_retention_scheduler(); None while no scheduler is held.
_scheduler: Optional[AsyncIOScheduler] = None

# Prometheus metrics
# Incremented by the number of snapshots removed on each cleanup run.
config_snapshots_cleaned_total = Counter(
    name="config_snapshots_cleaned_total",
    documentation="Cumulative count of expired config snapshots deleted by retention cleanup",
)
# Wall-clock time spent inside the cleanup's database work.
config_retention_cleanup_duration_seconds = Histogram(
    name="config_retention_cleanup_duration_seconds",
    documentation="Duration of retention cleanup execution",
)
|
||||
|
||||
|
||||
async def cleanup_expired_snapshots() -> int:
    """Delete config snapshots older than CONFIG_RETENTION_DAYS.

    Issues a single parameterized DELETE against router_config_snapshots and
    commits it. Associated rows in router_config_diffs and
    router_config_changes are expected to be removed by CASCADE FK
    constraints (defined in the schema, not in this module).

    A non-positive CONFIG_RETENTION_DAYS is treated as "retention disabled":
    with such a value the cutoff would be NOW() or later and the DELETE would
    remove every snapshot, so the run is skipped instead.

    Returns:
        The number of deleted snapshots; 0 when the run is skipped or the
        driver does not report a row count.
    """
    days = settings.CONFIG_RETENTION_DAYS

    # Guard against a misconfiguration that would wipe the whole table.
    if days <= 0:
        logger.warning(
            "retention cleanup skipped: CONFIG_RETENTION_DAYS must be positive",
            extra={"retention_days": days},
        )
        return 0

    with config_retention_cleanup_duration_seconds.time():
        async with AdminAsyncSessionLocal() as session:
            result = await session.execute(
                text(
                    "DELETE FROM router_config_snapshots "
                    "WHERE collected_at < NOW() - make_interval(days => :days)"
                ),
                {"days": days},
            )
            await session.commit()
            reported = result.rowcount

    # rowcount may be -1 (or None) when the driver cannot report it, and
    # Counter.inc() rejects negative amounts; clamp so a committed delete
    # is never reported as a failed job.
    deleted = reported if reported is not None and reported > 0 else 0

    config_snapshots_cleaned_total.inc(deleted)
    logger.info(
        "retention cleanup complete",
        extra={"deleted_snapshots": deleted, "retention_days": days},
    )
    return deleted
|
||||
|
||||
|
||||
async def start_retention_scheduler() -> None:
    """Start APScheduler with a 24-hour interval job for retention cleanup.

    Creates a UTC AsyncIOScheduler, registers cleanup_expired_snapshots on a
    24-hour IntervalTrigger with up to 3600 seconds of jitter, starts it, and
    stores it in the module-level ``_scheduler`` handle.

    If a scheduler from an earlier call is still running it is shut down
    first, so repeated calls never leave an orphaned scheduler firing its own
    copy of the job.
    """
    global _scheduler

    # Without this, a second call would overwrite the handle and the previous
    # scheduler would keep running with nothing left to stop it.
    if _scheduler is not None and _scheduler.running:
        _scheduler.shutdown(wait=False)

    scheduler = AsyncIOScheduler(timezone="UTC")
    scheduler.add_job(
        cleanup_expired_snapshots,
        trigger=IntervalTrigger(hours=24, jitter=3600),
        id="retention_cleanup",
        name="Config snapshot retention cleanup",
        max_instances=1,
        replace_existing=True,
    )
    scheduler.start()

    # Publish the handle only after a successful start, so a failure above
    # cannot leave a never-started scheduler for stop_retention_scheduler().
    _scheduler = scheduler
    logger.info(
        "retention scheduler started (every 24h, retention_days=%d)",
        settings.CONFIG_RETENTION_DAYS,
    )
|
||||
|
||||
|
||||
async def stop_retention_scheduler() -> None:
    """Shut down the retention scheduler, if one is currently held.

    Does nothing when no scheduler is stored in the module-level handle.
    Otherwise the scheduler is shut down without waiting for running jobs,
    the handle is cleared, and the stop is logged.
    """
    global _scheduler

    # Nothing to stop.
    if not _scheduler:
        return

    active = _scheduler
    active.shutdown(wait=False)
    _scheduler = None
    logger.info("retention scheduler stopped")
|
||||
Reference in New Issue
Block a user