feat(09-01): implement retention cleanup service with configurable retention period
- Add CONFIG_RETENTION_DAYS setting (default 90) to config.py
- Create retention_service.py with cleanup_expired_snapshots (parameterized SQL via make_interval)
- APScheduler IntervalTrigger runs cleanup every 24h with 1h jitter
- Prometheus counter and histogram for observability
- CASCADE FKs handle diff/change deletion automatically
- All 4 unit tests pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -125,6 +125,9 @@ class Settings(BaseSettings):
|
|||||||
PASSWORD_RESET_TOKEN_EXPIRE_MINUTES: int = 30
|
PASSWORD_RESET_TOKEN_EXPIRE_MINUTES: int = 30
|
||||||
APP_BASE_URL: str = "http://localhost:3000"
|
APP_BASE_URL: str = "http://localhost:3000"
|
||||||
|
|
||||||
|
# Retention cleanup — delete config snapshots older than N days
|
||||||
|
CONFIG_RETENTION_DAYS: int = 90
|
||||||
|
|
||||||
# App settings
|
# App settings
|
||||||
APP_NAME: str = "TOD - The Other Dude"
|
APP_NAME: str = "TOD - The Other Dude"
|
||||||
APP_VERSION: str = "0.1.0"
|
APP_VERSION: str = "0.1.0"
|
||||||
|
|||||||
85
backend/app/services/retention_service.py
Normal file
85
backend/app/services/retention_service.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""Retention cleanup service — deletes config snapshots older than CONFIG_RETENTION_DAYS.
|
||||||
|
|
||||||
|
Runs as an APScheduler IntervalTrigger job (every 24h). CASCADE FK constraints
|
||||||
|
on router_config_diffs and router_config_changes handle associated data automatically.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
|
from apscheduler.triggers.interval import IntervalTrigger
|
||||||
|
from prometheus_client import Counter, Histogram
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import AdminAsyncSessionLocal
|
||||||
|
|
||||||
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Scheduler instance owned by this module; None while no scheduler is active.
_scheduler: Optional[AsyncIOScheduler] = None

# Prometheus metrics
# Counter: total number of snapshots removed across all cleanup runs.
config_snapshots_cleaned_total = Counter(
    name="config_snapshots_cleaned_total",
    documentation="Cumulative count of expired config snapshots deleted by retention cleanup",
)
# Histogram: wall-clock duration of each cleanup run, in seconds.
config_retention_cleanup_duration_seconds = Histogram(
    name="config_retention_cleanup_duration_seconds",
    documentation="Duration of retention cleanup execution",
)
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_expired_snapshots() -> int:
    """Delete config snapshots older than CONFIG_RETENTION_DAYS.

    Issues a single parameterized DELETE against router_config_snapshots and
    commits it. CASCADE FK constraints on router_config_diffs and
    router_config_changes are relied on to remove associated rows.

    The database work is timed by the
    config_retention_cleanup_duration_seconds histogram, and the number of
    deleted rows is added to the config_snapshots_cleaned_total counter.

    Returns:
        The number of deleted snapshots, or 0 when the database driver
        cannot report a row count.
    """
    # NOTE(review): a value of 0 or less makes the cutoff NOW() or later, which
    # matches every stored snapshot — consider validating this in Settings.
    days = settings.CONFIG_RETENTION_DAYS

    with config_retention_cleanup_duration_seconds.time():
        async with AdminAsyncSessionLocal() as session:
            result = await session.execute(
                text(
                    "DELETE FROM router_config_snapshots "
                    "WHERE collected_at < NOW() - make_interval(days => :days)"
                ),
                {"days": days},
            )
            await session.commit()
            # rowcount can be -1 (or None) when the driver cannot report it;
            # Counter.inc() raises ValueError on negative amounts, so clamp.
            deleted = max(result.rowcount or 0, 0)

    config_snapshots_cleaned_total.inc(deleted)
    logger.info("retention cleanup complete", extra={"deleted_snapshots": deleted, "retention_days": days})
    return deleted
|
||||||
|
|
||||||
|
|
||||||
|
async def start_retention_scheduler() -> None:
    """Start APScheduler with a 24-hour interval job for retention cleanup.

    If a scheduler started by a previous call is still running, it is shut
    down first so that it is not orphaned when the module-level reference is
    replaced (an orphaned scheduler would keep firing and could no longer be
    stopped via stop_retention_scheduler).
    """
    global _scheduler

    if _scheduler is not None and _scheduler.running:
        logger.warning("retention scheduler already running; restarting")
        _scheduler.shutdown(wait=False)

    _scheduler = AsyncIOScheduler(timezone="UTC")
    # NOTE(review): with no explicit start date, an interval trigger first
    # fires one full interval after start — a process restarted more often
    # than every 24h would never run cleanup. Confirm this is acceptable.
    _scheduler.add_job(
        cleanup_expired_snapshots,
        # Up to 1h of random jitter spreads the run time around the 24h mark.
        trigger=IntervalTrigger(hours=24, jitter=3600),
        id="retention_cleanup",
        name="Config snapshot retention cleanup",
        # Never run two cleanups concurrently within this scheduler.
        max_instances=1,
        replace_existing=True,
    )
    _scheduler.start()
    logger.info(
        "retention scheduler started (every 24h, retention_days=%d)",
        settings.CONFIG_RETENTION_DAYS,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def stop_retention_scheduler() -> None:
    """Shut down the retention scheduler if one is active.

    The shutdown does not wait for currently executing jobs (wait=False).
    Afterwards the module-level scheduler reference is cleared. Does nothing
    when no scheduler is set.
    """
    global _scheduler

    # Nothing to stop.
    if not _scheduler:
        return

    _scheduler.shutdown(wait=False)
    _scheduler = None
    logger.info("retention scheduler stopped")
|
||||||
Reference in New Issue
Block a user