feat(17-02): add snmp_custom handler and NAK safety net to metrics subscriber

- Add _insert_snmp_custom_metrics handler for custom SNMP OID events
- Insert all 9 columns into snmp_metrics hypertable
- Change unknown metric types from ACK to NAK for redelivery safety
- Prevents permanent data loss during deployment ordering mismatches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-21 18:51:02 -05:00
parent ad75a19f5d
commit 390df0531d

View File

@@ -4,6 +4,7 @@ Subscribes to device.metrics.> and inserts into TimescaleDB hypertables:
- interface_metrics — per-interface rx/tx byte counters
- health_metrics — CPU, memory, disk, temperature per device
- wireless_metrics — per-wireless-interface aggregated client stats
- snmp_metrics — custom SNMP OID metrics (UPS, vendor, tenant profiles)
Also maintains denormalized last_cpu_load and last_memory_used_pct columns
on the devices table for efficient fleet table display.
@@ -178,6 +179,41 @@ async def _insert_wireless_metrics(session, data: dict) -> None:
)
async def _insert_snmp_custom_metrics(session, data: dict) -> None:
    """Insert custom SNMP OID metrics into the snmp_metrics hypertable.

    Every entry of ``data["metrics"]`` becomes one row. The event-level
    ``device_id``, ``tenant_id`` and parsed ``collected_at`` values are
    shared by all rows of the event; the remaining columns are read from
    the individual metric entry, with absent keys bound as ``None``.

    Args:
        session: Database session exposing an awaitable
            ``execute(statement, parameters)``.
        data: Decoded ``snmp_custom`` event payload.

    Returns:
        None. If ``metrics`` is missing or empty a warning is logged and
        nothing is written. This function does not commit.
    """
    metrics = data.get("metrics")
    if not metrics:
        logger.warning("snmp_custom event missing 'metrics' field — skipping")
        return

    device_id = data.get("device_id")
    tenant_id = data.get("tenant_id")
    collected_at = _parse_timestamp(data.get("collected_at"))

    # The statement is identical for every row, so build it once rather than
    # once per metric.
    insert_stmt = text("""
        INSERT INTO snmp_metrics
            (time, device_id, tenant_id, metric_name, metric_group,
             value_numeric, value_text, oid, index_value)
        VALUES
            (:time, :device_id, :tenant_id, :metric_name, :metric_group,
             :value_numeric, :value_text, :oid, :index_value)
    """)

    rows = [
        {
            "time": collected_at,
            "device_id": device_id,
            "tenant_id": tenant_id,
            "metric_name": m.get("metric_name"),
            "metric_group": m.get("metric_group"),
            "value_numeric": m.get("value_numeric"),
            "value_text": m.get("value_text"),
            "oid": m.get("oid"),
            "index_value": m.get("index_value"),
        }
        for m in metrics
    ]

    # Passing a list of parameter dicts sends all rows in a single
    # executemany call instead of awaiting one round trip per metric.
    if rows:
        await session.execute(insert_stmt, rows)
# =============================================================================
# MAIN MESSAGE HANDLER
# =============================================================================
@@ -190,7 +226,10 @@ async def on_device_metrics(msg) -> None:
- "health" → _insert_health_metrics + update devices
- "interfaces" → _insert_interface_metrics
- "wireless" → _insert_wireless_metrics
- "snmp_custom" → _insert_snmp_custom_metrics (custom SNMP OID data)
Unknown types are NAKed (not ACKed) so NATS can redeliver once the
subscriber is updated -- prevents permanent data loss during deployments.
On success, acknowledges the message. On error, NAKs so NATS can redeliver.
"""
try:
@@ -210,9 +249,11 @@ async def on_device_metrics(msg) -> None:
await _insert_interface_metrics(session, data)
elif metric_type == "wireless":
await _insert_wireless_metrics(session, data)
elif metric_type == "snmp_custom":
await _insert_snmp_custom_metrics(session, data)
else:
logger.warning("Unknown metric type '%s'skipping", metric_type)
await msg.ack()
logger.warning("Unknown metric type '%s'NAKing for redelivery", metric_type)
await msg.nak()
return
await session.commit()