feat(17-02): add snmp_custom handler and NAK safety net to metrics subscriber
- Add _insert_snmp_custom_metrics handler for custom SNMP OID events - Insert all 9 columns into snmp_metrics hypertable - Change unknown metric types from ACK to NAK for redelivery safety - Prevents permanent data loss during deployment ordering mismatches Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ Subscribes to device.metrics.> and inserts into TimescaleDB hypertables:
|
|||||||
- interface_metrics — per-interface rx/tx byte counters
|
- interface_metrics — per-interface rx/tx byte counters
|
||||||
- health_metrics — CPU, memory, disk, temperature per device
|
- health_metrics — CPU, memory, disk, temperature per device
|
||||||
- wireless_metrics — per-wireless-interface aggregated client stats
|
- wireless_metrics — per-wireless-interface aggregated client stats
|
||||||
|
- snmp_metrics — custom SNMP OID metrics (UPS, vendor, tenant profiles)
|
||||||
|
|
||||||
Also maintains denormalized last_cpu_load and last_memory_used_pct columns
|
Also maintains denormalized last_cpu_load and last_memory_used_pct columns
|
||||||
on the devices table for efficient fleet table display.
|
on the devices table for efficient fleet table display.
|
||||||
@@ -178,6 +179,41 @@ async def _insert_wireless_metrics(session, data: dict) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _insert_snmp_custom_metrics(session, data: dict) -> None:
|
||||||
|
"""Insert custom SNMP OID metrics into snmp_metrics hypertable."""
|
||||||
|
metrics = data.get("metrics")
|
||||||
|
if not metrics:
|
||||||
|
logger.warning("snmp_custom event missing 'metrics' field — skipping")
|
||||||
|
return
|
||||||
|
|
||||||
|
device_id = data.get("device_id")
|
||||||
|
tenant_id = data.get("tenant_id")
|
||||||
|
collected_at = _parse_timestamp(data.get("collected_at"))
|
||||||
|
|
||||||
|
for m in metrics:
|
||||||
|
await session.execute(
|
||||||
|
text("""
|
||||||
|
INSERT INTO snmp_metrics
|
||||||
|
(time, device_id, tenant_id, metric_name, metric_group,
|
||||||
|
value_numeric, value_text, oid, index_value)
|
||||||
|
VALUES
|
||||||
|
(:time, :device_id, :tenant_id, :metric_name, :metric_group,
|
||||||
|
:value_numeric, :value_text, :oid, :index_value)
|
||||||
|
"""),
|
||||||
|
{
|
||||||
|
"time": collected_at,
|
||||||
|
"device_id": device_id,
|
||||||
|
"tenant_id": tenant_id,
|
||||||
|
"metric_name": m.get("metric_name"),
|
||||||
|
"metric_group": m.get("metric_group"),
|
||||||
|
"value_numeric": m.get("value_numeric"),
|
||||||
|
"value_text": m.get("value_text"),
|
||||||
|
"oid": m.get("oid"),
|
||||||
|
"index_value": m.get("index_value"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# MAIN MESSAGE HANDLER
|
# MAIN MESSAGE HANDLER
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -187,10 +223,13 @@ async def on_device_metrics(msg) -> None:
|
|||||||
"""Handle a device.metrics event published by the Go poller.
|
"""Handle a device.metrics event published by the Go poller.
|
||||||
|
|
||||||
Dispatches to the appropriate insert handler based on the 'type' field:
|
Dispatches to the appropriate insert handler based on the 'type' field:
|
||||||
- "health" → _insert_health_metrics + update devices
|
- "health" → _insert_health_metrics + update devices
|
||||||
- "interfaces" → _insert_interface_metrics
|
- "interfaces" → _insert_interface_metrics
|
||||||
- "wireless" → _insert_wireless_metrics
|
- "wireless" → _insert_wireless_metrics
|
||||||
|
- "snmp_custom" → _insert_snmp_custom_metrics (custom SNMP OID data)
|
||||||
|
|
||||||
|
Unknown types are NAKed (not ACKed) so NATS can redeliver once the
|
||||||
|
subscriber is updated -- prevents permanent data loss during deployments.
|
||||||
On success, acknowledges the message. On error, NAKs so NATS can redeliver.
|
On success, acknowledges the message. On error, NAKs so NATS can redeliver.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
@@ -210,9 +249,11 @@ async def on_device_metrics(msg) -> None:
|
|||||||
await _insert_interface_metrics(session, data)
|
await _insert_interface_metrics(session, data)
|
||||||
elif metric_type == "wireless":
|
elif metric_type == "wireless":
|
||||||
await _insert_wireless_metrics(session, data)
|
await _insert_wireless_metrics(session, data)
|
||||||
|
elif metric_type == "snmp_custom":
|
||||||
|
await _insert_snmp_custom_metrics(session, data)
|
||||||
else:
|
else:
|
||||||
logger.warning("Unknown metric type '%s' — skipping", metric_type)
|
logger.warning("Unknown metric type '%s' — NAKing for redelivery", metric_type)
|
||||||
await msg.ack()
|
await msg.nak()
|
||||||
return
|
return
|
||||||
|
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|||||||
Reference in New Issue
Block a user