fix: cap NATS JetStream streams to prevent OOM crash

The WIRELESS_REGISTRATIONS stream had a 256MB MaxBytes cap inside a
256MB container, leaving no memory headroom — guaranteed to crash under
load. ALERT_EVENTS and OPERATION_EVENTS had no byte limit at all.

- Reduce WIRELESS_REGISTRATIONS MaxBytes from 256MB to 128MB
- Add a 16MB MaxBytes cap to each of ALERT_EVENTS and OPERATION_EVENTS
- Bump NATS container memory limit from 256MB to 384MB
- Add `restart: unless-stopped` to the NATS service in the base compose file
- Bump version to 9.8.2

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jason Staack
2026-03-23 07:52:07 -05:00
parent 231154d28b
commit e1d81b40ac
10 changed files with 13 additions and 10 deletions

View File

@@ -1 +1 @@
9.8.1 9.8.2

View File

@@ -144,7 +144,7 @@ class Settings(BaseSettings):
# App settings # App settings
APP_NAME: str = "TOD - The Other Dude" APP_NAME: str = "TOD - The Other Dude"
APP_VERSION: str = "9.8.1" APP_VERSION: str = "9.8.2"
DEBUG: bool = False DEBUG: bool = False
@field_validator("CREDENTIAL_ENCRYPTION_KEY") @field_validator("CREDENTIAL_ENCRYPTION_KEY")

View File

@@ -63,6 +63,7 @@ async def ensure_sse_streams() -> None:
name="ALERT_EVENTS", name="ALERT_EVENTS",
subjects=["alert.fired.>", "alert.resolved.>"], subjects=["alert.fired.>", "alert.resolved.>"],
max_age=3600, # 1 hour retention max_age=3600, # 1 hour retention
max_bytes=16 * 1024 * 1024, # 16MB cap
) )
) )
logger.info("nats.stream.ensured", stream="ALERT_EVENTS") logger.info("nats.stream.ensured", stream="ALERT_EVENTS")
@@ -72,6 +73,7 @@ async def ensure_sse_streams() -> None:
name="OPERATION_EVENTS", name="OPERATION_EVENTS",
subjects=["firmware.progress.>"], subjects=["firmware.progress.>"],
max_age=3600, # 1 hour retention max_age=3600, # 1 hour retention
max_bytes=16 * 1024 * 1024, # 16MB cap
) )
) )
logger.info("nats.stream.ensured", stream="OPERATION_EVENTS") logger.info("nats.stream.ensured", stream="OPERATION_EVENTS")

View File

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "the-other-dude-backend" name = "the-other-dude-backend"
version = "9.8.1" version = "9.8.2"
description = "MikroTik Fleet Management Portal - Backend API" description = "MikroTik Fleet Management Portal - Backend API"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [

View File

@@ -70,10 +70,11 @@ services:
interval: 5s interval: 5s
timeout: 5s timeout: 5s
retries: 5 retries: 5
restart: unless-stopped
deploy: deploy:
resources: resources:
limits: limits:
memory: 256M memory: 384M
networks: networks:
- tod - tod

View File

@@ -9,7 +9,7 @@ TOD uses Pydantic Settings for configuration. All values can be set via environm
| Variable | Default | Description | | Variable | Default | Description |
|----------|---------|-------------| |----------|---------|-------------|
| `APP_NAME` | `TOD - The Other Dude` | Application display name | | `APP_NAME` | `TOD - The Other Dude` | Application display name |
| `APP_VERSION` | `9.8.1` | Semantic version string (see VERSION file at project root) | | `APP_VERSION` | `9.8.2` | Semantic version string (see VERSION file at project root) |
| `TOD_VERSION` | `latest` | Docker image tag for pre-built images (set by setup.py) | | `TOD_VERSION` | `latest` | Docker image tag for pre-built images (set by setup.py) |
| `ENVIRONMENT` | `dev` | Runtime environment: `dev`, `staging`, or `production` | | `ENVIRONMENT` | `dev` | Runtime environment: `dev`, `staging`, or `production` |
| `DEBUG` | `false` | Enable debug mode | | `DEBUG` | `false` | Enable debug mode |

View File

@@ -55,7 +55,7 @@
"Zero-knowledge authentication (SRP-6a)" "Zero-knowledge authentication (SRP-6a)"
], ],
"softwareRequirements": "Docker, PostgreSQL 17, Redis, NATS", "softwareRequirements": "Docker, PostgreSQL 17, Redis, NATS",
"softwareVersion": "9.8.1", "softwareVersion": "9.8.2",
"license": "https://mariadb.com/bsl11/" "license": "https://mariadb.com/bsl11/"
} }
</script> </script>
@@ -547,7 +547,7 @@
<section class="wp-section"> <section class="wp-section">
<h2>Status</h2> <h2>Status</h2>
<table class="wp-status-table"> <table class="wp-status-table">
<tr><td>Version</td><td>9.8.1</td></tr> <tr><td>Version</td><td>9.8.2</td></tr>
<tr><td>License</td><td>BSL 1.1 (converts to Apache 2.0 in 2030)</td></tr> <tr><td>License</td><td>BSL 1.1 (converts to Apache 2.0 in 2030)</td></tr>
<tr><td>Free tier</td><td>250 devices</td></tr> <tr><td>Free tier</td><td>250 devices</td></tr>
<tr><td>Stability</td><td>Breaking changes expected before v11</td></tr> <tr><td>Stability</td><td>Breaking changes expected before v11</td></tr>

View File

@@ -1,7 +1,7 @@
{ {
"name": "frontend", "name": "frontend",
"private": true, "private": true,
"version": "9.8.1", "version": "9.8.2",
"type": "module", "type": "module",
"scripts": { "scripts": {
"dev": "vite", "dev": "vite",

View File

@@ -3,7 +3,7 @@ name: tod
description: The Other Dude — MikroTik fleet management platform description: The Other Dude — MikroTik fleet management platform
type: application type: application
version: 1.0.0 version: 1.0.0
appVersion: "9.8.1" appVersion: "9.8.2"
kubeVersion: ">=1.28.0-0" kubeVersion: ">=1.28.0-0"
keywords: keywords:
- mikrotik - mikrotik

View File

@@ -163,7 +163,7 @@ func NewPublisher(natsURL string) (*Publisher, error) {
Name: "WIRELESS_REGISTRATIONS", Name: "WIRELESS_REGISTRATIONS",
Subjects: []string{"wireless.registrations.>"}, Subjects: []string{"wireless.registrations.>"},
MaxAge: 30 * 24 * time.Hour, // 30-day retention MaxAge: 30 * 24 * time.Hour, // 30-day retention
MaxBytes: 256 * 1024 * 1024, // 256MB cap MaxBytes: 128 * 1024 * 1024, // 128MB cap
Discard: jetstream.DiscardOld, Discard: jetstream.DiscardOld,
}) })
if err != nil { if err != nil {