ci: add GitHub Pages deployment workflow for docs site Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
379 lines
14 KiB
Python
379 lines
14 KiB
Python
"""SSH-based config capture service for RouterOS devices.
|
|
|
|
This service handles:
|
|
1. capture_export() — SSH to device, run /export compact, return stdout text
|
|
2. capture_binary_backup() — SSH to device, trigger /system backup save, SFTP-download result
|
|
3. run_backup() — Orchestrate a full backup: capture + git commit + DB record
|
|
|
|
All functions are async (asyncssh is asyncio-native).
|
|
|
|
Security policy:
|
|
known_hosts=None is intentional — RouterOS devices use self-signed SSH host keys
|
|
that change on reset or key regeneration. This mirrors InsecureSkipVerify=true
|
|
used in the poller's TLS connection. The threat model accepts device impersonation
|
|
risk in exchange for operational simplicity (no pre-enrollment of host keys needed).
|
|
See Pitfall 2 in 04-RESEARCH.md.
|
|
|
|
pygit2 calls are synchronous C bindings and MUST be wrapped in run_in_executor.
|
|
See Pitfall 3 in 04-RESEARCH.md.
|
|
|
|
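Phase 30: ALL backups (manual, scheduled, pre-restore) are encrypted via OpenBao
Transit (Tier 2) before git commit. The server retains decrypt capability for
on-demand viewing. Raw files in git are ciphertext; the API decrypts on GET.
Exception: if the Transit encryption call fails, run_backup() logs a warning and
commits the backup as plaintext, recording encryption_tier as None for that run.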
|
|
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
import io
|
|
import json
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
|
|
import asyncssh
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.config import settings
|
|
from app.database import AdminAsyncSessionLocal, set_tenant_context
|
|
from app.models.config_backup import ConfigBackupRun
|
|
from app.models.device import Device
|
|
from app.services import git_store
|
|
from app.services.crypto import decrypt_credentials_hybrid
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Fixed backup file name on device flash — overwrites on each run so files
|
|
# don't accumulate. See Pitfall 4 in 04-RESEARCH.md.
|
|
_BACKUP_NAME = "portal-backup"
|
|
|
|
|
|
async def capture_export(
    ip: str,
    port: int = 22,
    username: str = "",
    password: str = "",
) -> str:
    """Fetch the output of ``/export compact`` from a RouterOS device over SSH.

    Opens a password-authenticated SSH session, runs the export command once,
    and hands back whatever the device wrote to stdout.

    Args:
        ip: Device IP address to connect to.
        port: SSH port (defaults to 22).
        username: SSH login username.
        password: SSH login password.

    Returns:
        The stdout text of ``/export compact`` exactly as received.

    Raises:
        asyncssh.Error: On SSH connection or command execution failure.
    """
    ssh_options = {
        "port": port,
        "username": username,
        "password": password,
        # Host-key checking is deliberately off for RouterOS self-signed keys.
        "known_hosts": None,
        "connect_timeout": 30,
    }

    async with asyncssh.connect(ip, **ssh_options) as ssh:
        # check=True turns a non-zero exit status into an exception.
        completed = await ssh.run("/export compact", check=True)

    return completed.stdout
|
|
|
|
|
|
async def capture_binary_backup(
    ip: str,
    port: int = 22,
    username: str = "",
    password: str = "",
) -> bytes:
    """Create a binary backup on a RouterOS device and download it via SFTP.

    The backup is always saved under the module-level ``_BACKUP_NAME`` so each
    run overwrites the previous file on the device. After the download attempt
    (successful or not) the file is removed from the device; a failure of that
    removal is only logged as a warning so it cannot hide a download error.

    Args:
        ip: Device IP address to connect to.
        port: SSH port (defaults to 22).
        username: SSH login username.
        password: SSH login password.

    Returns:
        The downloaded backup file contents as raw bytes.

    Raises:
        asyncssh.Error: On SSH connection, command, or SFTP failure.
    """
    remote_file = f"{_BACKUP_NAME}.backup"
    save_command = f"/system backup save name={_BACKUP_NAME} dont-encrypt=yes"

    async with asyncssh.connect(
        ip,
        port=port,
        username=username,
        password=password,
        known_hosts=None,  # host-key checking deliberately disabled
        connect_timeout=30,
    ) as ssh:
        # Ask the device to write the backup file; a non-zero exit raises.
        await ssh.run(save_command, check=True)

        try:
            # Pull the freshly written file over SFTP on the same connection.
            async with ssh.start_sftp_client() as sftp, sftp.open(
                remote_file, "rb"
            ) as remote:
                payload = await remote.read()
        finally:
            # Best-effort removal of the file from the device; never re-raised.
            try:
                await ssh.run(f"/file remove {remote_file}", check=True)
            except Exception as cleanup_err:
                logger.warning(
                    "Failed to remove backup file from device %s: %s",
                    ip,
                    cleanup_err,
                )

        return payload
|
|
|
|
|
|
async def run_backup(
    device_id: str,
    tenant_id: str,
    trigger_type: str,
    db_session: AsyncSession | None = None,
) -> dict:
    """Orchestrate a full config backup for a device.

    Steps:
        1. Load the device row from the DB.
        2. Decrypt its credentials with crypto.decrypt_credentials_hybrid()
           (Transit ciphertext preferred, legacy ciphertext as fallback).
        3. Capture /export compact and the binary backup concurrently via
           asyncio.gather().
        4. Compute the line delta vs the most recent export.rsc in git. For
           the first backup every line counts as added and none as removed;
           if the delta computation fails both values stay None.
        5. Encrypt both artifacts via Transit. If encryption fails, a warning
           is logged and the plaintext artifacts are committed instead
           (encryption_tier stays None).
        6. Commit both files to git (run_in_executor, git_store is sync).
        7. Insert a ConfigBackupRun record with commit SHA, trigger type,
           line deltas and encryption tier.
        8. Return a summary dict.

    Args:
        device_id: Device UUID as string.
        tenant_id: Tenant UUID as string.
        trigger_type: 'scheduled' | 'manual' | 'pre-restore'
        db_session: Optional AsyncSession with RLS context already set.
            When given, the new record is added and flushed but NOT committed
            here, and the session is left open for the caller.
            If None, AdminAsyncSessionLocal is used (scheduler context), the
            record is committed in a separate admin session, and the session
            opened here is closed before returning.

    Returns:
        Dict: {"commit_sha": str, "trigger_type": str,
               "lines_added": int|None, "lines_removed": int|None}

    Raises:
        ValueError: If device not found or missing credentials.
        asyncssh.Error: On SSH/SFTP failure.
    """
    # Inside a coroutine get_running_loop() is the documented way to obtain
    # the loop; it never creates one implicitly.
    loop = asyncio.get_running_loop()
    ts = datetime.now(timezone.utc).isoformat()

    # -----------------------------------------------------------------------
    # Step 1: Load device from DB
    # -----------------------------------------------------------------------
    if db_session is not None:
        session = db_session
        should_close = False
    else:
        # Scheduler context: use admin session (cross-tenant; RLS bypassed)
        session = AdminAsyncSessionLocal()
        should_close = True

    try:
        from sqlalchemy import select

        if should_close:
            # Admin session doesn't have RLS context — filter by tenant explicitly.
            result = await session.execute(
                select(Device).where(
                    Device.id == device_id,  # type: ignore[arg-type]
                    Device.tenant_id == tenant_id,  # type: ignore[arg-type]
                )
            )
        else:
            result = await session.execute(
                select(Device).where(Device.id == device_id)  # type: ignore[arg-type]
            )

        device = result.scalar_one_or_none()
        if device is None:
            raise ValueError(f"Device {device_id!r} not found for tenant {tenant_id!r}")

        if not device.encrypted_credentials_transit and not device.encrypted_credentials:
            raise ValueError(
                f"Device {device_id!r} has no stored credentials — cannot perform backup"
            )

        # -----------------------------------------------------------------------
        # Step 2: Decrypt credentials (dual-read: Transit preferred, legacy fallback)
        # -----------------------------------------------------------------------
        key = settings.get_encryption_key_bytes()
        creds_json = await decrypt_credentials_hybrid(
            device.encrypted_credentials_transit,
            device.encrypted_credentials,
            str(device.tenant_id),
            key,
        )
        creds = json.loads(creds_json)
        ssh_username = creds.get("username", "")
        ssh_password = creds.get("password", "")
        ip = device.ip_address

        # Fall back to the IP when the device has no hostname (log/commit label).
        hostname = device.hostname or ip

        # -----------------------------------------------------------------------
        # Step 3: Capture export and binary backup concurrently
        # -----------------------------------------------------------------------
        logger.info(
            "Starting %s backup for device %s (%s) tenant %s",
            trigger_type,
            hostname,
            ip,
            tenant_id,
        )

        # Both captures use the default SSH port (22); no port is passed here.
        export_text, binary_backup = await asyncio.gather(
            capture_export(ip, username=ssh_username, password=ssh_password),
            capture_binary_backup(ip, username=ssh_username, password=ssh_password),
        )

        # -----------------------------------------------------------------------
        # Step 4: Compute line delta vs prior version
        # -----------------------------------------------------------------------
        lines_added: int | None = None
        lines_removed: int | None = None

        prior_commits = await loop.run_in_executor(
            None, git_store.list_device_commits, tenant_id, device_id
        )

        if prior_commits:
            # NOTE(review): Step 5 commits Transit ciphertext to git, so the
            # prior export.rsc read here may be ciphertext rather than plain
            # RSC text — in that case this delta is not meaningful. Confirm
            # whether git_store.read_file decrypts; nothing visible here does.
            try:
                prior_export_bytes = await loop.run_in_executor(
                    None,
                    git_store.read_file,
                    tenant_id,
                    prior_commits[0]["sha"],
                    device_id,
                    "export.rsc",
                )
                prior_text = prior_export_bytes.decode("utf-8", errors="replace")
                lines_added, lines_removed = await loop.run_in_executor(
                    None, git_store.compute_line_delta, prior_text, export_text
                )
            except Exception as delta_err:
                logger.warning(
                    "Failed to compute line delta for device %s: %s",
                    device_id,
                    delta_err,
                )
                # Keep lines_added/lines_removed as None on error — non-fatal
        else:
            # First backup: all lines are "added", none removed
            lines_added = len(export_text.splitlines())
            lines_removed = 0

        # -----------------------------------------------------------------------
        # Step 5: Encrypt ALL backups via Transit (Tier 2: OpenBao Transit)
        # -----------------------------------------------------------------------
        encryption_tier: int | None = None
        git_export_content = export_text
        git_binary_content = binary_backup

        try:
            from app.services.crypto import encrypt_data_transit

            encrypted_export = await encrypt_data_transit(export_text, tenant_id)
            # The binary backup is base64-encoded first because the Transit
            # helper is fed text here.
            encrypted_binary = await encrypt_data_transit(
                base64.b64encode(binary_backup).decode(), tenant_id
            )
            # Transit ciphertext is text — store directly in git. Assign only
            # after BOTH encryptions succeeded so a partial failure never
            # mixes ciphertext and plaintext in one commit.
            git_export_content = encrypted_export
            git_binary_content = encrypted_binary.encode("utf-8")
            encryption_tier = 2
            logger.info(
                "Tier 2 Transit encryption applied for %s backup of device %s",
                trigger_type,
                device_id,
            )
        except Exception as enc_err:
            # Transit unavailable — fall back to plaintext (non-fatal)
            logger.warning(
                "Transit encryption failed for %s backup of device %s, "
                "storing plaintext: %s",
                trigger_type,
                device_id,
                enc_err,
            )
            # Keep encryption_tier = None (plaintext fallback)

        # -----------------------------------------------------------------------
        # Step 6: Commit to git (wrapped in run_in_executor — pygit2 is sync C bindings)
        # -----------------------------------------------------------------------
        commit_message = f"{trigger_type}: {hostname} ({ip}) at {ts}"

        commit_sha = await loop.run_in_executor(
            None,
            git_store.commit_backup,
            tenant_id,
            device_id,
            git_export_content,
            git_binary_content,
            commit_message,
        )

        logger.info(
            "Committed backup for device %s to git SHA %s (tier=%s)",
            device_id,
            commit_sha[:8],
            encryption_tier,
        )

        # -----------------------------------------------------------------------
        # Step 7: Insert ConfigBackupRun record
        # -----------------------------------------------------------------------
        # The record is identical in both contexts; only the session that
        # persists it differs.
        backup_run = ConfigBackupRun(
            device_id=device.id,
            tenant_id=device.tenant_id,
            commit_sha=commit_sha,
            trigger_type=trigger_type,
            lines_added=lines_added,
            lines_removed=lines_removed,
            encryption_tier=encryption_tier,
        )

        if not should_close:
            # RLS-scoped session from API context — add and flush only; the
            # commit is not issued here.
            session.add(backup_run)
            await session.flush()
        else:
            # Admin session — set tenant context before insert so RLS policy is satisfied
            async with AdminAsyncSessionLocal() as admin_session:
                await set_tenant_context(admin_session, str(device.tenant_id))
                admin_session.add(backup_run)
                await admin_session.commit()

        return {
            "commit_sha": commit_sha,
            "trigger_type": trigger_type,
            "lines_added": lines_added,
            "lines_removed": lines_removed,
        }

    finally:
        # Only close the session this function opened itself.
        if should_close:
            await session.close()
|