Fix agent crash on missing DISPLAY and relay disconnect error

- Auto-detect DISPLAY on Linux by scanning /tmp/.X11-unix/ sockets,
  falling back to 'w' output, then :0 — runs before mss/pynput import
- ScreenCapture no longer raises when mss() fails on entry or when a frame
  grab fails; the agent stays connected and notifies the viewer with an
  error message if capture is unavailable
- stream_frames skips None frames instead of crashing the WebSocket
- Relay: check for websocket.disconnect message type to avoid
  'Cannot call receive once a disconnect message has been received'

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
monoadmin
2026-04-10 16:38:58 -07:00
parent e808112aa4
commit c361922005
3 changed files with 84 additions and 13 deletions

View File

@@ -27,6 +27,46 @@ from typing import Optional
import argparse import argparse
import signal import signal
# ── Display detection (Linux) — must run before mss/pynput imports ────────────
def _ensure_display():
    """Auto-detect and set $DISPLAY on Linux if not already set.

    Does nothing on non-Linux platforms or when ``DISPLAY`` is already
    present (and non-empty) in the environment. Otherwise the following
    sources are tried in order, and the first hit is written to
    ``os.environ["DISPLAY"]``:

    1. X11 Unix sockets named ``/tmp/.X11-unix/X<number>`` — the lowest
       display *number* wins.
    2. The third column of ``w -h`` output — the first entry that starts
       with ``:`` wins.
    3. A hard-coded fallback of ``:0``.

    Returns:
        None. The only effect is the mutation of ``os.environ``.

    NOTE(review): this relies on the module-level ``log`` object already
    being defined when the function is called — confirm, since the call
    site runs during module import.
    """
    if platform.system() != "Linux" or os.environ.get("DISPLAY"):
        return

    import glob

    # 1. Look for X11 Unix sockets in /tmp/.X11-unix/
    socket_prefix = "/tmp/.X11-unix/X"
    suffixes = (
        path[len(socket_prefix):] for path in glob.glob(socket_prefix + "*")
    )
    # Compare display numbers as integers: a plain string sort would put
    # "X10" ahead of "X2". Entries whose suffix is not a number are ignored.
    display_numbers = sorted(int(s) for s in suffixes if s.isdecimal())
    if display_numbers:
        num = display_numbers[0]
        os.environ["DISPLAY"] = f":{num}"
        log.info("Auto-detected DISPLAY=:%s", num)
        return

    # 2. Ask 'w' which displays logged-in users are on
    try:
        # Invoke 'w' directly and parse in Python instead of piping through
        # bash/awk/grep/head, so the probe needs no shell or extra tools.
        out = subprocess.check_output(
            ["w", "-h"], stderr=subprocess.DEVNULL, timeout=2, text=True
        )
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Best-effort probe: a missing binary, a non-zero exit, a timeout or
        # undecodable output just means we move on to the fallback.
        log.debug("Could not query 'w' for a display: %s", exc)
    else:
        for line in out.splitlines():
            fields = line.split()
            if len(fields) >= 3 and fields[2].startswith(":"):
                os.environ["DISPLAY"] = fields[2]
                log.info("Auto-detected DISPLAY=%s from w", fields[2])
                return

    # 3. Fallback — :0 is correct on most single-user desktop systems
    os.environ["DISPLAY"] = ":0"
    log.info("No display found via socket scan — falling back to DISPLAY=:0")
# Run before importing mss / pynput (they read DISPLAY at import time on Linux)
_ensure_display()
# Third-party — installed via requirements.txt / bundled by PyInstaller # Third-party — installed via requirements.txt / bundled by PyInstaller
import httpx import httpx
import websockets import websockets
@@ -128,20 +168,34 @@ class ScreenCapture:
self._sct = None self._sct = None
def __enter__(self): def __enter__(self):
self._sct = mss() try:
self._sct = mss()
except Exception as e:
log.warning(f"Screen capture unavailable: {e}")
self._sct = None
return self return self
def __exit__(self, *args): def __exit__(self, *args):
if self._sct: if self._sct:
self._sct.close() self._sct.close()
def capture(self) -> bytes: @property
monitor = self._sct.monitors[1] # Primary monitor def available(self) -> bool:
img = self._sct.grab(monitor) return self._sct is not None
pil = Image.frombytes("RGB", img.size, img.bgra, "raw", "BGRX")
buf = BytesIO() def capture(self) -> Optional[bytes]:
pil.save(buf, format="JPEG", quality=self.quality, optimize=False) if not self._sct:
return buf.getvalue() return None
try:
monitor = self._sct.monitors[1] # Primary monitor
img = self._sct.grab(monitor)
pil = Image.frombytes("RGB", img.size, img.bgra, "raw", "BGRX")
buf = BytesIO()
pil.save(buf, format="JPEG", quality=self.quality, optimize=False)
return buf.getvalue()
except Exception as e:
log.warning(f"Frame capture failed: {e}")
return None
@property @property
def frame_delay(self) -> float: def frame_delay(self) -> float:
@@ -292,16 +346,23 @@ class Agent:
async def _message_loop(self, ws): async def _message_loop(self, ws):
with ScreenCapture(fps=15, quality=60) as screen: with ScreenCapture(fps=15, quality=60) as screen:
if not screen.available:
log.warning(
"Screen capture unavailable — agent will stay connected "
"but cannot stream. Check that $DISPLAY is set."
)
stream_task: Optional[asyncio.Task] = None stream_task: Optional[asyncio.Task] = None
async def stream_frames(): async def stream_frames():
while self._streaming and not self._stop_event.is_set(): while self._streaming and not self._stop_event.is_set():
t0 = time.monotonic() t0 = time.monotonic()
try: frame = screen.capture()
frame = screen.capture() if frame:
await ws.send(frame) try:
except Exception: await ws.send(frame)
break except Exception:
break
elapsed = time.monotonic() - t0 elapsed = time.monotonic() - t0
delay = max(0, screen.frame_delay - elapsed) delay = max(0, screen.frame_delay - elapsed)
await asyncio.sleep(delay) await asyncio.sleep(delay)
@@ -323,6 +384,12 @@ class Agent:
log.info(f"Viewer connected — session {session_id}") log.info(f"Viewer connected — session {session_id}")
self._streaming = True self._streaming = True
self._active_session = session_id self._active_session = session_id
if not screen.available:
# Tell viewer why they won't see frames
await ws.send(json.dumps({
"type": "error",
"message": "Screen capture unavailable on this machine (no display). Set $DISPLAY and restart the agent.",
}))
if stream_task and not stream_task.done(): if stream_task and not stream_task.done():
stream_task.cancel() stream_task.cancel()
stream_task = asyncio.create_task(stream_frames()) stream_task = asyncio.create_task(stream_frames())

View File

@@ -113,6 +113,10 @@ async def agent_endpoint(
while True: while True:
msg = await websocket.receive() msg = await websocket.receive()
# Client sent a close frame — exit cleanly
if msg.get("type") == "websocket.disconnect":
break
if "bytes" in msg and msg["bytes"]: if "bytes" in msg and msg["bytes"]:
# Binary = JPEG frame → forward to all viewers watching this machine # Binary = JPEG frame → forward to all viewers watching this machine
frame_data = msg["bytes"] frame_data = msg["bytes"]