From f8c0da0ebf36427c95a4e78bdd3eea27889a67cc Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Thu, 4 Jun 2026 14:53:56 +0300 Subject: [PATCH] =?UTF-8?q?fix(neuron):=20TP-vision=20Stage=200=20?= =?UTF-8?q?=E2=80=94=20reject=20image=20requests=20on=20the=20TP=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TP inference path has no vision tower, and the TP dispatch in chat_completion / inference_stream returns before the VisionUnsupported guard runs — so an image request to a TP-loaded model (e.g. beast's tp=2 Qwen3.6-27B) was silently dropped and answered from text alone, the exact issue-#3 confident-hallucination pattern Stage C killed for single-GPU. Add the request_has_images → VisionUnsupported guard to both chat_completion_tp and inference_tp_stream, before prefill / before the SSE stream opens, so beast returns a clean 400 vision_unsupported. The guard is unconditional for now (TP has no tower); Stage 3 makes it conditional on the TP model's has_vision once real TP-vision lands. Detection is covered by the existing request_has_images unit test; the guard itself is cuda-gated (validated by CI's CUDA type-check). Refs TP-vision plan Stage 0. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/scheduled_tasks.lock | 1 + crates/neuron/src/harness/candle.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 .claude/scheduled_tasks.lock diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 0000000..7850727 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"a27586bb-2ca7-4e92-8d82-12f41b39f392","pid":3106893,"procStart":"59753850","acquiredAt":1780571089798} \ No newline at end of file diff --git a/crates/neuron/src/harness/candle.rs b/crates/neuron/src/harness/candle.rs index 701f323..7daad46 100644 --- a/crates/neuron/src/harness/candle.rs +++ b/crates/neuron/src/harness/candle.rs @@ -2739,6 +2739,18 @@ impl CandleHarness { return Err(poisoned_error(&model_id)); } + // Stage 0 (TP-vision): the TP path has no vision tower yet, so + // an image-bearing request can't be honoured. Reject it cleanly + // with `vision_unsupported` instead of silently dropping the + // image and answering from text alone (the issue-#3 confident- + // hallucination pattern). Made conditional on the TP model's + // `has_vision` once Stage 3 wires real TP-vision. + if request_has_images(&request) { + let _g = span.enter(); + tracing::warn!("TP chat_completion: rejecting image request, TP vision unsupported"); + return Err(InferenceError::VisionUnsupported { model_id }); + } + let tp_for_marker = Arc::clone(&tp); let handle = tokio::spawn(chat_completion_tp_inner(tp, request).instrument(span.clone())); match handle.await { @@ -2816,6 +2828,20 @@ impl CandleHarness { return Err(poisoned_error(&request.model)); } + // Stage 0 (TP-vision): reject image requests on the TP streaming + // path before opening the SSE stream — the TP path has no vision + // tower yet, so honouring the image is impossible and silently + // dropping it would hallucinate. Returns a clean 400; made + // conditional on `has_vision` in Stage 3. + if request_has_images(&request) { + tracing::warn!( + "TP chat_completion (stream): rejecting image request, TP vision unsupported" + ); + return Err(InferenceError::VisionUnsupported { + model_id: request.model.clone(), + }); + } + let prompt = build_prompt_for_request(tp.chat_template.as_deref(), &request); let encoding = tp .tokenizer