From 6927286cabcf3e91479b357c80703583165ec50b Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Sun, 31 May 2026 09:37:10 +0300 Subject: [PATCH] fix(neuron): clone id/model_id before TP spawn so wire projector can use them The Step 1 refactor moved the InferenceEvent receiver wrap to *after* the orchestration spawn in chat_completion_tp_stream, but the spawn moves both `id` and `model_id` into its async closure (used heavily by acquire_pool_lock, NCCL ops, and tracing). Result: borrowck error E0382 use-of-moved-value on the wire_chat::project_chat_stream call. The non-CUDA build doesn't exercise this branch (it lives behind `#[cfg(feature = "cuda")]`) which is why the workspace clippy/test gate passed locally and on the regular CI workflow. The RPM build workflow, which compiles with --features cuda, caught it (run 244 jobs 2/3/4 against beast / ampere / ada respectively, all the same error). Fix: snapshot `id` and `model_id` into `projector_id` / `projector_model_id` before the spawn, use those at the projector call site. The originals stay free to be moved into the closure. Co-Authored-By: Claude Opus 4.7 --- crates/neuron/src/harness/candle.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/neuron/src/harness/candle.rs b/crates/neuron/src/harness/candle.rs index 357e15a..8cd095d 100644 --- a/crates/neuron/src/harness/candle.rs +++ b/crates/neuron/src/harness/candle.rs @@ -2265,6 +2265,13 @@ impl CandleHarness { let id = format!("chatcmpl-{:x}", unix_subsec_nanos()); let created = unix_now_secs(); let tokenizer = tp.tokenizer.clone(); + // The spawned orchestration task below consumes both `id` + // and `model_id` (tracing, pool lookups, NCCL ops use them + // heavily). The wire projector at the bottom of this fn + // also needs them to stamp request metadata onto every + // chunk. Clone here so each side owns its copy. + let projector_id = id.clone(); + let projector_model_id = model_id.clone(); // Bounded channel — back-pressures the producer when // downstream consumption (wire projector → SSE writer) is @@ -2537,8 +2544,11 @@ impl CandleHarness { // Wrap the InferenceEvent receiver in the OpenAI chat // projection so the HTTP handler keeps consuming - // ChatCompletionChunks unchanged. - let rx = wire_chat::project_chat_stream(event_rx, id, created, model_id); + // ChatCompletionChunks unchanged. Uses the clones we + // stashed before the spawn — the originals were moved + // into the orchestration task above. + let rx = + wire_chat::project_chat_stream(event_rx, projector_id, created, projector_model_id); Ok(rx) } }