From 1c16732668a366fb76d4e8dc3dc247978e2745c4 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Thu, 28 May 2026 12:30:25 +0300 Subject: [PATCH] feat(helexa-acp): route Qwen3 inline <think> blocks to reasoning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qwen3 emits chain-of-thought as literal <think>...</think> tags inside delta.content rather than via the separate reasoning_content field — so without parsing the markers, the thinking shows up in the message pane as ordinary text. Add a small ThinkParser in qwen3.rs (same chunk-boundary discipline as ToolCallParser) and stage it after the tool-call parser in decode_stream: text events from the tool-call parser are fed in and split into TextDelta / ReasoningDelta. Zed now renders thinking in its dedicated thought UI; visible answer text stays in the message pane. The parking-lot entry from the plan is now closed. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/helexa-acp/src/provider/openai_chat.rs | 95 ++++++- crates/helexa-acp/src/qwen3.rs | 239 ++++++++++++++++++ 2 files changed, 332 insertions(+), 2 deletions(-) diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs index 77c33d2..dc50ec3 100644 --- a/crates/helexa-acp/src/provider/openai_chat.rs +++ b/crates/helexa-acp/src/provider/openai_chat.rs @@ -296,6 +296,49 @@ mod tests { assert_eq!(events.len(), 4); } + #[tokio::test] + async fn decodes_qwen3_inline_think_block_to_reasoning_deltas() { + // Qwen3-shaped output: a `<think>` block lives + // inside `delta.content`. The decoder should route bytes + // inside the block to ReasoningDelta and the surrounding + // content to TextDelta. Marker boundaries split across + // chunks to exercise the parser's prefix-hold logic. 
+ let sse = fake_sse(vec![ + r#"{"choices":[{"delta":{"content":"internal reasoningvisible answer"}}]}"#, + r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#, + "[DONE]", + ]); + let events: Vec<_> = decode_stream(sse, CancellationToken::new()) + .collect::>() + .await + .into_iter() + .map(|r| r.unwrap()) + .collect(); + + let text: String = events + .iter() + .filter_map(|e| match e { + CompletionEvent::TextDelta(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + let reasoning: String = events + .iter() + .filter_map(|e| match e { + CompletionEvent::ReasoningDelta(r) => Some(r.as_str()), + _ => None, + }) + .collect(); + assert_eq!(text, "visible answer"); + assert_eq!(reasoning, "internal reasoning"); + assert!(matches!( + events.last(), + Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop") + )); + } + #[tokio::test] async fn decodes_qwen3_inline_tool_call_from_content_stream() { // Qwen3-shaped output: `{…}` inside @@ -638,6 +681,11 @@ where // structured tool-call events, holding back only the suffix // bytes that could be the start of a marker. let mut qwen_parser = crate::qwen3::ToolCallParser::new(); + // Same shape, second stage: take the plain-text events out + // of the tool-call parser and split off `` + // blocks into ReasoningDelta so Zed can render them in its + // dedicated thought UI rather than the message pane. 
+ let mut think_parser = crate::qwen3::ThinkParser::new(); let mut sse = Box::pin(sse); loop { @@ -678,7 +726,21 @@ where for ev in qwen_parser.feed(&text) { match ev { crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { - yield Ok(CompletionEvent::TextDelta(t)); + for tev in think_parser.feed(&t) { + match tev { + crate::qwen3::ThinkEvent::Text(s) + if !s.is_empty() => + { + yield Ok(CompletionEvent::TextDelta(s)); + } + crate::qwen3::ThinkEvent::Reasoning(s) + if !s.is_empty() => + { + yield Ok(CompletionEvent::ReasoningDelta(s)); + } + _ => {} + } + } } crate::qwen3::ParserEvent::Text(_) => {} crate::qwen3::ParserEvent::Start { index, name } => { @@ -747,7 +809,21 @@ where for ev in qwen_parser.finish() { match ev { crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { - yield Ok(CompletionEvent::TextDelta(t)); + for tev in think_parser.feed(&t) { + match tev { + crate::qwen3::ThinkEvent::Text(s) + if !s.is_empty() => + { + yield Ok(CompletionEvent::TextDelta(s)); + } + crate::qwen3::ThinkEvent::Reasoning(s) + if !s.is_empty() => + { + yield Ok(CompletionEvent::ReasoningDelta(s)); + } + _ => {} + } + } } crate::qwen3::ParserEvent::Text(_) => {} crate::qwen3::ParserEvent::Start { index, name } => { @@ -768,6 +844,21 @@ where } } } + // Flush the think parser too — any + // unclosed at stream end becomes + // a final ReasoningDelta rather than + // being lost. 
+ for tev in think_parser.finish() { + match tev { + crate::qwen3::ThinkEvent::Text(s) if !s.is_empty() => { + yield Ok(CompletionEvent::TextDelta(s)); + } + crate::qwen3::ThinkEvent::Reasoning(s) if !s.is_empty() => { + yield Ok(CompletionEvent::ReasoningDelta(s)); + } + _ => {} + } + } yield Ok(CompletionEvent::Finish { reason: Some(reason) }); } } diff --git a/crates/helexa-acp/src/qwen3.rs b/crates/helexa-acp/src/qwen3.rs index 474e8e7..80baa4f 100644 --- a/crates/helexa-acp/src/qwen3.rs +++ b/crates/helexa-acp/src/qwen3.rs @@ -37,6 +37,11 @@ const TOOL_CALL_OPEN: &str = ""; /// One closing marker. Length 12. const TOOL_CALL_CLOSE: &str = ""; +/// Reasoning open. Length 7. +const THINK_OPEN: &str = ""; +/// Reasoning close. Length 8. +const THINK_CLOSE: &str = ""; + // ── System-prompt-side rendering ──────────────────────────────────── /// Append-this-to-the-system-prompt block describing the available @@ -295,6 +300,98 @@ struct ToolCallBody { arguments: serde_json::Value, } +// ── Think-block parser ────────────────────────────────────────────── + +/// Events from [`ThinkParser`]. Plain text outside any `` +/// block stays `Text`; bytes between `` and `` become +/// `Reasoning` so the agent can route them to a thought-channel +/// notification (Zed surfaces these in a dedicated UI affordance +/// rather than the main message pane). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ThinkEvent { + Text(String), + Reasoning(String), +} + +/// Streaming parser for Qwen3-style inline reasoning. Same +/// chunk-boundary discipline as [`ToolCallParser`]: hold back only +/// the suffix that could be the start of the marker we're scanning +/// for. Markers (``, ``) never nest; a stray +/// `` outside a block is emitted as text (the model +/// occasionally writes the tag conversationally). 
+#[derive(Debug, Default)] +pub struct ThinkParser { + buffer: String, + in_think: bool, +} + +impl ThinkParser { + pub fn new() -> Self { + Self::default() + } + + pub fn feed(&mut self, chunk: &str) -> Vec { + self.buffer.push_str(chunk); + self.drain() + } + + /// Flush any buffered tail at end-of-stream. If we end mid-think + /// (no closing tag arrived), emit what we have as reasoning so + /// the partial thought isn't silently dropped. + pub fn finish(&mut self) -> Vec { + let mut events = self.drain(); + if !self.buffer.is_empty() { + let raw = std::mem::take(&mut self.buffer); + if self.in_think { + events.push(ThinkEvent::Reasoning(raw)); + } else { + events.push(ThinkEvent::Text(raw)); + } + } + self.in_think = false; + events + } + + fn drain(&mut self) -> Vec { + let mut events = Vec::new(); + loop { + if self.in_think { + if let Some(end) = self.buffer.find(THINK_CLOSE) { + let body = self.buffer[..end].to_string(); + if !body.is_empty() { + events.push(ThinkEvent::Reasoning(body)); + } + self.buffer.drain(..end + THINK_CLOSE.len()); + self.in_think = false; + } else { + let hold = longest_marker_prefix_suffix(&self.buffer, THINK_CLOSE); + let safe = self.buffer.len() - hold; + if safe > 0 { + let r: String = self.buffer.drain(..safe).collect(); + events.push(ThinkEvent::Reasoning(r)); + } + return events; + } + } else if let Some(start) = self.buffer.find(THINK_OPEN) { + let text = self.buffer[..start].to_string(); + if !text.is_empty() { + events.push(ThinkEvent::Text(text)); + } + self.buffer.drain(..start + THINK_OPEN.len()); + self.in_think = true; + } else { + let hold = longest_marker_prefix_suffix(&self.buffer, THINK_OPEN); + let safe = self.buffer.len() - hold; + if safe > 0 { + let t: String = self.buffer.drain(..safe).collect(); + events.push(ThinkEvent::Text(t)); + } + return events; + } + } + } +} + // ── Tests ─────────────────────────────────────────────────────────── #[cfg(test)] @@ -600,6 +697,148 @@ mod tests { ); } + // ── 
ThinkParser ───────────────────────────────────────────────── + + fn drive_think(parser: &mut ThinkParser, chunks: &[&str]) -> Vec { + let mut events = Vec::new(); + for c in chunks { + events.extend(parser.feed(c)); + } + events.extend(parser.finish()); + events + } + + #[test] + fn think_plain_text_passes_through() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["hello ", "world"]); + assert_eq!(events.len(), 2); + assert_eq!(events[0], ThinkEvent::Text("hello ".into())); + assert_eq!(events[1], ThinkEvent::Text("world".into())); + } + + #[test] + fn think_splits_text_reasoning_text() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["before thinking now after"]); + assert_eq!(events[0], ThinkEvent::Text("before ".into())); + assert_eq!(events[1], ThinkEvent::Reasoning("thinking now".into())); + assert_eq!(events[2], ThinkEvent::Text(" after".into())); + } + + #[test] + fn think_open_marker_split_across_chunks() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["pre <", "think>middle post"]); + let texts: String = events + .iter() + .filter_map(|e| match e { + ThinkEvent::Text(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + let reasoning: String = events + .iter() + .filter_map(|e| match e { + ThinkEvent::Reasoning(r) => Some(r.as_str()), + _ => None, + }) + .collect(); + assert_eq!(texts, "pre post"); + assert_eq!(reasoning, "middle"); + } + + #[test] + fn think_close_marker_split_across_chunks() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["ab<", "/think>c"]); + let reasoning: String = events + .iter() + .filter_map(|e| match e { + ThinkEvent::Reasoning(r) => Some(r.as_str()), + _ => None, + }) + .collect(); + assert_eq!(reasoning, "b"); + let last_text = events.iter().rev().find_map(|e| match e { + ThinkEvent::Text(t) => Some(t.as_str()), + _ => None, + }); + assert_eq!(last_text, Some("c")); + } + + #[test] + fn 
think_one_byte_at_a_time_matches_single_chunk() { + let input = "xinternaly"; + let mut single = ThinkParser::new(); + let single_events = drive_think(&mut single, &[input]); + + let chunks: Vec = input.chars().map(|c| c.to_string()).collect(); + let chunk_refs: Vec<&str> = chunks.iter().map(|s| s.as_str()).collect(); + let mut byte = ThinkParser::new(); + let byte_events = drive_think(&mut byte, &chunk_refs); + + let text = |evs: &[ThinkEvent]| -> (String, String) { + let mut t = String::new(); + let mut r = String::new(); + for e in evs { + match e { + ThinkEvent::Text(s) => t.push_str(s), + ThinkEvent::Reasoning(s) => r.push_str(s), + } + } + (t, r) + }; + assert_eq!(text(&single_events), text(&byte_events)); + assert_eq!(text(&byte_events), ("xy".into(), "internal".into())); + } + + #[test] + fn think_empty_block_emits_no_reasoning_event() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["real"]); + // No Reasoning event for an empty ; just the + // trailing text. + assert!( + !events.iter().any(|e| matches!(e, ThinkEvent::Reasoning(_))), + "events: {events:?}" + ); + assert_eq!(events[0], ThinkEvent::Text("real".into())); + } + + #[test] + fn think_unterminated_block_flushes_as_reasoning_on_finish() { + let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["xthinking but no close"]); + assert_eq!(events[0], ThinkEvent::Text("x".into())); + let reasoning: String = events + .iter() + .filter_map(|e| match e { + ThinkEvent::Reasoning(r) => Some(r.as_str()), + _ => None, + }) + .collect(); + assert_eq!(reasoning, "thinking but no close"); + } + + #[test] + fn think_bare_close_marker_passes_through_as_text() { + // Model emits with no preceding . Treat the + // bare close as ordinary text — the agent doesn't try to + // retroactively reclassify earlier deltas. 
+ let mut p = ThinkParser::new(); + let events = drive_think(&mut p, &["hello world"]); + let text: String = events + .iter() + .filter_map(|e| match e { + ThinkEvent::Text(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + assert_eq!(text, "hello world"); + assert!(!events.iter().any(|e| matches!(e, ThinkEvent::Reasoning(_)))); + } + #[test] fn quoted_lt_inside_args_does_not_trigger_marker() { // Sanity: a string value that happens to contain "