feat(helexa-acp): route Qwen3 inline <think> blocks to reasoning

Qwen3 emits chain-of-thought as literal <think>...</think> tags inside delta.content rather than via the separate reasoning_content field — so without parsing the markers, the thinking shows up in the message pane as ordinary text. Add a small ThinkParser in qwen3.rs (same chunk-boundary discipline as ToolCallParser) and stage it after the tool-call parser in decode_stream: text events from the tool-call parser are fed in and split into TextDelta / ReasoningDelta. Zed now renders thinking in its dedicated thought UI; visible answer text stays in the message pane. The parking-lot entry from the plan is now closed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 12:30:25 +03:00
parent 5a0861d639
commit 1c16732668
2 changed files with 332 additions and 2 deletions
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -296,6 +296,49 @@ mod tests {
        assert_eq!(events.len(), 4);
    }
    #[tokio::test]
    async fn decodes_qwen3_inline_think_block_to_reasoning_deltas() {
        // Qwen3 puts its chain-of-thought inline in `delta.content`,
        // wrapped in `<think>…</think>`. Both tags are deliberately cut
        // in the middle of a chunk ("<thi" + "nk>") so the decoder has
        // to hold back a partial marker before it can classify bytes.
        // Expected routing: inside the block → ReasoningDelta, outside
        // the block → TextDelta.
        let stream = decode_stream(
            fake_sse(vec![
                r#"{"choices":[{"delta":{"content":"<thi"}}]}"#,
                r#"{"choices":[{"delta":{"content":"nk>internal reasoning</thi"}}]}"#,
                r#"{"choices":[{"delta":{"content":"nk>visible answer"}}]}"#,
                r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
                "[DONE]",
            ]),
            CancellationToken::new(),
        );
        let events: Vec<_> = stream
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .map(Result::unwrap)
            .collect();

        // Concatenate each channel separately; how the decoder slices
        // the deltas is irrelevant, only the per-channel totals matter.
        let mut visible = String::new();
        let mut thoughts = String::new();
        for event in &events {
            match event {
                CompletionEvent::TextDelta(t) => visible.push_str(t.as_str()),
                CompletionEvent::ReasoningDelta(r) => thoughts.push_str(r.as_str()),
                _ => {}
            }
        }
        assert_eq!(visible, "visible answer");
        assert_eq!(thoughts, "internal reasoning");

        // The stream must still terminate with the upstream finish reason.
        match events.last() {
            Some(CompletionEvent::Finish { reason }) => {
                assert_eq!(reason.as_deref(), Some("stop"));
            }
            _ => panic!("stream did not end with a Finish event"),
        }
    }
    #[tokio::test]
    async fn decodes_qwen3_inline_tool_call_from_content_stream() {
        // Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside
@@ -638,6 +681,11 @@ where
        // structured tool-call events, holding back only the suffix
        // bytes that could be the start of a marker.
        let mut qwen_parser = crate::qwen3::ToolCallParser::new();
        // Same shape, second stage: take the plain-text events out
        // of the tool-call parser and split off `<think>…</think>`
        // blocks into ReasoningDelta so Zed can render them in its
        // dedicated thought UI rather than the message pane.
        let mut think_parser = crate::qwen3::ThinkParser::new();
        let mut sse = Box::pin(sse);
        loop {
@@ -678,7 +726,21 @@ where
                            for ev in qwen_parser.feed(&text) {
                                match ev {
                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
-                                        yield Ok(CompletionEvent::TextDelta(t));
+                                        for tev in think_parser.feed(&t) {
                                            match tev {
                                                crate::qwen3::ThinkEvent::Text(s)
                                                    if !s.is_empty() =>
                                                {
                                                    yield Ok(CompletionEvent::TextDelta(s));
                                                }
                                                crate::qwen3::ThinkEvent::Reasoning(s)
                                                    if !s.is_empty() =>
                                                {
                                                    yield Ok(CompletionEvent::ReasoningDelta(s));
                                                }
                                                _ => {}
                                            }
                                        }
                                    }
                                    crate::qwen3::ParserEvent::Text(_) => {}
                                    crate::qwen3::ParserEvent::Start { index, name } => {
@@ -747,7 +809,21 @@ where
                            for ev in qwen_parser.finish() {
                                match ev {
                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
-                                        yield Ok(CompletionEvent::TextDelta(t));
+                                        for tev in think_parser.feed(&t) {
                                            match tev {
                                                crate::qwen3::ThinkEvent::Text(s)
                                                    if !s.is_empty() =>
                                                {
                                                    yield Ok(CompletionEvent::TextDelta(s));
                                                }
                                                crate::qwen3::ThinkEvent::Reasoning(s)
                                                    if !s.is_empty() =>
                                                {
                                                    yield Ok(CompletionEvent::ReasoningDelta(s));
                                                }
                                                _ => {}
                                            }
                                        }
                                    }
                                    crate::qwen3::ParserEvent::Text(_) => {}
                                    crate::qwen3::ParserEvent::Start { index, name } => {
@@ -768,6 +844,21 @@ where
                                    }
                                }
                            }
                            // Flush the think parser too — any
                            // unclosed <think> at stream end becomes
                            // a final ReasoningDelta rather than
                            // being lost.
                            for tev in think_parser.finish() {
                                match tev {
                                    crate::qwen3::ThinkEvent::Text(s) if !s.is_empty() => {
                                        yield Ok(CompletionEvent::TextDelta(s));
                                    }
                                    crate::qwen3::ThinkEvent::Reasoning(s) if !s.is_empty() => {
                                        yield Ok(CompletionEvent::ReasoningDelta(s));
                                    }
                                    _ => {}
                                }
                            }
                            yield Ok(CompletionEvent::Finish { reason: Some(reason) });
                        }
                    }
--- a/crates/helexa-acp/src/qwen3.rs
+++ b/crates/helexa-acp/src/qwen3.rs
@@ -37,6 +37,11 @@ const TOOL_CALL_OPEN: &str = "<tool_call>";
 /// One closing marker. Length 12.
 const TOOL_CALL_CLOSE: &str = "</tool_call>";
 /// Reasoning open. Length 7. [`ThinkParser`] treats everything that
 /// follows this marker as reasoning until [`THINK_CLOSE`] is seen.
 const THINK_OPEN: &str = "<think>";
 /// Reasoning close. Length 8. Ends the reasoning span started by
 /// [`THINK_OPEN`]; the marker itself is never emitted when it closes
 /// an open block.
 const THINK_CLOSE: &str = "</think>";
 // ── System-prompt-side rendering ────────────────────────────────────
 /// Append-this-to-the-system-prompt block describing the available
@@ -295,6 +300,98 @@ struct ToolCallBody {
    arguments: serde_json::Value,
 }
 // ── Think-block parser ──────────────────────────────────────────────
 /// Events from [`ThinkParser`]. Plain text outside any `<think>`
 /// block stays `Text`; bytes between `<think>` and `</think>` become
 /// `Reasoning` so the agent can route them to a thought-channel
 /// notification (Zed surfaces these in a dedicated UI affordance
 /// rather than the main message pane).
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ThinkEvent {
    /// Content found outside any `<think>…</think>` block.
    Text(String),
    /// Content found between `<think>` and `</think>`; the markers
    /// themselves are stripped and never appear in the payload.
    Reasoning(String),
 }
 /// Streaming parser for Qwen3-style inline reasoning. Same
 /// chunk-boundary discipline as [`ToolCallParser`]: hold back only
 /// the suffix that could be the start of the marker we're scanning
 /// for. Markers (`<think>`, `</think>`) never nest; a stray
 /// `</think>` outside a block is emitted as text (the model
 /// occasionally writes the tag conversationally).
 #[derive(Debug, Default)]
 pub struct ThinkParser {
    /// Input that has been fed but not yet emitted. Between calls this
    /// holds only a trailing fragment that might be the beginning of
    /// the marker currently being scanned for.
    buffer: String,
    /// `true` after a `<think>` has been consumed and before its
    /// matching `</think>`; selects which marker is scanned for and
    /// which event variant pending bytes are emitted as.
    in_think: bool,
 }
 impl ThinkParser {
    /// Builds a parser that starts outside any `<think>` block with
    /// nothing buffered.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `chunk` to the pending input and returns every event
    /// that can already be classified. A trailing fragment that could
    /// still turn out to be the start of the marker being scanned for
    /// is kept back until more input (or [`finish`](Self::finish))
    /// resolves it.
    pub fn feed(&mut self, chunk: &str) -> Vec<ThinkEvent> {
        self.buffer += chunk;
        self.drain()
    }

    /// End-of-stream flush. Whatever is still buffered is emitted
    /// as-is: as reasoning when the stream stopped inside an unclosed
    /// `<think>` block (so a partial thought is not lost), as text
    /// otherwise. The parser is left outside any block afterwards.
    pub fn finish(&mut self) -> Vec<ThinkEvent> {
        let mut out = self.drain();
        let leftover = std::mem::take(&mut self.buffer);
        let was_thinking = std::mem::replace(&mut self.in_think, false);
        if !leftover.is_empty() {
            out.push(Self::event_for(was_thinking, leftover));
        }
        out
    }

    /// Wraps `payload` in the variant that matches the given mode.
    fn event_for(in_think: bool, payload: String) -> ThinkEvent {
        if in_think {
            ThinkEvent::Reasoning(payload)
        } else {
            ThinkEvent::Text(payload)
        }
    }

    /// Consumes as much of the buffer as can be classified right now.
    ///
    /// Each pass looks for the one marker that can change the current
    /// mode: `</think>` while inside a block, `<think>` while outside.
    /// A hit emits the bytes before the marker, discards the marker
    /// and flips the mode. A miss emits everything except the longest
    /// buffer suffix that is a prefix of that marker, then stops.
    fn drain(&mut self) -> Vec<ThinkEvent> {
        let mut out = Vec::new();
        loop {
            let marker: &str = if self.in_think { THINK_CLOSE } else { THINK_OPEN };
            match self.buffer.find(marker) {
                Some(at) => {
                    // Split into `before | marker | after`; keep
                    // `after` buffered and drop the marker itself.
                    let after = self.buffer.split_off(at + marker.len());
                    self.buffer.truncate(at);
                    let before = std::mem::replace(&mut self.buffer, after);
                    if !before.is_empty() {
                        out.push(Self::event_for(self.in_think, before));
                    }
                    self.in_think = !self.in_think;
                }
                None => {
                    // No complete marker yet: release everything that
                    // cannot be part of one and wait for more input.
                    let held = longest_marker_prefix_suffix(&self.buffer, marker);
                    let ready_len = self.buffer.len() - held;
                    if ready_len > 0 {
                        let kept = self.buffer.split_off(ready_len);
                        let ready = std::mem::replace(&mut self.buffer, kept);
                        out.push(Self::event_for(self.in_think, ready));
                    }
                    return out;
                }
            }
        }
    }
 }
 // ── Tests ───────────────────────────────────────────────────────────
 #[cfg(test)]
@@ -600,6 +697,148 @@ mod tests {
        );
    }
    // ── ThinkParser ─────────────────────────────────────────────────
    /// Test helper: feeds every chunk to `parser` in order, then
    /// flushes it, returning all events in emission order.
    fn drive_think(parser: &mut ThinkParser, chunks: &[&str]) -> Vec<ThinkEvent> {
        let mut collected: Vec<ThinkEvent> = chunks
            .iter()
            .flat_map(|piece| parser.feed(piece))
            .collect();
        collected.extend(parser.finish());
        collected
    }
    #[test]
    fn think_plain_text_passes_through() {
        // Input with no markers at all: each chunk comes straight back
        // as its own Text event, nothing held and nothing merged.
        let mut parser = ThinkParser::new();
        let got = drive_think(&mut parser, &["hello ", "world"]);
        let want = [
            ThinkEvent::Text(String::from("hello ")),
            ThinkEvent::Text(String::from("world")),
        ];
        assert_eq!(got, want);
    }
    #[test]
    fn think_splits_text_reasoning_text() {
        // One chunk containing a complete block: the leading events
        // must be text, reasoning, text with the markers stripped.
        let mut parser = ThinkParser::new();
        let got = drive_think(&mut parser, &["before <think>thinking now</think> after"]);
        let want = [
            ThinkEvent::Text(String::from("before ")),
            ThinkEvent::Reasoning(String::from("thinking now")),
            ThinkEvent::Text(String::from(" after")),
        ];
        assert_eq!(&got[..3], &want[..]);
    }
    #[test]
    fn think_open_marker_split_across_chunks() {
        // The opening tag arrives as "<" + "think>": the lone "<" must
        // be held back rather than leaked into the visible text.
        let mut parser = ThinkParser::new();
        let mut visible = String::new();
        let mut thoughts = String::new();
        for event in drive_think(&mut parser, &["pre <", "think>middle</think> post"]) {
            match event {
                ThinkEvent::Text(s) => visible.push_str(&s),
                ThinkEvent::Reasoning(s) => thoughts.push_str(&s),
            }
        }
        assert_eq!(visible, "pre  post");
        assert_eq!(thoughts, "middle");
    }
    #[test]
    fn think_close_marker_split_across_chunks() {
        // The closing tag arrives as "<" + "/think>": the "<" must not
        // be emitted as reasoning, and the text after the tag must
        // come out on the text channel.
        let mut parser = ThinkParser::new();
        let events = drive_think(&mut parser, &["a<think>b<", "/think>c"]);
        let mut thoughts = String::new();
        let mut final_text: Option<&str> = None;
        for event in &events {
            match event {
                ThinkEvent::Reasoning(s) => thoughts.push_str(s),
                // Overwritten on every Text event, so it ends up
                // holding the last one.
                ThinkEvent::Text(s) => final_text = Some(s.as_str()),
            }
        }
        assert_eq!(thoughts, "b");
        assert_eq!(final_text, Some("c"));
    }
    #[test]
    fn think_one_byte_at_a_time_matches_single_chunk() {
        /// Concatenates the payloads per channel: `(text, reasoning)`.
        fn totals(events: &[ThinkEvent]) -> (String, String) {
            events.iter().fold(
                (String::new(), String::new()),
                |(mut text, mut thought), event| {
                    match event {
                        ThinkEvent::Text(s) => text.push_str(s),
                        ThinkEvent::Reasoning(s) => thought.push_str(s),
                    }
                    (text, thought)
                },
            )
        }

        // Worst-case chunking (one character per feed) must classify
        // the input exactly like a single feed of the whole string.
        let input = "x<think>internal</think>y";
        let whole = drive_think(&mut ThinkParser::new(), &[input]);
        let pieces: Vec<&str> = input
            .char_indices()
            .map(|(at, ch)| &input[at..at + ch.len_utf8()])
            .collect();
        let piecewise = drive_think(&mut ThinkParser::new(), &pieces);

        assert_eq!(totals(&whole), totals(&piecewise));
        assert_eq!(
            totals(&piecewise),
            (String::from("xy"), String::from("internal"))
        );
    }
    #[test]
    fn think_empty_block_emits_no_reasoning_event() {
        // `<think></think>` has no body, so it must vanish entirely:
        // no empty Reasoning event, only the text that follows it.
        let mut parser = ThinkParser::new();
        let events = drive_think(&mut parser, &["<think></think>real"]);
        assert!(
            events
                .iter()
                .all(|event| !matches!(event, ThinkEvent::Reasoning(_))),
            "events: {events:?}"
        );
        assert_eq!(events.first(), Some(&ThinkEvent::Text(String::from("real"))));
    }
    #[test]
    fn think_unterminated_block_flushes_as_reasoning_on_finish() {
        // The stream ends inside an open block: everything after
        // `<think>` must still surface on the reasoning channel.
        let mut parser = ThinkParser::new();
        let events = drive_think(&mut parser, &["x<think>thinking but no close"]);
        assert_eq!(events.first(), Some(&ThinkEvent::Text(String::from("x"))));

        let mut thoughts = String::new();
        for event in &events {
            if let ThinkEvent::Reasoning(s) = event {
                thoughts.push_str(s);
            }
        }
        assert_eq!(thoughts, "thinking but no close");
    }
    #[test]
    fn think_bare_close_marker_passes_through_as_text() {
        // A `</think>` with no `<think>` before it is not a block
        // boundary. It stays in the visible text verbatim, and nothing
        // emitted earlier is reclassified as reasoning.
        let mut parser = ThinkParser::new();
        let events = drive_think(&mut parser, &["hello </think> world"]);

        let mut visible = String::new();
        let mut reasoning_events = 0usize;
        for event in &events {
            match event {
                ThinkEvent::Text(s) => visible.push_str(s),
                ThinkEvent::Reasoning(_) => reasoning_events += 1,
            }
        }
        assert_eq!(visible, "hello </think> world");
        assert!(reasoning_events == 0);
    }
    #[test]
    fn quoted_lt_inside_args_does_not_trigger_marker() {
        // Sanity: a string value that happens to contain "<tool" is