feat(helexa-acp): route Qwen3 inline <think> blocks to reasoning
Some checks failed
build-prerelease / Build cortex binary (push) Blocked by required conditions
CI / Test (push) Waiting to run
CI / Format (push) Successful in 26s
build-prerelease / Resolve version stamps (push) Successful in 30s
CI / Clippy (push) Successful in 2m40s
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled
CI / Build cortex SRPM (push) Has been cancelled
CI / Build neuron SRPM (push) Has been cancelled
CI / Publish cortex to COPR (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
CI / Publish neuron to COPR (push) Has been cancelled
CI / Bump version in source (push) Has been cancelled

Qwen3 emits chain-of-thought as literal <think>...</think> tags
inside delta.content rather than via the separate reasoning_content
field — so without parsing the markers, the thinking shows up in
the message pane as ordinary text. Add a small ThinkParser in
qwen3.rs (same chunk-boundary discipline as ToolCallParser) and
stage it after the tool-call parser in decode_stream: text events
from the tool-call parser are fed in and split into TextDelta /
ReasoningDelta. Zed now renders thinking in its dedicated thought
UI; visible answer text stays in the message pane.

The parking-lot entry from the plan is now closed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 12:30:25 +03:00
parent 5a0861d639
commit 1c16732668
2 changed files with 332 additions and 2 deletions

View File

@@ -296,6 +296,49 @@ mod tests {
assert_eq!(events.len(), 4); assert_eq!(events.len(), 4);
} }
#[tokio::test]
async fn decodes_qwen3_inline_think_block_to_reasoning_deltas() {
    // Qwen3 places its `<think>…</think>` block directly inside
    // `delta.content`. Everything between the markers must surface
    // as ReasoningDelta; everything outside as TextDelta. Both
    // markers are deliberately cut in half across SSE chunks so the
    // parser's hold-back of partial markers is exercised.
    let chunks = vec![
        r#"{"choices":[{"delta":{"content":"<thi"}}]}"#,
        r#"{"choices":[{"delta":{"content":"nk>internal reasoning</thi"}}]}"#,
        r#"{"choices":[{"delta":{"content":"nk>visible answer"}}]}"#,
        r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
        "[DONE]",
    ];
    let events: Vec<_> = decode_stream(fake_sse(chunks), CancellationToken::new())
        .collect::<Vec<_>>()
        .await
        .into_iter()
        .map(|r| r.unwrap())
        .collect();

    // One pass over the events, accumulating each channel separately.
    let mut text = String::new();
    let mut reasoning = String::new();
    for event in &events {
        match event {
            CompletionEvent::TextDelta(t) => text.push_str(t.as_str()),
            CompletionEvent::ReasoningDelta(r) => reasoning.push_str(r.as_str()),
            _ => {}
        }
    }

    assert_eq!(text, "visible answer");
    assert_eq!(reasoning, "internal reasoning");
    // The stream must still terminate with the upstream finish reason.
    assert!(matches!(
        events.last(),
        Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop")
    ));
}
#[tokio::test] #[tokio::test]
async fn decodes_qwen3_inline_tool_call_from_content_stream() { async fn decodes_qwen3_inline_tool_call_from_content_stream() {
// Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside // Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside
@@ -638,6 +681,11 @@ where
// structured tool-call events, holding back only the suffix // structured tool-call events, holding back only the suffix
// bytes that could be the start of a marker. // bytes that could be the start of a marker.
let mut qwen_parser = crate::qwen3::ToolCallParser::new(); let mut qwen_parser = crate::qwen3::ToolCallParser::new();
// Same shape, second stage: take the plain-text events out
// of the tool-call parser and split off `<think>…</think>`
// blocks into ReasoningDelta so Zed can render them in its
// dedicated thought UI rather than the message pane.
let mut think_parser = crate::qwen3::ThinkParser::new();
let mut sse = Box::pin(sse); let mut sse = Box::pin(sse);
loop { loop {
@@ -678,7 +726,21 @@ where
for ev in qwen_parser.feed(&text) { for ev in qwen_parser.feed(&text) {
match ev { match ev {
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
yield Ok(CompletionEvent::TextDelta(t)); for tev in think_parser.feed(&t) {
match tev {
crate::qwen3::ThinkEvent::Text(s)
if !s.is_empty() =>
{
yield Ok(CompletionEvent::TextDelta(s));
}
crate::qwen3::ThinkEvent::Reasoning(s)
if !s.is_empty() =>
{
yield Ok(CompletionEvent::ReasoningDelta(s));
}
_ => {}
}
}
} }
crate::qwen3::ParserEvent::Text(_) => {} crate::qwen3::ParserEvent::Text(_) => {}
crate::qwen3::ParserEvent::Start { index, name } => { crate::qwen3::ParserEvent::Start { index, name } => {
@@ -747,7 +809,21 @@ where
for ev in qwen_parser.finish() { for ev in qwen_parser.finish() {
match ev { match ev {
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
yield Ok(CompletionEvent::TextDelta(t)); for tev in think_parser.feed(&t) {
match tev {
crate::qwen3::ThinkEvent::Text(s)
if !s.is_empty() =>
{
yield Ok(CompletionEvent::TextDelta(s));
}
crate::qwen3::ThinkEvent::Reasoning(s)
if !s.is_empty() =>
{
yield Ok(CompletionEvent::ReasoningDelta(s));
}
_ => {}
}
}
} }
crate::qwen3::ParserEvent::Text(_) => {} crate::qwen3::ParserEvent::Text(_) => {}
crate::qwen3::ParserEvent::Start { index, name } => { crate::qwen3::ParserEvent::Start { index, name } => {
@@ -768,6 +844,21 @@ where
} }
} }
} }
// Flush the think parser too — any
// unclosed <think> at stream end becomes
// a final ReasoningDelta rather than
// being lost.
for tev in think_parser.finish() {
match tev {
crate::qwen3::ThinkEvent::Text(s) if !s.is_empty() => {
yield Ok(CompletionEvent::TextDelta(s));
}
crate::qwen3::ThinkEvent::Reasoning(s) if !s.is_empty() => {
yield Ok(CompletionEvent::ReasoningDelta(s));
}
_ => {}
}
}
yield Ok(CompletionEvent::Finish { reason: Some(reason) }); yield Ok(CompletionEvent::Finish { reason: Some(reason) });
} }
} }

View File

@@ -37,6 +37,11 @@ const TOOL_CALL_OPEN: &str = "<tool_call>";
/// One closing marker. Length 12. /// One closing marker. Length 12.
const TOOL_CALL_CLOSE: &str = "</tool_call>"; const TOOL_CALL_CLOSE: &str = "</tool_call>";
/// Opening marker of an inline reasoning block; [`ThinkParser`]
/// scans for it while outside a block. Length 7.
const THINK_OPEN: &str = "<think>";
/// Closing marker of an inline reasoning block; [`ThinkParser`]
/// scans for it while inside a block. Length 8.
const THINK_CLOSE: &str = "</think>";
// ── System-prompt-side rendering ──────────────────────────────────── // ── System-prompt-side rendering ────────────────────────────────────
/// Append-this-to-the-system-prompt block describing the available /// Append-this-to-the-system-prompt block describing the available
@@ -295,6 +300,98 @@ struct ToolCallBody {
arguments: serde_json::Value, arguments: serde_json::Value,
} }
// ── Think-block parser ──────────────────────────────────────────────
/// Events from [`ThinkParser`]. Plain text outside any `<think>`
/// block stays `Text`; bytes between `<think>` and `</think>` become
/// `Reasoning` so the agent can route them to a thought-channel
/// notification (Zed surfaces these in a dedicated UI affordance
/// rather than the main message pane).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ThinkEvent {
/// Content seen while the parser was outside a `<think>` block.
Text(String),
/// Content seen while the parser was inside a `<think>` block; the
/// `<think>` / `</think>` markers themselves are not included.
Reasoning(String),
}
/// Streaming parser for Qwen3-style inline reasoning. Same
/// chunk-boundary discipline as [`ToolCallParser`]: hold back only
/// the suffix that could be the start of the marker we're scanning
/// for. Markers (`<think>`, `</think>`) never nest; a stray
/// `</think>` outside a block is emitted as text (the model
/// occasionally writes the tag conversationally).
#[derive(Debug, Default)]
pub struct ThinkParser {
/// Input that has been fed in but not yet emitted as an event
/// (for example a trailing partial marker).
buffer: String,
/// `true` after a `<think>` has been consumed and until the
/// matching `</think>` is consumed (or `finish` is called).
in_think: bool,
}
impl ThinkParser {
    /// Build a parser that starts outside any `<think>` block with
    /// nothing buffered.
    pub fn new() -> Self {
        Self {
            buffer: String::new(),
            in_think: false,
        }
    }

    /// Append `chunk` to the pending input and return every event
    /// that can be emitted without guessing at a partial marker.
    pub fn feed(&mut self, chunk: &str) -> Vec<ThinkEvent> {
        self.buffer += chunk;
        self.drain()
    }

    /// End-of-stream flush. Whatever is still buffered (typically a
    /// held-back partial marker) is released: as reasoning when the
    /// stream stopped inside an unclosed `<think>`, as text
    /// otherwise. The parser is left outside any block.
    pub fn finish(&mut self) -> Vec<ThinkEvent> {
        let mut events = self.drain();
        let was_thinking = std::mem::replace(&mut self.in_think, false);
        let tail = std::mem::take(&mut self.buffer);
        if !tail.is_empty() {
            events.push(if was_thinking {
                ThinkEvent::Reasoning(tail)
            } else {
                ThinkEvent::Text(tail)
            });
        }
        events
    }

    /// Consume as much of the buffer as is unambiguous. Each loop
    /// iteration looks for the marker that ends the current state
    /// (`</think>` inside a block, `<think>` outside); on a hit the
    /// bytes before it are emitted and the state flips, on a miss
    /// everything except a possible marker prefix at the tail is
    /// emitted and the loop ends.
    fn drain(&mut self) -> Vec<ThinkEvent> {
        let mut out = Vec::new();
        loop {
            let inside = self.in_think;
            let marker = if inside { THINK_CLOSE } else { THINK_OPEN };
            // Bytes seen in the current state are wrapped accordingly.
            let wrap = |s: String| {
                if inside {
                    ThinkEvent::Reasoning(s)
                } else {
                    ThinkEvent::Text(s)
                }
            };

            match self.buffer.find(marker) {
                Some(pos) => {
                    if pos > 0 {
                        out.push(wrap(self.buffer[..pos].to_owned()));
                    }
                    // Drop the emitted bytes together with the marker.
                    self.buffer.replace_range(..pos + marker.len(), "");
                    self.in_think = !inside;
                }
                None => {
                    // Keep only the tail that might grow into `marker`.
                    let held = longest_marker_prefix_suffix(&self.buffer, marker);
                    let ready_len = self.buffer.len() - held;
                    if ready_len > 0 {
                        let kept = self.buffer.split_off(ready_len);
                        let ready = std::mem::replace(&mut self.buffer, kept);
                        out.push(wrap(ready));
                    }
                    return out;
                }
            }
        }
    }
}
// ── Tests ─────────────────────────────────────────────────────────── // ── Tests ───────────────────────────────────────────────────────────
#[cfg(test)] #[cfg(test)]
@@ -600,6 +697,148 @@ mod tests {
); );
} }
// ── ThinkParser ─────────────────────────────────────────────────
/// Feed every chunk to `parser` in order, then flush it, returning
/// all events produced along the way.
fn drive_think(parser: &mut ThinkParser, chunks: &[&str]) -> Vec<ThinkEvent> {
    let mut collected: Vec<ThinkEvent> = chunks
        .iter()
        .flat_map(|piece| parser.feed(piece))
        .collect();
    collected.extend(parser.finish());
    collected
}
#[test]
fn think_plain_text_passes_through() {
    // With no markers in play, each chunk comes back out as its own
    // Text event, unchanged.
    let mut parser = ThinkParser::new();
    let got = drive_think(&mut parser, &["hello ", "world"]);
    let want = vec![
        ThinkEvent::Text("hello ".into()),
        ThinkEvent::Text("world".into()),
    ];
    assert_eq!(got, want);
}
#[test]
fn think_splits_text_reasoning_text() {
    // A complete block inside one chunk yields text, reasoning, text
    // as the first three events.
    let mut parser = ThinkParser::new();
    let got = drive_think(&mut parser, &["before <think>thinking now</think> after"]);
    assert_eq!(
        got[..3],
        [
            ThinkEvent::Text("before ".into()),
            ThinkEvent::Reasoning("thinking now".into()),
            ThinkEvent::Text(" after".into()),
        ]
    );
}
#[test]
fn think_open_marker_split_across_chunks() {
    // The opening marker is cut after `<`, so the parser has to hold
    // that byte back until the next chunk completes `<think>`.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["pre <", "think>middle</think> post"]);

    let mut texts = String::new();
    let mut reasoning = String::new();
    for event in &events {
        match event {
            ThinkEvent::Text(t) => texts.push_str(t),
            ThinkEvent::Reasoning(r) => reasoning.push_str(r),
        }
    }

    // Two spaces are expected: "pre " precedes the block and " post"
    // follows it. The parser strips only the markers and the
    // reasoning, never the surrounding whitespace.
    assert_eq!(texts, "pre  post");
    assert_eq!(reasoning, "middle");
}
#[test]
fn think_close_marker_split_across_chunks() {
    // The closing marker is cut after `<`; the held-back byte must
    // not leak into the reasoning, and the text after the marker
    // must come out as the final Text event.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["a<think>b<", "/think>c"]);

    let mut thought = String::new();
    let mut final_text: Option<&str> = None;
    for event in &events {
        match event {
            ThinkEvent::Reasoning(r) => thought.push_str(r),
            // Later Text events overwrite earlier ones, leaving the last.
            ThinkEvent::Text(t) => final_text = Some(t.as_str()),
        }
    }

    assert_eq!(thought, "b");
    assert_eq!(final_text, Some("c"));
}
#[test]
fn think_one_byte_at_a_time_matches_single_chunk() {
    /// Concatenate an event list into (visible text, reasoning).
    fn split(events: &[ThinkEvent]) -> (String, String) {
        let mut visible = String::new();
        let mut thought = String::new();
        for event in events {
            match event {
                ThinkEvent::Text(s) => visible.push_str(s),
                ThinkEvent::Reasoning(s) => thought.push_str(s),
            }
        }
        (visible, thought)
    }

    let input = "x<think>internal</think>y";

    // Reference run: the whole input in one chunk.
    let whole = drive_think(&mut ThinkParser::new(), &[input]);

    // Worst-case chunking: one character per chunk.
    let pieces: Vec<String> = input.chars().map(String::from).collect();
    let piece_refs: Vec<&str> = pieces.iter().map(String::as_str).collect();
    let trickled = drive_think(&mut ThinkParser::new(), &piece_refs);

    assert_eq!(split(&whole), split(&trickled));
    assert_eq!(
        split(&trickled),
        ("xy".to_string(), "internal".to_string())
    );
}
#[test]
fn think_empty_block_emits_no_reasoning_event() {
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["<think></think>real"]);
    // An empty <think></think> pair must vanish entirely: every
    // event is non-reasoning, and the first one is the trailing text.
    assert!(
        events.iter().all(|e| !matches!(e, ThinkEvent::Reasoning(_))),
        "events: {events:?}"
    );
    assert_eq!(events[0], ThinkEvent::Text("real".into()));
}
#[test]
fn think_unterminated_block_flushes_as_reasoning_on_finish() {
    // The stream ends inside an open block: everything after
    // `<think>` must still be delivered, as reasoning.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["x<think>thinking but no close"]);
    assert_eq!(events[0], ThinkEvent::Text("x".into()));

    let thought = events.iter().fold(String::new(), |mut acc, event| {
        if let ThinkEvent::Reasoning(r) = event {
            acc.push_str(r);
        }
        acc
    });
    assert_eq!(thought, "thinking but no close");
}
#[test]
fn think_bare_close_marker_passes_through_as_text() {
    // A `</think>` that was never opened is just text: the parser
    // does not go back and reclassify what it already emitted.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["hello </think> world"]);

    let mut visible = String::new();
    let mut saw_reasoning = false;
    for event in &events {
        match event {
            ThinkEvent::Text(t) => visible.push_str(t),
            ThinkEvent::Reasoning(_) => saw_reasoning = true,
        }
    }

    assert_eq!(visible, "hello </think> world");
    assert!(!saw_reasoning);
}
#[test] #[test]
fn quoted_lt_inside_args_does_not_trigger_marker() { fn quoted_lt_inside_args_does_not_trigger_marker() {
// Sanity: a string value that happens to contain "<tool" is // Sanity: a string value that happens to contain "<tool" is