feat(helexa-acp): route Qwen3 inline <think> blocks to reasoning
Some checks failed
build-prerelease / Build cortex binary (push) Blocked by required conditions
CI / Test (push) Waiting to run
CI / Format (push) Successful in 26s
build-prerelease / Resolve version stamps (push) Successful in 30s
CI / Clippy (push) Successful in 2m40s
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled
CI / Build cortex SRPM (push) Has been cancelled
CI / Build neuron SRPM (push) Has been cancelled
CI / Publish cortex to COPR (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
CI / Publish neuron to COPR (push) Has been cancelled
CI / Bump version in source (push) Has been cancelled
Some checks failed
build-prerelease / Build cortex binary (push) Blocked by required conditions
CI / Test (push) Waiting to run
CI / Format (push) Successful in 26s
build-prerelease / Resolve version stamps (push) Successful in 30s
CI / Clippy (push) Successful in 2m40s
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled
CI / Build cortex SRPM (push) Has been cancelled
CI / Build neuron SRPM (push) Has been cancelled
CI / Publish cortex to COPR (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
CI / Publish neuron to COPR (push) Has been cancelled
CI / Bump version in source (push) Has been cancelled
Qwen3 emits chain-of-thought as literal <think>...</think> tags inside delta.content rather than via the separate reasoning_content field — so without parsing the markers, the thinking shows up in the message pane as ordinary text. Add a small ThinkParser in qwen3.rs (same chunk-boundary discipline as ToolCallParser) and stage it after the tool-call parser in decode_stream: text events from the tool-call parser are fed in and split into TextDelta / ReasoningDelta. Zed now renders thinking in its dedicated thought UI; visible answer text stays in the message pane. The parking-lot entry from the plan is now closed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -296,6 +296,49 @@ mod tests {
|
|||||||
assert_eq!(events.len(), 4);
|
assert_eq!(events.len(), 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn decodes_qwen3_inline_think_block_to_reasoning_deltas() {
|
||||||
|
// Qwen3-shaped output: a `<think>…</think>` block lives
|
||||||
|
// inside `delta.content`. The decoder should route bytes
|
||||||
|
// inside the block to ReasoningDelta and the surrounding
|
||||||
|
// content to TextDelta. Marker boundaries split across
|
||||||
|
// chunks to exercise the parser's prefix-hold logic.
|
||||||
|
let sse = fake_sse(vec![
|
||||||
|
r#"{"choices":[{"delta":{"content":"<thi"}}]}"#,
|
||||||
|
r#"{"choices":[{"delta":{"content":"nk>internal reasoning</thi"}}]}"#,
|
||||||
|
r#"{"choices":[{"delta":{"content":"nk>visible answer"}}]}"#,
|
||||||
|
r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
|
||||||
|
"[DONE]",
|
||||||
|
]);
|
||||||
|
let events: Vec<_> = decode_stream(sse, CancellationToken::new())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.into_iter()
|
||||||
|
.map(|r| r.unwrap())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let text: String = events
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| match e {
|
||||||
|
CompletionEvent::TextDelta(t) => Some(t.as_str()),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let reasoning: String = events
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| match e {
|
||||||
|
CompletionEvent::ReasoningDelta(r) => Some(r.as_str()),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
assert_eq!(text, "visible answer");
|
||||||
|
assert_eq!(reasoning, "internal reasoning");
|
||||||
|
assert!(matches!(
|
||||||
|
events.last(),
|
||||||
|
Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn decodes_qwen3_inline_tool_call_from_content_stream() {
|
async fn decodes_qwen3_inline_tool_call_from_content_stream() {
|
||||||
// Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside
|
// Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside
|
||||||
@@ -638,6 +681,11 @@ where
|
|||||||
// structured tool-call events, holding back only the suffix
|
// structured tool-call events, holding back only the suffix
|
||||||
// bytes that could be the start of a marker.
|
// bytes that could be the start of a marker.
|
||||||
let mut qwen_parser = crate::qwen3::ToolCallParser::new();
|
let mut qwen_parser = crate::qwen3::ToolCallParser::new();
|
||||||
|
// Same shape, second stage: take the plain-text events out
|
||||||
|
// of the tool-call parser and split off `<think>…</think>`
|
||||||
|
// blocks into ReasoningDelta so Zed can render them in its
|
||||||
|
// dedicated thought UI rather than the message pane.
|
||||||
|
let mut think_parser = crate::qwen3::ThinkParser::new();
|
||||||
|
|
||||||
let mut sse = Box::pin(sse);
|
let mut sse = Box::pin(sse);
|
||||||
loop {
|
loop {
|
||||||
@@ -678,7 +726,21 @@ where
|
|||||||
for ev in qwen_parser.feed(&text) {
|
for ev in qwen_parser.feed(&text) {
|
||||||
match ev {
|
match ev {
|
||||||
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
|
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
|
||||||
yield Ok(CompletionEvent::TextDelta(t));
|
for tev in think_parser.feed(&t) {
|
||||||
|
match tev {
|
||||||
|
crate::qwen3::ThinkEvent::Text(s)
|
||||||
|
if !s.is_empty() =>
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::TextDelta(s));
|
||||||
|
}
|
||||||
|
crate::qwen3::ThinkEvent::Reasoning(s)
|
||||||
|
if !s.is_empty() =>
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::ReasoningDelta(s));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
crate::qwen3::ParserEvent::Text(_) => {}
|
crate::qwen3::ParserEvent::Text(_) => {}
|
||||||
crate::qwen3::ParserEvent::Start { index, name } => {
|
crate::qwen3::ParserEvent::Start { index, name } => {
|
||||||
@@ -747,7 +809,21 @@ where
|
|||||||
for ev in qwen_parser.finish() {
|
for ev in qwen_parser.finish() {
|
||||||
match ev {
|
match ev {
|
||||||
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
|
crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
|
||||||
yield Ok(CompletionEvent::TextDelta(t));
|
for tev in think_parser.feed(&t) {
|
||||||
|
match tev {
|
||||||
|
crate::qwen3::ThinkEvent::Text(s)
|
||||||
|
if !s.is_empty() =>
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::TextDelta(s));
|
||||||
|
}
|
||||||
|
crate::qwen3::ThinkEvent::Reasoning(s)
|
||||||
|
if !s.is_empty() =>
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::ReasoningDelta(s));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
crate::qwen3::ParserEvent::Text(_) => {}
|
crate::qwen3::ParserEvent::Text(_) => {}
|
||||||
crate::qwen3::ParserEvent::Start { index, name } => {
|
crate::qwen3::ParserEvent::Start { index, name } => {
|
||||||
@@ -768,6 +844,21 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Flush the think parser too — any
|
||||||
|
// unclosed <think> at stream end becomes
|
||||||
|
// a final ReasoningDelta rather than
|
||||||
|
// being lost.
|
||||||
|
for tev in think_parser.finish() {
|
||||||
|
match tev {
|
||||||
|
crate::qwen3::ThinkEvent::Text(s) if !s.is_empty() => {
|
||||||
|
yield Ok(CompletionEvent::TextDelta(s));
|
||||||
|
}
|
||||||
|
crate::qwen3::ThinkEvent::Reasoning(s) if !s.is_empty() => {
|
||||||
|
yield Ok(CompletionEvent::ReasoningDelta(s));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
yield Ok(CompletionEvent::Finish { reason: Some(reason) });
|
yield Ok(CompletionEvent::Finish { reason: Some(reason) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,11 @@ const TOOL_CALL_OPEN: &str = "<tool_call>";
|
|||||||
/// One closing marker. Length 12.
|
/// One closing marker. Length 12.
|
||||||
const TOOL_CALL_CLOSE: &str = "</tool_call>";
|
const TOOL_CALL_CLOSE: &str = "</tool_call>";
|
||||||
|
|
||||||
|
/// Marker that opens an inline reasoning block (7 bytes).
const THINK_OPEN: &str = "<think>";
/// Marker that closes an inline reasoning block (8 bytes).
const THINK_CLOSE: &str = "</think>";
|
||||||
|
|
||||||
// ── System-prompt-side rendering ────────────────────────────────────
|
// ── System-prompt-side rendering ────────────────────────────────────
|
||||||
|
|
||||||
/// Append-this-to-the-system-prompt block describing the available
|
/// Append-this-to-the-system-prompt block describing the available
|
||||||
@@ -295,6 +300,98 @@ struct ToolCallBody {
|
|||||||
arguments: serde_json::Value,
|
arguments: serde_json::Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Think-block parser ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Output of [`ThinkParser`]: a piece of the content stream tagged with
/// the channel it belongs to.
///
/// Content outside any `<think>` block is reported as `Text`; content
/// between `<think>` and `</think>` is reported as `Reasoning`, so the
/// caller can send it to a thought channel instead of the main message
/// pane.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ThinkEvent {
    /// Ordinary visible content found outside a `<think>` block.
    Text(String),
    /// Content found inside a `<think>` block.
    Reasoning(String),
}
|
||||||
|
|
||||||
|
/// Streaming parser for Qwen3-style inline reasoning. Same
|
||||||
|
/// chunk-boundary discipline as [`ToolCallParser`]: hold back only
|
||||||
|
/// the suffix that could be the start of the marker we're scanning
|
||||||
|
/// for. Markers (`<think>`, `</think>`) never nest; a stray
|
||||||
|
/// `</think>` outside a block is emitted as text (the model
|
||||||
|
/// occasionally writes the tag conversationally).
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct ThinkParser {
|
||||||
|
buffer: String,
|
||||||
|
in_think: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ThinkParser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn feed(&mut self, chunk: &str) -> Vec<ThinkEvent> {
|
||||||
|
self.buffer.push_str(chunk);
|
||||||
|
self.drain()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flush any buffered tail at end-of-stream. If we end mid-think
|
||||||
|
/// (no closing tag arrived), emit what we have as reasoning so
|
||||||
|
/// the partial thought isn't silently dropped.
|
||||||
|
pub fn finish(&mut self) -> Vec<ThinkEvent> {
|
||||||
|
let mut events = self.drain();
|
||||||
|
if !self.buffer.is_empty() {
|
||||||
|
let raw = std::mem::take(&mut self.buffer);
|
||||||
|
if self.in_think {
|
||||||
|
events.push(ThinkEvent::Reasoning(raw));
|
||||||
|
} else {
|
||||||
|
events.push(ThinkEvent::Text(raw));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.in_think = false;
|
||||||
|
events
|
||||||
|
}
|
||||||
|
|
||||||
|
fn drain(&mut self) -> Vec<ThinkEvent> {
|
||||||
|
let mut events = Vec::new();
|
||||||
|
loop {
|
||||||
|
if self.in_think {
|
||||||
|
if let Some(end) = self.buffer.find(THINK_CLOSE) {
|
||||||
|
let body = self.buffer[..end].to_string();
|
||||||
|
if !body.is_empty() {
|
||||||
|
events.push(ThinkEvent::Reasoning(body));
|
||||||
|
}
|
||||||
|
self.buffer.drain(..end + THINK_CLOSE.len());
|
||||||
|
self.in_think = false;
|
||||||
|
} else {
|
||||||
|
let hold = longest_marker_prefix_suffix(&self.buffer, THINK_CLOSE);
|
||||||
|
let safe = self.buffer.len() - hold;
|
||||||
|
if safe > 0 {
|
||||||
|
let r: String = self.buffer.drain(..safe).collect();
|
||||||
|
events.push(ThinkEvent::Reasoning(r));
|
||||||
|
}
|
||||||
|
return events;
|
||||||
|
}
|
||||||
|
} else if let Some(start) = self.buffer.find(THINK_OPEN) {
|
||||||
|
let text = self.buffer[..start].to_string();
|
||||||
|
if !text.is_empty() {
|
||||||
|
events.push(ThinkEvent::Text(text));
|
||||||
|
}
|
||||||
|
self.buffer.drain(..start + THINK_OPEN.len());
|
||||||
|
self.in_think = true;
|
||||||
|
} else {
|
||||||
|
let hold = longest_marker_prefix_suffix(&self.buffer, THINK_OPEN);
|
||||||
|
let safe = self.buffer.len() - hold;
|
||||||
|
if safe > 0 {
|
||||||
|
let t: String = self.buffer.drain(..safe).collect();
|
||||||
|
events.push(ThinkEvent::Text(t));
|
||||||
|
}
|
||||||
|
return events;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ── Tests ───────────────────────────────────────────────────────────
|
// ── Tests ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -600,6 +697,148 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── ThinkParser ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Test helper: feeds every chunk to `parser` in order, then finishes
/// it, returning all emitted events as one list.
fn drive_think(parser: &mut ThinkParser, chunks: &[&str]) -> Vec<ThinkEvent> {
    let mut collected: Vec<ThinkEvent> = chunks
        .iter()
        .flat_map(|piece| parser.feed(piece))
        .collect();
    collected.extend(parser.finish());
    collected
}
|
||||||
|
|
||||||
|
#[test]
fn think_plain_text_passes_through() {
    // Input without any marker: each chunk is expected back as its own
    // Text event, in order.
    let mut parser = ThinkParser::new();
    let observed = drive_think(&mut parser, &["hello ", "world"]);
    let expected = vec![
        ThinkEvent::Text("hello ".into()),
        ThinkEvent::Text("world".into()),
    ];
    assert_eq!(observed, expected);
}
|
||||||
|
|
||||||
|
#[test]
fn think_splits_text_reasoning_text() {
    // A complete block in a single chunk splits into exactly three
    // events. Comparing the whole vector (rather than indexing the first
    // three elements) also rejects spurious extra events and gives a
    // readable diff instead of an index panic when fewer are produced.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["before <think>thinking now</think> after"]);
    assert_eq!(
        events,
        vec![
            ThinkEvent::Text("before ".into()),
            ThinkEvent::Reasoning("thinking now".into()),
            ThinkEvent::Text(" after".into()),
        ]
    );
}
|
||||||
|
|
||||||
|
#[test]
fn think_open_marker_split_across_chunks() {
    // The opening marker straddles the chunk boundary: the first chunk
    // ends with `<`, which must be held back until the next chunk shows
    // whether it starts `<think>`.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["pre <", "think>middle</think> post"]);

    let mut visible = String::new();
    let mut thought = String::new();
    for event in &events {
        match event {
            ThinkEvent::Text(s) => visible.push_str(s),
            ThinkEvent::Reasoning(s) => thought.push_str(s),
        }
    }

    // Two spaces: the one before `<think>` and the one after `</think>`
    // both belong to the visible text ("pre " + " post").
    assert_eq!(visible, "pre  post");
    assert_eq!(thought, "middle");
}
|
||||||
|
|
||||||
|
#[test]
fn think_close_marker_split_across_chunks() {
    // The closing marker straddles the chunk boundary: `<` ends the first
    // chunk and `/think>` starts the second.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["a<think>b<", "/think>c"]);

    let mut thought = String::new();
    let mut final_text: Option<&str> = None;
    for event in &events {
        match event {
            ThinkEvent::Reasoning(s) => thought.push_str(s),
            // Overwriting on every Text leaves the last one in place.
            ThinkEvent::Text(s) => final_text = Some(s.as_str()),
        }
    }

    assert_eq!(thought, "b");
    assert_eq!(final_text, Some("c"));
}
|
||||||
|
|
||||||
|
#[test]
fn think_one_byte_at_a_time_matches_single_chunk() {
    /// Concatenates events per channel: `(visible text, reasoning)`.
    fn split_channels(events: &[ThinkEvent]) -> (String, String) {
        let mut visible = String::new();
        let mut thought = String::new();
        for event in events {
            match event {
                ThinkEvent::Text(s) => visible.push_str(s),
                ThinkEvent::Reasoning(s) => thought.push_str(s),
            }
        }
        (visible, thought)
    }

    let input = "x<think>internal</think>y";

    // Reference run: the whole input as one chunk.
    let whole = drive_think(&mut ThinkParser::new(), &[input]);

    // Worst-case chunking: one character per chunk (the input is ASCII,
    // so that is one byte per chunk).
    let pieces: Vec<String> = input.chars().map(String::from).collect();
    let piece_refs: Vec<&str> = pieces.iter().map(String::as_str).collect();
    let piecewise = drive_think(&mut ThinkParser::new(), &piece_refs);

    assert_eq!(split_channels(&whole), split_channels(&piecewise));
    assert_eq!(
        split_channels(&piecewise),
        ("xy".to_string(), "internal".to_string())
    );
}
|
||||||
|
|
||||||
|
#[test]
fn think_empty_block_emits_no_reasoning_event() {
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["<think></think>real"]);

    // An empty `<think></think>` must not produce a Reasoning event at
    // all; only the text that follows it is emitted.
    assert!(
        events
            .iter()
            .all(|e| !matches!(e, ThinkEvent::Reasoning(_))),
        "events: {events:?}"
    );
    assert_eq!(events.first(), Some(&ThinkEvent::Text("real".into())));
}
|
||||||
|
|
||||||
|
#[test]
fn think_unterminated_block_flushes_as_reasoning_on_finish() {
    // The block is opened but never closed; its content must still come
    // out on the reasoning channel.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["x<think>thinking but no close"]);

    assert_eq!(events.first(), Some(&ThinkEvent::Text("x".into())));

    let mut thought = String::new();
    for event in &events {
        if let ThinkEvent::Reasoning(s) = event {
            thought.push_str(s);
        }
    }
    assert_eq!(thought, "thinking but no close");
}
|
||||||
|
|
||||||
|
#[test]
fn think_bare_close_marker_passes_through_as_text() {
    // A `</think>` with no `<think>` before it is just text: the parser
    // never goes back to reclassify what it has already emitted.
    let mut parser = ThinkParser::new();
    let events = drive_think(&mut parser, &["hello </think> world"]);

    let mut visible = String::new();
    let mut saw_reasoning = false;
    for event in &events {
        match event {
            ThinkEvent::Text(s) => visible.push_str(s),
            ThinkEvent::Reasoning(_) => saw_reasoning = true,
        }
    }

    assert_eq!(visible, "hello </think> world");
    assert!(!saw_reasoning);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn quoted_lt_inside_args_does_not_trigger_marker() {
|
fn quoted_lt_inside_args_does_not_trigger_marker() {
|
||||||
// Sanity: a string value that happens to contain "<tool" is
|
// Sanity: a string value that happens to contain "<tool" is
|
||||||
|
|||||||
Reference in New Issue
Block a user