feat(helexa-acp): repair malformed tool calls and render failures as cards
Some checks failed
build-prerelease / Package helexa-neuron-blackwell RPM (push) Blocked by required conditions
build-prerelease / Resolve version stamps (push) Successful in 28s
CI / Format (push) Successful in 4m7s
CI / Test (push) Failing after 1m2s
build-prerelease / Build neuron-blackwell (push) Successful in 6m10s
CI / Clippy (push) Successful in 2m37s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m24s
build-prerelease / Build neuron-ampere (push) Successful in 8m18s
build-prerelease / Package cortex RPM (push) Successful in 1m22s
build-prerelease / Build neuron-ada (push) Successful in 5m23s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 2m54s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 2m56s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled

Two related fixes for cases where Qwen3 sometimes emits slightly-off
JSON inside <tool_call> blocks:

1. JSON repair pass in qwen3::parse_tool_call_body — strip up to
   three trailing extra `}` characters (model overshoots its closing
   braces), and hoist `name` out of `arguments` when it lands
   nested instead of as a sibling. Both observed in the field; both
   trivially repairable; both now dispatch as normal tool calls
   instead of falling back to the malformed path.

2. New CompletionEvent::MalformedToolCall variant for the cases
   repair can't fix. decode_stream now emits it instead of wrapping
   the raw body in a TextDelta, and agent.rs surfaces each one as
   a Failed SessionUpdate::ToolCall card (so Zed renders it as a
   structured failure UI element rather than dumping the body
   inline) plus a synthetic tool-call/tool-result history pair so
   the model gets clear feedback for self-correction on the next
   round.

Empty <tool_call></tool_call> blocks are now a no-op too (no
Malformed event), matching the existing empty-<think> behaviour.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 12:58:51 +03:00
parent abbedf8d8a
commit a494c8d43c
4 changed files with 294 additions and 22 deletions

View File

@@ -391,6 +391,9 @@ async fn drive_prompt(
// input — we persist these to session.history at the end so // input — we persist these to session.history at the end so
// future prompts see them. // future prompts see them.
let mut new_turns: Vec<Message> = Vec::new(); let mut new_turns: Vec<Message> = Vec::new();
// Monotonic counter for synthetic ids assigned to unparseable
// <tool_call> blocks across all rounds of this prompt.
let mut next_malformed_index: usize = 0;
let mut stop_reason = StopReason::EndTurn; let mut stop_reason = StopReason::EndTurn;
@@ -430,6 +433,11 @@ async fn drive_prompt(
// bucket — `ToolCallStart` may arrive interleaved with // bucket — `ToolCallStart` may arrive interleaved with
// `ToolCallArgsDelta` for different indices. // `ToolCallArgsDelta` for different indices.
let mut tool_buckets: BTreeMap<usize, ToolCallBucket> = BTreeMap::new(); let mut tool_buckets: BTreeMap<usize, ToolCallBucket> = BTreeMap::new();
// <tool_call> blocks whose JSON couldn't be parsed even with
// qwen3's repair pass. We surface each as a Failed
// ToolCall card and feed a synthetic error result back to
// the model so it can retry on the next round.
let mut malformed_calls: Vec<String> = Vec::new();
while let Some(event) = stream.next().await { while let Some(event) = stream.next().await {
let event = match event { let event = match event {
@@ -472,6 +480,9 @@ async fn drive_prompt(
.arguments .arguments
.push_str(&args_delta); .push_str(&args_delta);
} }
CompletionEvent::MalformedToolCall { raw } => {
malformed_calls.push(raw);
}
CompletionEvent::Finish { reason } => finish_reason = reason, CompletionEvent::Finish { reason } => finish_reason = reason,
CompletionEvent::Usage(_) => {} CompletionEvent::Usage(_) => {}
} }
@@ -490,8 +501,9 @@ async fn drive_prompt(
} }
let has_tool_calls = !tool_buckets.is_empty(); let has_tool_calls = !tool_buckets.is_empty();
let has_malformed = !malformed_calls.is_empty();
if !has_tool_calls { if !has_tool_calls && !has_malformed {
// Terminal turn: just text. Save and finish. // Terminal turn: just text. Save and finish.
if !assistant_text.is_empty() { if !assistant_text.is_empty() {
new_turns.push(Message { new_turns.push(Message {
@@ -503,7 +515,9 @@ async fn drive_prompt(
break; break;
} }
// Assistant turn carrying the tool calls. // Assistant turn carrying any successfully-parsed tool calls
// (malformed ones are handled separately so each gets its
// own Failed card with its raw body intact).
let calls: Vec<ToolCall> = tool_buckets let calls: Vec<ToolCall> = tool_buckets
.values() .values()
.map(|b| ToolCall { .map(|b| ToolCall {
@@ -512,15 +526,21 @@ async fn drive_prompt(
arguments: b.arguments.clone(), arguments: b.arguments.clone(),
}) })
.collect(); .collect();
if has_tool_calls || !assistant_text.is_empty() {
let assistant_turn = Message { let assistant_turn = Message {
role: Role::Assistant, role: Role::Assistant,
content: MessageContent::ToolCalls { content: if has_tool_calls {
MessageContent::ToolCalls {
text: (!assistant_text.is_empty()).then_some(assistant_text), text: (!assistant_text.is_empty()).then_some(assistant_text),
calls, calls,
}
} else {
MessageContent::Text(assistant_text)
}, },
}; };
new_turns.push(assistant_turn.clone()); new_turns.push(assistant_turn.clone());
messages.push(assistant_turn); messages.push(assistant_turn);
}
// Refresh the mode in case the user toggled it during the // Refresh the mode in case the user toggled it during the
// streaming above (cheap — one mutex acquisition). // streaming above (cheap — one mutex acquisition).
@@ -552,6 +572,25 @@ async fn drive_prompt(
messages.push(result_turn); messages.push(result_turn);
} }
// Handle malformed calls last — each becomes a Failed
// SessionUpdate::ToolCall card (so Zed renders structured
// failure UI instead of dumping raw JSON inline) plus a
// synthetic tool-result message so the model gets concrete
// feedback for self-correction on the next round.
for raw in malformed_calls.drain(..) {
if cancel.is_cancelled() {
stop_reason = StopReason::Cancelled;
break;
}
let synthetic_id = next_synthetic_id(&mut next_malformed_index);
emit_malformed_tool_card(&cx, &session_id, &synthetic_id, &raw);
let (call_turn, result_turn) = synthesize_malformed_history(&synthetic_id, &raw);
new_turns.push(call_turn.clone());
messages.push(call_turn);
new_turns.push(result_turn.clone());
messages.push(result_turn);
}
if cancel.is_cancelled() { if cancel.is_cancelled() {
stop_reason = StopReason::Cancelled; stop_reason = StopReason::Cancelled;
break; break;
@@ -599,6 +638,75 @@ fn text_chunk(text: String) -> agent_client_protocol::schema::ContentChunk {
ContentChunk::new(ContentBlock::Text(TextContent::new(text))) ContentChunk::new(ContentBlock::Text(TextContent::new(text)))
} }
/// Mint a synthetic tool_call_id for a malformed `<tool_call>` block.
/// The format mirrors successful calls (`call_<n>`) but uses its own
/// counter so the ids don't collide.
fn next_synthetic_id(counter: &mut usize) -> String {
let id = format!("call_malformed_{}", *counter);
*counter += 1;
id
}
/// Emit a `SessionUpdate::ToolCall` with `Failed` status so Zed
/// renders the malformed call as a structured failure card (raw
/// body visible inside the card) instead of leaving it as inline
/// text in the message pane.
fn emit_malformed_tool_card(
cx: &ConnectionTo<Client>,
session_id: &SessionId,
tool_call_id: &str,
raw: &str,
) {
use agent_client_protocol::schema::{
Content, ToolCall as AcpToolCall, ToolCallContent, ToolCallId, ToolCallStatus, ToolKind,
};
let body = format!(
"Tool call JSON could not be parsed. Raw body:\n\n```\n{raw}\n```\n\n\
Expected schema:\n\n```json\n{{\"name\": \"<function>\", \"arguments\": {{...}}}}\n```",
);
let card = AcpToolCall::new(ToolCallId::new(tool_call_id), "Malformed tool call")
.kind(ToolKind::Other)
.status(ToolCallStatus::Failed)
.raw_input(serde_json::Value::String(raw.to_string()))
.content(vec![ToolCallContent::Content(Content::new(
ContentBlock::Text(TextContent::new(body)),
))]);
send_chunk(cx, session_id, SessionUpdate::ToolCall(card));
}
/// Build the assistant-turn / tool-result pair for a malformed
/// `<tool_call>`. The assistant turn carries the raw body verbatim
/// (so the model sees its own previous output), and the tool
/// result spells out *why* it failed with the expected schema —
/// enough for a competent model to self-correct on the next round.
fn synthesize_malformed_history(tool_call_id: &str, raw: &str) -> (Message, Message) {
let call = Message {
role: Role::Assistant,
content: MessageContent::ToolCalls {
text: None,
calls: vec![ToolCall {
id: tool_call_id.to_string(),
// Real tool names never start with `<` — using this
// placeholder makes the malformed call's identity
// unambiguous in the rendered transcript.
name: "<invalid>".to_string(),
arguments: raw.to_string(),
}],
},
};
let result = Message {
role: Role::Tool,
content: MessageContent::ToolResult {
tool_call_id: tool_call_id.to_string(),
content: format!(
"ERROR: previous <tool_call> body was not valid JSON. Body was:\n{raw}\n\n\
Retry with the schema: {{\"name\": \"<function>\", \"arguments\": {{…}}}}"
),
},
};
(call, result)
}
fn map_finish_reason(reason: Option<&str>) -> StopReason { fn map_finish_reason(reason: Option<&str>) -> StopReason {
match reason { match reason {
Some("length") => StopReason::MaxTokens, Some("length") => StopReason::MaxTokens,

View File

@@ -163,6 +163,14 @@ pub enum CompletionEvent {
/// the bytes by `index` until the call's arguments are complete. /// the bytes by `index` until the call's arguments are complete.
#[allow(dead_code)] #[allow(dead_code)]
ToolCallArgsDelta { index: usize, args_delta: String }, ToolCallArgsDelta { index: usize, args_delta: String },
/// A `<tool_call>` block whose JSON couldn't be parsed even with
/// the qwen3 module's repair attempts. The agent surfaces this
/// as a Failed `SessionUpdate::ToolCall` card with the raw body
/// visible (so the editor renders structured failure UI rather
/// than dumping the body inline in the message pane), and feeds
/// a synthetic tool-error message back into history so the
/// model can self-correct on the next round.
MalformedToolCall { raw: String },
/// Stream finished. Carries the upstream `finish_reason` if it /// Stream finished. Carries the upstream `finish_reason` if it
/// gave one (`"stop"`, `"length"`, `"tool_calls"`, …). /// gave one (`"stop"`, `"length"`, `"tool_calls"`, …).
Finish { reason: Option<String> }, Finish { reason: Option<String> },

View File

@@ -758,10 +758,8 @@ where
}); });
} }
crate::qwen3::ParserEvent::Malformed { raw } => { crate::qwen3::ParserEvent::Malformed { raw } => {
tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; passing through as text"); tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; surfacing as Failed tool card");
yield Ok(CompletionEvent::TextDelta(format!( yield Ok(CompletionEvent::MalformedToolCall { raw });
"<tool_call>{raw}</tool_call>"
)));
} }
} }
} }
@@ -842,6 +840,7 @@ where
} }
crate::qwen3::ParserEvent::Malformed { raw } => { crate::qwen3::ParserEvent::Malformed { raw } => {
tracing::warn!(raw = %raw, "qwen3: unterminated <tool_call> at stream end"); tracing::warn!(raw = %raw, "qwen3: unterminated <tool_call> at stream end");
yield Ok(CompletionEvent::MalformedToolCall { raw });
} }
} }
} }

View File

@@ -250,25 +250,95 @@ impl ToolCallParser {
fn emit_completed_tool_call(&mut self, events: &mut Vec<ParserEvent>) { fn emit_completed_tool_call(&mut self, events: &mut Vec<ParserEvent>) {
let body = std::mem::take(&mut self.tool_call_buf); let body = std::mem::take(&mut self.tool_call_buf);
let trimmed = body.trim(); // <tool_call></tool_call> with nothing inside: nothing to do.
let parsed: Result<ToolCallBody, _> = serde_json::from_str(trimmed); if body.trim().is_empty() {
match parsed { return;
Ok(call) => { }
match parse_tool_call_body(&body) {
Some(call) => {
let index = self.next_index; let index = self.next_index;
self.next_index += 1; self.next_index += 1;
let name = call.name;
let args_json = let args_json =
serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string()); serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string());
events.push(ParserEvent::Start { index, name }); events.push(ParserEvent::Start {
index,
name: call.name,
});
events.push(ParserEvent::Args { index, args_json }); events.push(ParserEvent::Args { index, args_json });
} }
Err(_) => { None => {
events.push(ParserEvent::Malformed { raw: body }); events.push(ParserEvent::Malformed { raw: body });
} }
} }
} }
} }
/// Best-effort parse of a `<tool_call>` body, with repair for common
/// model misemissions. Returns `None` only when no reasonable
/// interpretation produces a tool call with a name.
///
/// Repairs handled (in order of attempt):
///
/// 1. The body parses cleanly as `{name, arguments}` — done.
/// 2. The body has trailing extra `}` characters (model overshoots
/// the closing brace count). Strip up to three trailing `}` and
/// retry. Common for chunked-sampling models that emit the closing
/// of `arguments` then re-emit the closing of the outer object.
/// 3. The body parses as JSON but has no top-level `name`; instead
/// `name` is nested inside `arguments`. Hoist it out. Common when
/// the model's training distribution had `{name, arguments}`
/// inverted in some examples.
fn parse_tool_call_body(body: &str) -> Option<ToolCallBody> {
let trimmed = body.trim();
if trimmed.is_empty() {
return None;
}
if let Ok(call) = serde_json::from_str::<ToolCallBody>(trimmed) {
return Some(call);
}
// Repair 1: strip 1..=3 trailing closing braces. We only consider
// this if the body actually ends with `}`, so the strip is
// structural not random.
for n in 1..=3 {
if trimmed.len() <= n {
break;
}
let candidate = &trimmed[..trimmed.len() - n];
if !candidate.ends_with('}') {
// Stripping further would leave the JSON without a
// closing brace at all — definitely worse, not better.
break;
}
if let Ok(call) = serde_json::from_str::<ToolCallBody>(candidate) {
tracing::debug!(
stripped = n,
"qwen3: recovered tool call by trimming trailing brace(s)"
);
return Some(call);
}
}
// Repair 2: hoist `name` from inside `arguments`.
if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed)
&& value.get("name").is_none()
&& let Some(args_value) = value.get("arguments")
&& let Some(name) = args_value.get("name").and_then(|n| n.as_str())
{
let mut hoisted = args_value.clone();
if let Some(obj) = hoisted.as_object_mut() {
obj.remove("name");
}
tracing::debug!(
name = %name,
"qwen3: recovered tool call by hoisting name out of arguments"
);
return Some(ToolCallBody {
name: name.to_string(),
arguments: hoisted,
});
}
None
}
/// Returns the length of the longest suffix of `haystack` that is a /// Returns the length of the longest suffix of `haystack` that is a
/// proper prefix of `needle`. Used to decide how many trailing bytes /// proper prefix of `needle`. Used to decide how many trailing bytes
/// to hold back when scanning for `needle`: anything that could /// to hold back when scanning for `needle`: anything that could
@@ -664,6 +734,93 @@ mod tests {
assert_eq!(starts, vec![(0, "a".into()), (1, "b".into())]); assert_eq!(starts, vec![(0, "a".into()), (1, "b".into())]);
} }
#[test]
fn recovers_from_extra_trailing_close_brace() {
// Real failure case captured in the field: model emits one
// extra `}` after closing the outer object. Parser should
// recover, not surface as Malformed.
let body = r#"<tool_call>{"name":"read_file","arguments":{"path":"/tmp/a"}}}</tool_call>"#;
let mut p = ToolCallParser::new();
let events = drive(&mut p, &[body]);
assert!(
events
.iter()
.any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "read_file")),
"expected recovered Start event; got {events:?}"
);
assert!(
!events
.iter()
.any(|e| matches!(e, ParserEvent::Malformed { .. })),
"should not have produced Malformed: {events:?}"
);
}
#[test]
fn recovers_from_two_extra_trailing_close_braces() {
let body = r#"<tool_call>{"name":"bash","arguments":{"command":"ls"}}}}</tool_call>"#;
let mut p = ToolCallParser::new();
let events = drive(&mut p, &[body]);
assert!(
events
.iter()
.any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash"))
);
}
#[test]
fn recovers_from_name_nested_in_arguments() {
// Another field-observed failure: the model puts `name`
// inside `arguments`. Parser should hoist it.
let body = r#"<tool_call>{"arguments":{"command":"head -100 readme.md","name":"bash"}}</tool_call>"#;
let mut p = ToolCallParser::new();
let events = drive(&mut p, &[body]);
assert!(
events
.iter()
.any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash")),
"expected hoisted Start event; got {events:?}"
);
let args: Vec<&str> = events
.iter()
.filter_map(|e| match e {
ParserEvent::Args { args_json, .. } => Some(args_json.as_str()),
_ => None,
})
.collect();
assert_eq!(args.len(), 1);
// After hoisting, the `name` key should be removed from
// arguments so it's not double-counted.
assert!(!args[0].contains(r#""name""#));
assert!(args[0].contains(r#""command""#));
}
#[test]
fn empty_tool_call_block_is_noop() {
let mut p = ToolCallParser::new();
let events = drive(&mut p, &["before<tool_call></tool_call>after"]);
// No Start/Args/Malformed events for an empty body; just the
// surrounding text passes through.
assert!(
!events
.iter()
.any(|e| matches!(e, ParserEvent::Start { .. }))
);
assert!(
!events
.iter()
.any(|e| matches!(e, ParserEvent::Malformed { .. }))
);
let text: String = events
.iter()
.filter_map(|e| match e {
ParserEvent::Text(t) => Some(t.as_str()),
_ => None,
})
.collect();
assert_eq!(text, "beforeafter");
}
#[test] #[test]
fn malformed_tool_call_does_not_crash() { fn malformed_tool_call_does_not_crash() {
let mut p = ToolCallParser::new(); let mut p = ToolCallParser::new();