feat(helexa-acp): repair malformed tool calls and render failures as cards

Two related fixes for cases where Qwen3 sometimes emits slightly-off JSON inside <tool_call> blocks: 1. JSON repair pass in qwen3::parse_tool_call_body — strip up to three trailing extra `}` characters (model overshoots its closing braces), and hoist `name` out of `arguments` when it lands nested instead of as a sibling. Both observed in the field; both trivially repairable; both now dispatch as normal tool calls instead of falling back to the malformed path. 2. New CompletionEvent::MalformedToolCall variant for the cases repair can't fix. decode_stream now emits it instead of wrapping the raw body in a TextDelta, and agent.rs surfaces each one as a Failed SessionUpdate::ToolCall card (so Zed renders it as a structured failure UI element rather than dumping the body inline) plus a synthetic tool-call/tool-result history pair so the model gets clear feedback for self-correction on the next round. Empty <tool_call></tool_call> blocks are now a no-op too (no Malformed event), matching the existing empty-<think> behaviour. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 12:58:51 +03:00
parent abbedf8d8a
commit a494c8d43c
4 changed files with 294 additions and 22 deletions
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -391,6 +391,9 @@ async fn drive_prompt(
    // input — we persist these to session.history at the end so
    // future prompts see them.
    let mut new_turns: Vec<Message> = Vec::new();
    // Monotonic counter for synthetic ids assigned to unparseable
    // <tool_call> blocks across all rounds of this prompt.
    let mut next_malformed_index: usize = 0;
    let mut stop_reason = StopReason::EndTurn;
@@ -430,6 +433,11 @@ async fn drive_prompt(
        // bucket — `ToolCallStart` may arrive interleaved with
        // `ToolCallArgsDelta` for different indices.
        let mut tool_buckets: BTreeMap<usize, ToolCallBucket> = BTreeMap::new();
        // <tool_call> blocks whose JSON couldn't be parsed even with
        // qwen3's repair pass. We surface each as a Failed
        // ToolCall card and feed a synthetic error result back to
        // the model so it can retry on the next round.
        let mut malformed_calls: Vec<String> = Vec::new();
        while let Some(event) = stream.next().await {
            let event = match event {
@@ -472,6 +480,9 @@ async fn drive_prompt(
                        .arguments
                        .push_str(&args_delta);
                }
                CompletionEvent::MalformedToolCall { raw } => {
                    malformed_calls.push(raw);
                }
                CompletionEvent::Finish { reason } => finish_reason = reason,
                CompletionEvent::Usage(_) => {}
            }
@@ -490,8 +501,9 @@ async fn drive_prompt(
        }
        let has_tool_calls = !tool_buckets.is_empty();
        let has_malformed = !malformed_calls.is_empty();
-        if !has_tool_calls {
+        if !has_tool_calls && !has_malformed {
            // Terminal turn: just text. Save and finish.
            if !assistant_text.is_empty() {
                new_turns.push(Message {
@@ -503,7 +515,9 @@ async fn drive_prompt(
            break;
        }
-        // Assistant turn carrying the tool calls.
+        // Assistant turn carrying any successfully-parsed tool calls
        // (malformed ones are handled separately so each gets its
        // own Failed card with its raw body intact).
        let calls: Vec<ToolCall> = tool_buckets
            .values()
            .map(|b| ToolCall {
@@ -512,15 +526,21 @@ async fn drive_prompt(
                arguments: b.arguments.clone(),
            })
            .collect();
        if has_tool_calls || !assistant_text.is_empty() {
            let assistant_turn = Message {
                role: Role::Assistant,
-            content: MessageContent::ToolCalls {
+                content: if has_tool_calls {
                    MessageContent::ToolCalls {
                        text: (!assistant_text.is_empty()).then_some(assistant_text),
                        calls,
                    }
                } else {
                    MessageContent::Text(assistant_text)
                },
            };
            new_turns.push(assistant_turn.clone());
            messages.push(assistant_turn);
        }
        // Refresh the mode in case the user toggled it during the
        // streaming above (cheap — one mutex acquisition).
@@ -552,6 +572,25 @@ async fn drive_prompt(
            messages.push(result_turn);
        }
        // Handle malformed calls last — each becomes a Failed
        // SessionUpdate::ToolCall card (so Zed renders structured
        // failure UI instead of dumping raw JSON inline) plus a
        // synthetic tool-result message so the model gets concrete
        // feedback for self-correction on the next round.
        for raw in malformed_calls.drain(..) {
            if cancel.is_cancelled() {
                stop_reason = StopReason::Cancelled;
                break;
            }
            let synthetic_id = next_synthetic_id(&mut next_malformed_index);
            emit_malformed_tool_card(&cx, &session_id, &synthetic_id, &raw);
            let (call_turn, result_turn) = synthesize_malformed_history(&synthetic_id, &raw);
            new_turns.push(call_turn.clone());
            messages.push(call_turn);
            new_turns.push(result_turn.clone());
            messages.push(result_turn);
        }
        if cancel.is_cancelled() {
            stop_reason = StopReason::Cancelled;
            break;
@@ -599,6 +638,75 @@ fn text_chunk(text: String) -> agent_client_protocol::schema::ContentChunk {
    ContentChunk::new(ContentBlock::Text(TextContent::new(text)))
 }
 /// Mint a synthetic tool_call_id for a malformed `<tool_call>` block.
 /// The format mirrors successful calls (`call_<n>`) but uses its own
 /// counter so the ids don't collide.
 fn next_synthetic_id(counter: &mut usize) -> String {
    let id = format!("call_malformed_{}", *counter);
    *counter += 1;
    id
 }
 /// Emit a `SessionUpdate::ToolCall` with `Failed` status so Zed
 /// renders the malformed call as a structured failure card (raw
 /// body visible inside the card) instead of leaving it as inline
 /// text in the message pane.
 fn emit_malformed_tool_card(
    cx: &ConnectionTo<Client>,
    session_id: &SessionId,
    tool_call_id: &str,
    raw: &str,
 ) {
    use agent_client_protocol::schema::{
        Content, ToolCall as AcpToolCall, ToolCallContent, ToolCallId, ToolCallStatus, ToolKind,
    };
    let body = format!(
        "Tool call JSON could not be parsed. Raw body:\n\n```\n{raw}\n```\n\n\
         Expected schema:\n\n```json\n{{\"name\": \"<function>\", \"arguments\": {{...}}}}\n```",
    );
    let card = AcpToolCall::new(ToolCallId::new(tool_call_id), "Malformed tool call")
        .kind(ToolKind::Other)
        .status(ToolCallStatus::Failed)
        .raw_input(serde_json::Value::String(raw.to_string()))
        .content(vec![ToolCallContent::Content(Content::new(
            ContentBlock::Text(TextContent::new(body)),
        ))]);
    send_chunk(cx, session_id, SessionUpdate::ToolCall(card));
 }
 /// Build the assistant-turn / tool-result pair for a malformed
 /// `<tool_call>`. The assistant turn carries the raw body verbatim
 /// (so the model sees its own previous output), and the tool
 /// result spells out *why* it failed with the expected schema —
 /// enough for a competent model to self-correct on the next round.
 fn synthesize_malformed_history(tool_call_id: &str, raw: &str) -> (Message, Message) {
    let call = Message {
        role: Role::Assistant,
        content: MessageContent::ToolCalls {
            text: None,
            calls: vec![ToolCall {
                id: tool_call_id.to_string(),
                // Real tool names never start with `<` — using this
                // placeholder makes the malformed call's identity
                // unambiguous in the rendered transcript.
                name: "<invalid>".to_string(),
                arguments: raw.to_string(),
            }],
        },
    };
    let result = Message {
        role: Role::Tool,
        content: MessageContent::ToolResult {
            tool_call_id: tool_call_id.to_string(),
            content: format!(
                "ERROR: previous <tool_call> body was not valid JSON. Body was:\n{raw}\n\n\
                 Retry with the schema: {{\"name\": \"<function>\", \"arguments\": {{…}}}}"
            ),
        },
    };
    (call, result)
 }
 fn map_finish_reason(reason: Option<&str>) -> StopReason {
    match reason {
        Some("length") => StopReason::MaxTokens,
--- a/crates/helexa-acp/src/provider/mod.rs
+++ b/crates/helexa-acp/src/provider/mod.rs
@@ -163,6 +163,14 @@ pub enum CompletionEvent {
    /// the bytes by `index` until the call's arguments are complete.
    #[allow(dead_code)]
    ToolCallArgsDelta { index: usize, args_delta: String },
    /// A `<tool_call>` block whose JSON couldn't be parsed even with
    /// the qwen3 module's repair attempts. The agent surfaces this
    /// as a Failed `SessionUpdate::ToolCall` card with the raw body
    /// visible (so the editor renders structured failure UI rather
    /// than dumping the body inline in the message pane), and feeds
    /// a synthetic tool-error message back into history so the
    /// model can self-correct on the next round.
    MalformedToolCall { raw: String },
    /// Stream finished. Carries the upstream `finish_reason` if it
    /// gave one (`"stop"`, `"length"`, `"tool_calls"`, …).
    Finish { reason: Option<String> },
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -758,10 +758,8 @@ where
                                        });
                                    }
                                    crate::qwen3::ParserEvent::Malformed { raw } => {
-                                        tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; passing through as text");
+                                        tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; surfacing as Failed tool card");
-                                        yield Ok(CompletionEvent::TextDelta(format!(
+                                        yield Ok(CompletionEvent::MalformedToolCall { raw });
                                            "<tool_call>{raw}</tool_call>"
                                        )));
                                    }
                                }
                            }
@@ -842,6 +840,7 @@ where
                                    }
                                    crate::qwen3::ParserEvent::Malformed { raw } => {
                                        tracing::warn!(raw = %raw, "qwen3: unterminated <tool_call> at stream end");
                                        yield Ok(CompletionEvent::MalformedToolCall { raw });
                                    }
                                }
                            }
--- a/crates/helexa-acp/src/qwen3.rs
+++ b/crates/helexa-acp/src/qwen3.rs
@@ -250,25 +250,95 @@ impl ToolCallParser {
    fn emit_completed_tool_call(&mut self, events: &mut Vec<ParserEvent>) {
        let body = std::mem::take(&mut self.tool_call_buf);
-        let trimmed = body.trim();
+        // <tool_call></tool_call> with nothing inside: nothing to do.
-        let parsed: Result<ToolCallBody, _> = serde_json::from_str(trimmed);
+        if body.trim().is_empty() {
-        match parsed {
+            return;
-            Ok(call) => {
+        }
        match parse_tool_call_body(&body) {
            Some(call) => {
                let index = self.next_index;
                self.next_index += 1;
                let name = call.name;
                let args_json =
                    serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string());
-                events.push(ParserEvent::Start { index, name });
+                events.push(ParserEvent::Start {
                    index,
                    name: call.name,
                });
                events.push(ParserEvent::Args { index, args_json });
            }
-            Err(_) => {
+            None => {
                events.push(ParserEvent::Malformed { raw: body });
            }
        }
    }
 }
 /// Best-effort parse of a `<tool_call>` body, with repair for common
 /// model misemissions. Returns `None` only when no reasonable
 /// interpretation produces a tool call with a name.
 ///
 /// Repairs handled (in order of attempt):
 ///
 /// 1. The body parses cleanly as `{name, arguments}` — done.
 /// 2. The body has trailing extra `}` characters (model overshoots
 ///    the closing brace count). Strip up to three trailing `}` and
 ///    retry. Common for chunked-sampling models that emit the closing
 ///    of `arguments` then re-emit the closing of the outer object.
 /// 3. The body parses as JSON but has no top-level `name`; instead
 ///    `name` is nested inside `arguments`. Hoist it out. Common when
 ///    the model's training distribution had `{name, arguments}`
 ///    inverted in some examples.
 fn parse_tool_call_body(body: &str) -> Option<ToolCallBody> {
    let trimmed = body.trim();
    if trimmed.is_empty() {
        return None;
    }
    if let Ok(call) = serde_json::from_str::<ToolCallBody>(trimmed) {
        return Some(call);
    }
    // Repair 1: strip 1..=3 trailing closing braces. We only consider
    // this if the body actually ends with `}`, so the strip is
    // structural not random.
    for n in 1..=3 {
        if trimmed.len() <= n {
            break;
        }
        let candidate = &trimmed[..trimmed.len() - n];
        if !candidate.ends_with('}') {
            // Stripping further would leave the JSON without a
            // closing brace at all — definitely worse, not better.
            break;
        }
        if let Ok(call) = serde_json::from_str::<ToolCallBody>(candidate) {
            tracing::debug!(
                stripped = n,
                "qwen3: recovered tool call by trimming trailing brace(s)"
            );
            return Some(call);
        }
    }
    // Repair 2: hoist `name` from inside `arguments`.
    if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed)
        && value.get("name").is_none()
        && let Some(args_value) = value.get("arguments")
        && let Some(name) = args_value.get("name").and_then(|n| n.as_str())
    {
        let mut hoisted = args_value.clone();
        if let Some(obj) = hoisted.as_object_mut() {
            obj.remove("name");
        }
        tracing::debug!(
            name = %name,
            "qwen3: recovered tool call by hoisting name out of arguments"
        );
        return Some(ToolCallBody {
            name: name.to_string(),
            arguments: hoisted,
        });
    }
    None
 }
 /// Returns the length of the longest suffix of `haystack` that is a
 /// proper prefix of `needle`. Used to decide how many trailing bytes
 /// to hold back when scanning for `needle`: anything that could
@@ -664,6 +734,93 @@ mod tests {
        assert_eq!(starts, vec![(0, "a".into()), (1, "b".into())]);
    }
    #[test]
    fn recovers_from_extra_trailing_close_brace() {
        // Real failure case captured in the field: model emits one
        // extra `}` after closing the outer object. Parser should
        // recover, not surface as Malformed.
        let body = r#"<tool_call>{"name":"read_file","arguments":{"path":"/tmp/a"}}}</tool_call>"#;
        let mut p = ToolCallParser::new();
        let events = drive(&mut p, &[body]);
        assert!(
            events
                .iter()
                .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "read_file")),
            "expected recovered Start event; got {events:?}"
        );
        assert!(
            !events
                .iter()
                .any(|e| matches!(e, ParserEvent::Malformed { .. })),
            "should not have produced Malformed: {events:?}"
        );
    }
    #[test]
    fn recovers_from_two_extra_trailing_close_braces() {
        let body = r#"<tool_call>{"name":"bash","arguments":{"command":"ls"}}}}</tool_call>"#;
        let mut p = ToolCallParser::new();
        let events = drive(&mut p, &[body]);
        assert!(
            events
                .iter()
                .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash"))
        );
    }
    #[test]
    fn recovers_from_name_nested_in_arguments() {
        // Another field-observed failure: the model puts `name`
        // inside `arguments`. Parser should hoist it.
        let body = r#"<tool_call>{"arguments":{"command":"head -100 readme.md","name":"bash"}}</tool_call>"#;
        let mut p = ToolCallParser::new();
        let events = drive(&mut p, &[body]);
        assert!(
            events
                .iter()
                .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash")),
            "expected hoisted Start event; got {events:?}"
        );
        let args: Vec<&str> = events
            .iter()
            .filter_map(|e| match e {
                ParserEvent::Args { args_json, .. } => Some(args_json.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(args.len(), 1);
        // After hoisting, the `name` key should be removed from
        // arguments so it's not double-counted.
        assert!(!args[0].contains(r#""name""#));
        assert!(args[0].contains(r#""command""#));
    }
    #[test]
    fn empty_tool_call_block_is_noop() {
        let mut p = ToolCallParser::new();
        let events = drive(&mut p, &["before<tool_call></tool_call>after"]);
        // No Start/Args/Malformed events for an empty body; just the
        // surrounding text passes through.
        assert!(
            !events
                .iter()
                .any(|e| matches!(e, ParserEvent::Start { .. }))
        );
        assert!(
            !events
                .iter()
                .any(|e| matches!(e, ParserEvent::Malformed { .. }))
        );
        let text: String = events
            .iter()
            .filter_map(|e| match e {
                ParserEvent::Text(t) => Some(t.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(text, "beforeafter");
    }
    #[test]
    fn malformed_tool_call_does_not_crash() {
        let mut p = ToolCallParser::new();