diff --git a/crates/helexa-acp/src/agent.rs b/crates/helexa-acp/src/agent.rs index 67e4680..f93f068 100644 --- a/crates/helexa-acp/src/agent.rs +++ b/crates/helexa-acp/src/agent.rs @@ -391,6 +391,9 @@ async fn drive_prompt( // input — we persist these to session.history at the end so // future prompts see them. let mut new_turns: Vec = Vec::new(); + // Monotonic counter for synthetic ids assigned to unparseable + // blocks across all rounds of this prompt. + let mut next_malformed_index: usize = 0; let mut stop_reason = StopReason::EndTurn; @@ -430,6 +433,11 @@ async fn drive_prompt( // bucket — `ToolCallStart` may arrive interleaved with // `ToolCallArgsDelta` for different indices. let mut tool_buckets: BTreeMap = BTreeMap::new(); + // blocks whose JSON couldn't be parsed even with + // qwen3's repair pass. We surface each as a Failed + // ToolCall card and feed a synthetic error result back to + // the model so it can retry on the next round. + let mut malformed_calls: Vec = Vec::new(); while let Some(event) = stream.next().await { let event = match event { @@ -472,6 +480,9 @@ async fn drive_prompt( .arguments .push_str(&args_delta); } + CompletionEvent::MalformedToolCall { raw } => { + malformed_calls.push(raw); + } CompletionEvent::Finish { reason } => finish_reason = reason, CompletionEvent::Usage(_) => {} } @@ -490,8 +501,9 @@ async fn drive_prompt( } let has_tool_calls = !tool_buckets.is_empty(); + let has_malformed = !malformed_calls.is_empty(); - if !has_tool_calls { + if !has_tool_calls && !has_malformed { // Terminal turn: just text. Save and finish. if !assistant_text.is_empty() { new_turns.push(Message { @@ -503,7 +515,9 @@ async fn drive_prompt( break; } - // Assistant turn carrying the tool calls. + // Assistant turn carrying any successfully-parsed tool calls + // (malformed ones are handled separately so each gets its + // own Failed card with its raw body intact). let calls: Vec = tool_buckets .values() .map(|b| ToolCall { @@ -512,15 +526,21 @@ async fn drive_prompt( arguments: b.arguments.clone(), }) .collect(); - let assistant_turn = Message { - role: Role::Assistant, - content: MessageContent::ToolCalls { - text: (!assistant_text.is_empty()).then_some(assistant_text), - calls, - }, - }; - new_turns.push(assistant_turn.clone()); - messages.push(assistant_turn); + if has_tool_calls || !assistant_text.is_empty() { + let assistant_turn = Message { + role: Role::Assistant, + content: if has_tool_calls { + MessageContent::ToolCalls { + text: (!assistant_text.is_empty()).then_some(assistant_text), + calls, + } + } else { + MessageContent::Text(assistant_text) + }, + }; + new_turns.push(assistant_turn.clone()); + messages.push(assistant_turn); + } // Refresh the mode in case the user toggled it during the // streaming above (cheap — one mutex acquisition). @@ -552,6 +572,25 @@ async fn drive_prompt( messages.push(result_turn); } + // Handle malformed calls last — each becomes a Failed + // SessionUpdate::ToolCall card (so Zed renders structured + // failure UI instead of dumping raw JSON inline) plus a + // synthetic tool-result message so the model gets concrete + // feedback for self-correction on the next round. + for raw in malformed_calls.drain(..) { + if cancel.is_cancelled() { + stop_reason = StopReason::Cancelled; + break; + } + let synthetic_id = next_synthetic_id(&mut next_malformed_index); + emit_malformed_tool_card(&cx, &session_id, &synthetic_id, &raw); + let (call_turn, result_turn) = synthesize_malformed_history(&synthetic_id, &raw); + new_turns.push(call_turn.clone()); + messages.push(call_turn); + new_turns.push(result_turn.clone()); + messages.push(result_turn); + } + if cancel.is_cancelled() { stop_reason = StopReason::Cancelled; break; @@ -599,6 +638,75 @@ fn text_chunk(text: String) -> agent_client_protocol::schema::ContentChunk { ContentChunk::new(ContentBlock::Text(TextContent::new(text))) } +/// Mint a synthetic tool_call_id for a malformed `` block. +/// The format mirrors successful calls (`call_`) but uses its own +/// counter so the ids don't collide. +fn next_synthetic_id(counter: &mut usize) -> String { + let id = format!("call_malformed_{}", *counter); + *counter += 1; + id +} + +/// Emit a `SessionUpdate::ToolCall` with `Failed` status so Zed +/// renders the malformed call as a structured failure card (raw +/// body visible inside the card) instead of leaving it as inline +/// text in the message pane. +fn emit_malformed_tool_card( + cx: &ConnectionTo, + session_id: &SessionId, + tool_call_id: &str, + raw: &str, +) { + use agent_client_protocol::schema::{ + Content, ToolCall as AcpToolCall, ToolCallContent, ToolCallId, ToolCallStatus, ToolKind, + }; + let body = format!( + "Tool call JSON could not be parsed. Raw body:\n\n```\n{raw}\n```\n\n\ + Expected schema:\n\n```json\n{{\"name\": \"\", \"arguments\": {{...}}}}\n```", + ); + let card = AcpToolCall::new(ToolCallId::new(tool_call_id), "Malformed tool call") + .kind(ToolKind::Other) + .status(ToolCallStatus::Failed) + .raw_input(serde_json::Value::String(raw.to_string())) + .content(vec![ToolCallContent::Content(Content::new( + ContentBlock::Text(TextContent::new(body)), + ))]); + send_chunk(cx, session_id, SessionUpdate::ToolCall(card)); +} + +/// Build the assistant-turn / tool-result pair for a malformed +/// ``. The assistant turn carries the raw body verbatim +/// (so the model sees its own previous output), and the tool +/// result spells out *why* it failed with the expected schema — +/// enough for a competent model to self-correct on the next round. +fn synthesize_malformed_history(tool_call_id: &str, raw: &str) -> (Message, Message) { + let call = Message { + role: Role::Assistant, + content: MessageContent::ToolCalls { + text: None, + calls: vec![ToolCall { + id: tool_call_id.to_string(), + // Real tool names never start with `<` — using this + // placeholder makes the malformed call's identity + // unambiguous in the rendered transcript. + name: "".to_string(), + arguments: raw.to_string(), + }], + }, + }; + let result = Message { + role: Role::Tool, + content: MessageContent::ToolResult { + tool_call_id: tool_call_id.to_string(), + content: format!( + "ERROR: previous body was not valid JSON. Body was:\n{raw}\n\n\ + Retry with the schema: {{\"name\": \"\", \"arguments\": {{…}}}}" + ), + }, + }; + (call, result) +} + fn map_finish_reason(reason: Option<&str>) -> StopReason { match reason { Some("length") => StopReason::MaxTokens, diff --git a/crates/helexa-acp/src/provider/mod.rs b/crates/helexa-acp/src/provider/mod.rs index 3335058..4797599 100644 --- a/crates/helexa-acp/src/provider/mod.rs +++ b/crates/helexa-acp/src/provider/mod.rs @@ -163,6 +163,14 @@ pub enum CompletionEvent { /// the bytes by `index` until the call's arguments are complete. #[allow(dead_code)] ToolCallArgsDelta { index: usize, args_delta: String }, + /// A `` block whose JSON couldn't be parsed even with + /// the qwen3 module's repair attempts. The agent surfaces this + /// as a Failed `SessionUpdate::ToolCall` card with the raw body + /// visible (so the editor renders structured failure UI rather + /// than dumping the body inline in the message pane), and feeds + /// a synthetic tool-error message back into history so the + /// model can self-correct on the next round. + MalformedToolCall { raw: String }, /// Stream finished. Carries the upstream `finish_reason` if it /// gave one (`"stop"`, `"length"`, `"tool_calls"`, …). Finish { reason: Option }, diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs index a664623..bb59cda 100644 --- a/crates/helexa-acp/src/provider/openai_chat.rs +++ b/crates/helexa-acp/src/provider/openai_chat.rs @@ -758,10 +758,8 @@ where }); } crate::qwen3::ParserEvent::Malformed { raw } => { - tracing::warn!(raw = %raw, "qwen3: malformed block; passing through as text"); - yield Ok(CompletionEvent::TextDelta(format!( - "{raw}" - ))); + tracing::warn!(raw = %raw, "qwen3: malformed block; surfacing as Failed tool card"); + yield Ok(CompletionEvent::MalformedToolCall { raw }); } } } @@ -842,6 +840,7 @@ where } crate::qwen3::ParserEvent::Malformed { raw } => { tracing::warn!(raw = %raw, "qwen3: unterminated at stream end"); + yield Ok(CompletionEvent::MalformedToolCall { raw }); } } } diff --git a/crates/helexa-acp/src/qwen3.rs b/crates/helexa-acp/src/qwen3.rs index 80baa4f..31831fe 100644 --- a/crates/helexa-acp/src/qwen3.rs +++ b/crates/helexa-acp/src/qwen3.rs @@ -250,25 +250,95 @@ impl ToolCallParser { fn emit_completed_tool_call(&mut self, events: &mut Vec) { let body = std::mem::take(&mut self.tool_call_buf); - let trimmed = body.trim(); - let parsed: Result = serde_json::from_str(trimmed); - match parsed { - Ok(call) => { + // with nothing inside: nothing to do. + if body.trim().is_empty() { + return; + } + match parse_tool_call_body(&body) { + Some(call) => { let index = self.next_index; self.next_index += 1; - let name = call.name; let args_json = serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string()); - events.push(ParserEvent::Start { index, name }); + events.push(ParserEvent::Start { + index, + name: call.name, + }); events.push(ParserEvent::Args { index, args_json }); } - Err(_) => { + None => { events.push(ParserEvent::Malformed { raw: body }); } } } } +/// Best-effort parse of a `` body, with repair for common +/// model misemissions. Returns `None` only when no reasonable +/// interpretation produces a tool call with a name. +/// +/// Repairs handled (in order of attempt): +/// +/// 1. The body parses cleanly as `{name, arguments}` — done. +/// 2. The body has trailing extra `}` characters (model overshoots +/// the closing brace count). Strip up to three trailing `}` and +/// retry. Common for chunked-sampling models that emit the closing +/// of `arguments` then re-emit the closing of the outer object. +/// 3. The body parses as JSON but has no top-level `name`; instead +/// `name` is nested inside `arguments`. Hoist it out. Common when +/// the model's training distribution had `{name, arguments}` +/// inverted in some examples. +fn parse_tool_call_body(body: &str) -> Option { + let trimmed = body.trim(); + if trimmed.is_empty() { + return None; + } + if let Ok(call) = serde_json::from_str::(trimmed) { + return Some(call); + } + // Repair 1: strip 1..=3 trailing closing braces. We only consider + // this if the body actually ends with `}`, so the strip is + // structural not random. + for n in 1..=3 { + if trimmed.len() <= n { + break; + } + let candidate = &trimmed[..trimmed.len() - n]; + if !candidate.ends_with('}') { + // Stripping further would leave the JSON without a + // closing brace at all — definitely worse, not better. + break; + } + if let Ok(call) = serde_json::from_str::(candidate) { + tracing::debug!( + stripped = n, + "qwen3: recovered tool call by trimming trailing brace(s)" + ); + return Some(call); + } + } + // Repair 2: hoist `name` from inside `arguments`. + if let Ok(value) = serde_json::from_str::(trimmed) + && value.get("name").is_none() + && let Some(args_value) = value.get("arguments") + && let Some(name) = args_value.get("name").and_then(|n| n.as_str()) + { + let mut hoisted = args_value.clone(); + if let Some(obj) = hoisted.as_object_mut() { + obj.remove("name"); + } + tracing::debug!( + name = %name, + "qwen3: recovered tool call by hoisting name out of arguments" + ); + return Some(ToolCallBody { + name: name.to_string(), + arguments: hoisted, + }); + } + None +} + /// Returns the length of the longest suffix of `haystack` that is a /// proper prefix of `needle`. Used to decide how many trailing bytes /// to hold back when scanning for `needle`: anything that could @@ -664,6 +734,93 @@ mod tests { assert_eq!(starts, vec![(0, "a".into()), (1, "b".into())]); } + #[test] + fn recovers_from_extra_trailing_close_brace() { + // Real failure case captured in the field: model emits one + // extra `}` after closing the outer object. Parser should + // recover, not surface as Malformed. + let body = r#"{"name":"read_file","arguments":{"path":"/tmp/a"}}}"#; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[body]); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "read_file")), + "expected recovered Start event; got {events:?}" + ); + assert!( + !events + .iter() + .any(|e| matches!(e, ParserEvent::Malformed { .. })), + "should not have produced Malformed: {events:?}" + ); + } + + #[test] + fn recovers_from_two_extra_trailing_close_braces() { + let body = r#"{"name":"bash","arguments":{"command":"ls"}}}}"#; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[body]); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash")) + ); + } + + #[test] + fn recovers_from_name_nested_in_arguments() { + // Another field-observed failure: the model puts `name` + // inside `arguments`. Parser should hoist it. + let body = r#"{"arguments":{"command":"head -100 readme.md","name":"bash"}}"#; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[body]); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "bash")), + "expected hoisted Start event; got {events:?}" + ); + let args: Vec<&str> = events + .iter() + .filter_map(|e| match e { + ParserEvent::Args { args_json, .. } => Some(args_json.as_str()), + _ => None, + }) + .collect(); + assert_eq!(args.len(), 1); + // After hoisting, the `name` key should be removed from + // arguments so it's not double-counted. + assert!(!args[0].contains(r#""name""#)); + assert!(args[0].contains(r#""command""#)); + } + + #[test] + fn empty_tool_call_block_is_noop() { + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &["beforeafter"]); + // No Start/Args/Malformed events for an empty body; just the + // surrounding text passes through. + assert!( + !events + .iter() + .any(|e| matches!(e, ParserEvent::Start { .. })) + ); + assert!( + !events + .iter() + .any(|e| matches!(e, ParserEvent::Malformed { .. })) + ); + let text: String = events + .iter() + .filter_map(|e| match e { + ParserEvent::Text(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + assert_eq!(text, "beforeafter"); + } + #[test] fn malformed_tool_call_does_not_crash() { let mut p = ToolCallParser::new();