feat(helexa-acp): infer tool name from arg shape when model omits it

Qwen3.6-27B occasionally emits a <tool_call> body with the right arguments but no top-level `name` field — observed in the field as mkdir-style bash calls like {"arguments":{"command":"mkdir -p .../doc/plan/{01-discovery,...}"}} with no `name`. The agent had no tool to dispatch and surfaced a Failed card; the model would then hang or retry the same shape. Add a shape-based inference layer: - tools::infer_tool_name(arguments) — given an `arguments` object alone, return Some(name) when the key set uniquely identifies one tool: `{command}` or `{command,cwd}` → bash, `{path,content}` → write_file, `{path,old_text,new_text}` → edit_file. Ambiguous shapes (`{path}` alone — could be read_file or list_dir) return None so the agent still emits a Failed card rather than guessing. - agent::try_repair_missing_name(raw) — parses a malformed body, applies infer_tool_name, returns (name, args_json) on success. - drive_prompt sweeps malformed_calls through this repair before the Failed-card path. Recovered calls go into tool_buckets at the next free index and dispatch through the normal tool loop. 10 new unit tests in tools::tests cover the inference table plus the verbatim mkdir failure from the field log. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 13:14:50 +03:00
parent a494c8d43c
commit ec2b6450b2
2 changed files with 179 additions and 0 deletions
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -500,6 +500,38 @@ async fn drive_prompt(
            break;
        }
        // Recovery pass before deciding "is there work to do?".
        // For each malformed body, try shape-based inference
        // against the tool catalogue (handles the "model emitted
        // `arguments` but forgot `name`" case). Successes get
        // promoted to real tool buckets; failures stay in
        // `malformed_calls` for the Failed-card path below.
        malformed_calls.retain(|raw| match try_repair_missing_name(raw) {
            Some((name, args_json)) => {
                let idx = tool_buckets
                    .keys()
                    .max()
                    .copied()
                    .map(|m| m + 1)
                    .unwrap_or(0);
                tracing::debug!(
                    inferred_name = %name,
                    index = idx,
                    "qwen3: recovered missing-name tool call via shape inference"
                );
                tool_buckets.insert(
                    idx,
                    ToolCallBucket {
                        id: format!("call_recovered_{idx}"),
                        name,
                        arguments: args_json,
                    },
                );
                false
            }
            None => true,
        });
        let has_tool_calls = !tool_buckets.is_empty();
        let has_malformed = !malformed_calls.is_empty();
@@ -679,6 +711,32 @@ fn emit_malformed_tool_card(
 /// (so the model sees its own previous output), and the tool
 /// result spells out *why* it failed with the expected schema —
 /// enough for a competent model to self-correct on the next round.
 /// Last-chance repair for a malformed `<tool_call>` body: if the
 /// model emitted a structurally-valid JSON object with `arguments`
 /// but a missing `name`, infer the intended tool from the
 /// arguments' shape (see [`tools::infer_tool_name`]). Returns
 /// `Some((name, arguments_json))` only when the inference is
 /// unambiguous; ambiguous or unrecognised shapes return `None`
 /// so the caller surfaces a Failed card.
 ///
 /// We don't try to repair anything qwen3.rs already gave up on for
 /// structural reasons (truncation, free-form prose) — those stay
 /// Failed and the model retries.
 fn try_repair_missing_name(raw: &str) -> Option<(String, String)> {
    let value: serde_json::Value = serde_json::from_str(raw.trim()).ok()?;
    // If a `name` exists at the top level, the parser's own
    // earlier repair passes already had a shot at this and decided
    // it was malformed for some other reason. Don't second-guess
    // them here.
    if value.get("name").is_some() {
        return None;
    }
    let arguments = value.get("arguments")?;
    let name = tools::infer_tool_name(arguments)?;
    let args_json = serde_json::to_string(arguments).ok()?;
    Some((name.to_string(), args_json))
 }
 fn synthesize_malformed_history(tool_call_id: &str, raw: &str) -> (Message, Message) {
    let call = Message {
        role: Role::Assistant,
--- a/crates/helexa-acp/src/tools.rs
+++ b/crates/helexa-acp/src/tools.rs
@@ -140,6 +140,55 @@ pub fn all_tools() -> Vec<ToolSpec> {
    ]
 }
 /// Try to infer which tool was intended from the shape of an
 /// `arguments` object alone. Used by the agent when the model
 /// emits a `<tool_call>` whose JSON has the right arguments but a
 /// missing or invalid top-level `name` field — a recurring
 /// Qwen3.6-27B failure mode.
 ///
 /// Returns `Some(name)` only when the argument keys uniquely match
 /// exactly one tool in the catalogue. Ambiguous shapes (`{path}`
 /// alone could be either [`READ_FILE`] or [`LIST_DIR`]) return
 /// `None` so the caller surfaces a Failed-card and lets the model
 /// retry rather than guessing wrong.
 ///
 /// Inference table (key set → tool):
 ///
 /// | Keys                                  | Tool         |
 /// |---------------------------------------|--------------|
 /// | `{command}` or `{command, cwd}`       | `bash`       |
 /// | `{path, content}`                     | `write_file` |
 /// | `{path, old_text, new_text}`          | `edit_file`  |
 /// | `{path}` / `{path, line}` / `{path, line, limit}` | *ambiguous* — None |
 /// | (anything else)                       | None         |
 pub fn infer_tool_name(arguments: &serde_json::Value) -> Option<&'static str> {
    let obj = arguments.as_object()?;
    let keys: std::collections::HashSet<&str> = obj.keys().map(|s| s.as_str()).collect();
    // `command` is unique to bash. Allow the optional `cwd` arg
    // alongside but nothing else (any unrecognised keys → bail and
    // let the model retry rather than misroute).
    if keys.contains("command") && keys.iter().all(|k| matches!(*k, "command" | "cwd")) {
        return Some(BASH);
    }
    // `content` is unique to write_file.
    if keys.contains("content") && keys.contains("path") && keys.len() == 2 {
        return Some(WRITE_FILE);
    }
    // `old_text` + `new_text` are unique to edit_file.
    if keys.contains("old_text")
        && keys.contains("new_text")
        && keys.contains("path")
        && keys.len() == 3
    {
        return Some(EDIT_FILE);
    }
    // `{path}` / `{path, line}` / `{path, line, limit}` overlap
    // between read_file (file contents) and list_dir (directory
    // contents). No safe inference — refuse.
    None
 }
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -154,6 +203,78 @@ mod tests {
        );
    }
    #[test]
    fn infer_bash_from_command_only() {
        let args = serde_json::json!({"command": "ls /tmp"});
        assert_eq!(infer_tool_name(&args), Some(BASH));
    }
    #[test]
    fn infer_bash_from_command_and_cwd() {
        let args = serde_json::json!({"command": "ls", "cwd": "/tmp"});
        assert_eq!(infer_tool_name(&args), Some(BASH));
    }
    #[test]
    fn infer_bash_from_mkdir_like_real_failure() {
        // Lifted verbatim from the agent failure that motivated
        // this helper (helexa-acp.log @ 10:03:11).
        let args = serde_json::json!({
            "command": "mkdir -p /home/grenade/git/beat/beat/doc/plan/{01-discovery,02-segmentation,03-description,04-summary,05-output}"
        });
        assert_eq!(infer_tool_name(&args), Some(BASH));
    }
    #[test]
    fn infer_write_file() {
        let args = serde_json::json!({"path": "/tmp/x", "content": "hi"});
        assert_eq!(infer_tool_name(&args), Some(WRITE_FILE));
    }
    #[test]
    fn infer_edit_file() {
        let args = serde_json::json!({
            "path": "/tmp/x", "old_text": "a", "new_text": "b"
        });
        assert_eq!(infer_tool_name(&args), Some(EDIT_FILE));
    }
    #[test]
    fn refuse_ambiguous_path_only() {
        let args = serde_json::json!({"path": "/tmp/x"});
        assert_eq!(infer_tool_name(&args), None);
    }
    #[test]
    fn refuse_ambiguous_path_with_optionals() {
        // read_file accepts these optionals; list_dir doesn't —
        // but Qwen wouldn't reliably emit them either, so we
        // can't use their presence to disambiguate. Refuse.
        let args = serde_json::json!({"path": "/tmp/x", "line": 1, "limit": 50});
        assert_eq!(infer_tool_name(&args), None);
    }
    #[test]
    fn refuse_command_with_extra_unknown_keys() {
        // Defence in depth: an unrecognised key alongside
        // `command` means we don't really know what tool the
        // model wanted; refuse rather than guess.
        let args = serde_json::json!({"command": "ls", "extra": "?"});
        assert_eq!(infer_tool_name(&args), None);
    }
    #[test]
    fn refuse_empty_args() {
        let args = serde_json::json!({});
        assert_eq!(infer_tool_name(&args), None);
    }
    #[test]
    fn refuse_non_object_args() {
        let args = serde_json::json!("not an object");
        assert_eq!(infer_tool_name(&args), None);
    }
    #[test]
    fn every_tool_has_an_object_parameter_schema() {
        for tool in all_tools() {