feat(helexa-acp): inject and parse Qwen3 Hermes tool format

The OpenAI `tools` API field isn't load-bearing in this stack — neuron's chat template renders only message.content, so tool definitions sent that way never reach the model. Move both sides of the tool conversation into the Qwen3 Hermes wire format the model is actually trained on:

- Append a `# Tools` block to the system prompt describing every available function (qwen3::render_tool_block).
- Parse `<tool_call>{json}</tool_call>` markers out of the streamed content via a chunk-boundary-safe state machine (qwen3::ToolCallParser), surfacing them as the existing CompletionEvent::ToolCall* events so the agent loop doesn't change.
- Re-serialise assistant turns that called tools with inline `<tool_call>` blocks and tool results as user turns wrapped in `<tool_response>` (qwen3::render_assistant_with_tool_calls, render_tool_response).

Verified against cortex+Qwen3.6-27B: the model produces a well-formed `<tool_call>{"name":"list_dir","arguments":{"path":"/tmp"}}</tool_call>` in response to a Hermes-formatted prompt.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 11:06:38 +03:00
parent 13f4c36aeb
commit 0121a1930f
6 changed files with 861 additions and 74 deletions
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -322,7 +322,8 @@ async fn drive_prompt(
        )
    };
-    let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref())
+    let tool_specs = tools::all_tools();
    let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref(), &tool_specs)
        .map_err(|e| anyhow::anyhow!("build system prompt: {e:#}"))?;
    let (provider, local_model) =
@@ -361,7 +362,6 @@ async fn drive_prompt(
    // future prompts see them.
    let mut new_turns: Vec<Message> = Vec::new();
    let tool_specs = tools::all_tools();
    let mut stop_reason = StopReason::EndTurn;
    for round in 0..MAX_TOOL_ROUNDS {
@@ -370,10 +370,15 @@ async fn drive_prompt(
            break;
        }
        // Tool descriptions reach the model via the Qwen3 `# Tools`
        // block in the system prompt, not via the OpenAI `tools`
        // request field — cortex/neuron pass that field through to
        // the encoder unread, and including it would double-describe
        // tools once a strict-OpenAI backend lands. Leave empty.
        let completion_req = CompletionRequest {
            model: local_model.clone(),
            messages: messages.clone(),
-            tools: tool_specs.clone(),
+            tools: vec![],
            temperature: None,
            top_p: None,
            max_tokens: None,
--- a/crates/helexa-acp/src/main.rs
+++ b/crates/helexa-acp/src/main.rs
@@ -19,6 +19,7 @@ mod agent;
 mod config;
 mod prompt;
 mod provider;
 mod qwen3;
 mod session;
 mod tool_runner;
 mod tools;
--- a/crates/helexa-acp/src/prompt.rs
+++ b/crates/helexa-acp/src/prompt.rs
@@ -1,53 +1,59 @@
 //! System prompt assembly.
 //!
-//! The built-in prompt tells the model the working directory and
+//! The system message has two parts:
-//! enumerates the tools it actually has — without this, models trained
+//!
-//! to "be safe when you don't know your environment" tend to refuse
+//! 1. A short human-readable preamble (working directory, style
-//! tool use and ask the user to paste content instead. Override with
+//!    instructions). Either the built-in [`DEFAULT_PROMPT`] or a
-//! `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) or `system_prompt_path`
+//!    user-supplied file at `HELEXA_ACP_SYSTEM_PROMPT_PATH` /
-//! (TOML); the literal token `{cwd}` in a user-supplied file is
+//!    `system_prompt_path`. `{cwd}` is substituted in both.
-//! substituted with the session's working directory.
+//! 2. A `# Tools` block in Qwen3 Hermes format (see [`crate::qwen3`])
 //!    describing the available functions. This is what makes the
 //!    model actually call them — neuron/cortex don't honour the
 //!    OpenAI `tools` API field, so the tool list has to live in the
 //!    prompt itself.
 use anyhow::Context;
 use std::path::Path;
 use crate::provider::ToolSpec;
 use crate::qwen3;
 const DEFAULT_PROMPT: &str = "\
 You are helexa-acp, a coding assistant working inside an editor.
 Working directory: {cwd}
-You have the following tools. Call them whenever the user's request
+Use the tools described below whenever the user's request involves
-involves looking at or modifying files, or running commands — do not
+looking at or modifying files, or running commands. Do not ask the
-ask the user to paste file contents you could read yourself.
+user to paste file contents you could read yourself. All file paths
-
+must be absolute. Writes and shell commands may prompt the user for
- read_file(path, line?, limit?) — Read a text file's contents.
+permission depending on the session mode.
 - write_file(path, content) — Create or overwrite a file.
 - edit_file(path, old_text, new_text) — Replace one unique substring
  in a file. Fails if old_text is not unique; call multiple times for
  multiple edits.
 - list_dir(path) — List a directory's entries.
 - bash(command, cwd?) — Run a shell command via `sh -c`. Returns
  combined stdout+stderr and the exit status.
 All file paths must be absolute. Writes and shell commands may
 prompt the user for permission depending on the session mode.
 Be concise; the user is reading your output in an editor pane.";
 /// Build the system prompt for a session.
 ///
-/// `cwd` is the session's working directory (substituted for `{cwd}`
+/// - `cwd`: session working directory (substituted for `{cwd}` in
-/// in both the default prompt and any user-supplied template).
+///   the preamble — both the default and any user-supplied template).
-/// `override_path` is the user's `system_prompt_path` (TOML) or
+/// - `override_path`: path to a user-supplied template, already
-/// `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) value, already resolved by
+///   resolved by [`crate::config::Config`]. The `# Tools` block is
-/// [`crate::config::Config`].
+///   appended *after* the user's template so a custom preamble
-pub fn build_system_prompt(cwd: &Path, override_path: Option<&Path>) -> anyhow::Result<String> {
+///   still gets the tool descriptions the model needs.
 /// - `tools`: the tools to advertise. Empty list → no `# Tools`
 ///   block is appended at all.
 pub fn build_system_prompt(
    cwd: &Path,
    override_path: Option<&Path>,
    tools: &[ToolSpec],
 ) -> anyhow::Result<String> {
    let template = match override_path {
        Some(path) => std::fs::read_to_string(path)
            .with_context(|| format!("read system prompt from {}", path.display()))?,
        None => DEFAULT_PROMPT.to_string(),
    };
-    Ok(template.replace("{cwd}", &cwd.display().to_string()))
+    let mut prompt = template.replace("{cwd}", &cwd.display().to_string());
    prompt.push_str(&qwen3::render_tool_block(tools));
    Ok(prompt)
 }
 #[cfg(test)]
@@ -57,7 +63,7 @@ mod tests {
    #[test]
    fn default_prompt_substitutes_cwd() {
-        let prompt = build_system_prompt(Path::new("/home/me/proj"), None).unwrap();
+        let prompt = build_system_prompt(Path::new("/home/me/proj"), None, &[]).unwrap();
        assert!(
            prompt.contains("/home/me/proj"),
            "cwd not interpolated: {prompt}"
@@ -67,6 +73,22 @@ mod tests {
            !prompt.contains("{cwd}"),
            "left-over placeholder in default prompt"
        );
        // With no tools, the # Tools block is absent.
        assert!(!prompt.contains("# Tools"));
    }
    /// A non-empty tool list must append the Hermes `# Tools` section
    /// to the default prompt. Each expected fragment is checked in
    /// turn, and a failure names the missing fragment and prints the
    /// rendered prompt so the mismatch can be diagnosed from the test
    /// output alone.
    #[test]
    fn tools_are_appended_in_hermes_format() {
        let spec = ToolSpec {
            name: "read_file".into(),
            description: "Read a file.".into(),
            parameters: serde_json::json!({"type":"object","properties":{}, "required":[]}),
        };
        let prompt = build_system_prompt(Path::new("/x"), None, &[spec]).unwrap();
        // Section header, signature list opener, the compact JSON entry
        // for the tool, and the call-format example, in that order.
        for needle in ["# Tools", "<tools>", "\"name\":\"read_file\"", "<tool_call>"] {
            assert!(
                prompt.contains(needle),
                "missing {needle:?} in prompt: {prompt}"
            );
        }
    }
    #[test]
@@ -78,8 +100,8 @@ mod tests {
        let path = tmp.path().to_path_buf();
        drop(tmp);
-        let prompt =
+        let prompt = build_system_prompt(Path::new("/etc"), Some(path.as_path()), &[])
-            build_system_prompt(Path::new("/etc"), Some(path.as_path())).expect("read override");
+            .expect("read override");
        assert_eq!(prompt, "custom prompt for /etc only");
        let _ = std::fs::remove_file(&path);
@@ -90,6 +112,7 @@ mod tests {
        let err = build_system_prompt(
            Path::new("/tmp"),
            Some(Path::new("/definitely/not/a/real/path")),
            &[],
        )
        .unwrap_err();
        assert!(format!("{err:#}").contains("read system prompt"));
--- a/crates/helexa-acp/src/provider/mod.rs
+++ b/crates/helexa-acp/src/provider/mod.rs
@@ -115,7 +115,14 @@ pub enum MessageContent {
 #[derive(Debug, Clone)]
 pub struct ToolCall {
-    /// Provider-assigned id that ties the call to its result.
+    /// Provider-assigned id that ties the call to its result. The
    /// Qwen3 wire format we use today doesn't carry this on the
    /// model side (calls and results are matched positionally inside
    /// a turn), so the field looks unused in the prod build — but it
    /// flows through to `MessageContent::ToolResult.tool_call_id` for
    /// history bookkeeping and a future strict-OpenAI backend will
    /// consume it directly.
    #[allow(dead_code)]
    pub id: String,
    pub name: String,
    /// JSON-encoded arguments. Kept as a string because providers
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -219,19 +219,40 @@ mod tests {
            max_tokens: None,
        };
        let body = encode_request(&req);
-        // Tool defs flow through:
+        // Tool defs flow through as a courtesy to any future
        // strict-OpenAI backend; today's Qwen3 path puts them in
        // the prompt instead.
        let tools = body["tools"].as_array().unwrap();
        assert_eq!(tools[0]["function"]["name"], "read_file");
-        // Assistant tool_calls flow through:
+
        // Qwen3 wire shape for the assistant turn: tool calls are
        // inline in `content` as `<tool_call>{…}</tool_call>` blocks,
        // *not* in a structured `tool_calls` field.
        let asst = &body["messages"][0];
        assert_eq!(asst["role"], "assistant");
-        assert_eq!(asst["tool_calls"][0]["id"], "call_1");
+        assert!(
-        assert_eq!(asst["tool_calls"][0]["function"]["name"], "read_file");
+            asst.get("tool_calls").is_none(),
-        // Tool result flows through:
+            "tool_calls should not be set"
        );
        let content = asst["content"].as_str().expect("content is a string");
        assert!(
            content.starts_with("calling read_file\n<tool_call>"),
            "content was: {content}"
        );
        assert!(content.contains(r#""name":"read_file""#));
        assert!(content.contains(r#""path":"/tmp/a.txt""#));
        assert!(content.ends_with("</tool_call>"));
        // Qwen3 wire shape for the tool result: a user-role turn
        // wrapped in `<tool_response>`. No `role: "tool"`.
        let tool = &body["messages"][1];
-        assert_eq!(tool["role"], "tool");
+        assert_eq!(tool["role"], "user");
-        assert_eq!(tool["tool_call_id"], "call_1");
+        assert!(tool.get("tool_call_id").is_none());
-        assert_eq!(tool["content"], "file contents");
+        let tool_content = tool["content"].as_str().expect("content is a string");
        assert_eq!(
            tool_content,
            "<tool_response>\nfile contents\n</tool_response>"
        );
    }
    /// Build a fake eventsource stream from canned SSE `data:` lines.
@@ -275,6 +296,56 @@ mod tests {
        assert_eq!(events.len(), 4);
    }
    /// A Qwen3-style `<tool_call>{…}</tool_call>` block arriving inside
    /// ordinary `delta.content`, with the opening marker and the JSON
    /// body split across chunks, must decode into: the leading prose as
    /// text, exactly one tool-call start, one argument payload, and the
    /// finish reason last.
    #[tokio::test]
    async fn decodes_qwen3_inline_tool_call_from_content_stream() {
        // The first chunk ends in the middle of the opening marker
        // (`<too` / `l_call>`); the JSON body spans the next two chunks.
        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"content":"sure, let me read it.\n<too"}}]}"#,
            r#"{"choices":[{"delta":{"content":"l_call>\n{\"name\":\"read_file\","}}]}"#,
            r#"{"choices":[{"delta":{"content":"\"arguments\":{\"path\":\"/etc/hostname\"}}\n</tool_call>"}}]}"#,
            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
            "[DONE]",
        ]);
        let events: Vec<_> = decode_stream(sse, CancellationToken::new())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .map(|r| r.unwrap())
            .collect();

        // Concatenated text deltas should equal the leading prose
        // (everything before `<tool_call>`).
        let text: String = events
            .iter()
            .filter_map(|e| match e {
                CompletionEvent::TextDelta(t) => Some(t.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(text, "sure, let me read it.\n");

        // Exactly one structured tool call: count the start events
        // rather than only inspecting the first one, so a duplicated
        // announcement cannot slip through.
        let starts: Vec<_> = events
            .iter()
            .filter(|e| matches!(e, CompletionEvent::ToolCallStart { .. }))
            .collect();
        assert_eq!(starts.len(), 1, "expected exactly one ToolCallStart");
        assert!(matches!(
            starts[0],
            CompletionEvent::ToolCallStart { index: 0, name, .. } if name == "read_file"
        ));

        // The arguments arrive as a single payload carrying the path.
        let args: Vec<&str> = events
            .iter()
            .filter_map(|e| match e {
                CompletionEvent::ToolCallArgsDelta { args_delta, .. } => Some(args_delta.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(args.len(), 1);
        assert!(args[0].contains(r#""path":"/etc/hostname""#));

        // Finish reason still propagates.
        assert!(matches!(
            events.last(),
            Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop")
        ));
    }
    #[tokio::test]
    async fn decodes_tool_call_progressively() {
        let sse = fake_sse(vec![
@@ -391,41 +462,31 @@ fn encode_message(m: &Message) -> Value {
        (Role::System, MessageContent::Text(s)) => json!({"role": "system", "content": s}),
        (Role::User, MessageContent::Text(s)) => json!({"role": "user", "content": s}),
        (Role::Assistant, MessageContent::Text(s)) => json!({"role": "assistant", "content": s}),
        // Qwen3 wire shape: assistant turns that called tools come
        // back to the model with `<tool_call>{…}</tool_call>` blocks
        // inline in `content`, *not* via the structured `tool_calls`
        // field. Using the OpenAI shape here would invisibly drop
        // the tool calls from the model's context the next round,
        // because neuron's chat template only renders `content`.
        (Role::Assistant, MessageContent::ToolCalls { text, calls }) => {
            let calls_json: Vec<Value> = calls
                .iter()
                .map(|c| {
                    json!({
                        "id": c.id,
                        "type": "function",
                        "function": {
                            "name": c.name,
                            "arguments": c.arguments,
                        }
                    })
                })
                .collect();
            json!({
                "role": "assistant",
-                "content": text.clone().unwrap_or_default(),
+                "content": crate::qwen3::render_assistant_with_tool_calls(text.as_deref(), calls),
                "tool_calls": calls_json,
            })
        }
        // Qwen3 convention: tool results live in a *user* turn
        // wrapped in `<tool_response>…</tool_response>`. The model
        // wasn't trained on a separate `role: "tool"`.
        (
            Role::Tool,
            MessageContent::ToolResult {
-                tool_call_id,
+                tool_call_id: _,
                content,
            },
        ) => json!({
-            "role": "tool",
+            "role": "user",
-            "tool_call_id": tool_call_id,
+            "content": crate::qwen3::render_tool_response(content),
            "content": content,
        }),
        // Mismatched (role, content) combinations shouldn't happen
        // — the agent constructs them in pairs. If they do, degrade
        // gracefully to a plain text turn so the request still goes
        // out rather than crashing the conversation.
        (role, content) => {
            tracing::warn!(
                ?role,
@@ -562,17 +623,25 @@ where
 {
    async_stream::stream! {
        // Track which (index) tool calls we've already announced. The
-        // OpenAI stream emits the id and name only on the first delta
+        // For structured OpenAI tool calls (the canonical wire
-        // for each tool call; later deltas just carry argument bytes.
+        // format) we still want to dedupe ToolCallStart events per
        // index — only the first chunk for a given index carries the
        // id and name. This stays alongside the qwen3 text-stream
        // parser below; backends that *do* emit structured
        // tool_calls (a future strict-OpenAI endpoint) just keep
        // working without going through the Qwen3 path.
        let mut announced: std::collections::HashSet<usize> = Default::default();
        // Qwen3 wire path: tool calls come through `delta.content` as
        // literal `<tool_call>{…}</tool_call>` blocks. The parser
        // splits content into plain-text passthrough and
        // structured tool-call events, holding back only the suffix
        // bytes that could be the start of a marker.
        let mut qwen_parser = crate::qwen3::ToolCallParser::new();
        let mut sse = Box::pin(sse);
        loop {
            tokio::select! {
                // `biased;` checks `cancel.cancelled()` first on every
                // poll — without it, a pre-cancelled token loses to a
                // ready SSE chunk, and a mid-stream cancellation could
                // still consume one more chunk before noticing.
                biased;
                _ = cancel.cancelled() => {
                    tracing::debug!("openai_chat: cancellation requested, ending stream");
@@ -606,13 +675,43 @@ where
                        if let Some(text) = choice.delta.content
                            && !text.is_empty()
                        {
-                            yield Ok(CompletionEvent::TextDelta(text));
+                            for ev in qwen_parser.feed(&text) {
                                match ev {
                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
                                        yield Ok(CompletionEvent::TextDelta(t));
                                    }
                                    crate::qwen3::ParserEvent::Text(_) => {}
                                    crate::qwen3::ParserEvent::Start { index, name } => {
                                        yield Ok(CompletionEvent::ToolCallStart {
                                            index,
                                            id: format!("call_{index}"),
                                            name,
                                        });
                                    }
                                    crate::qwen3::ParserEvent::Args { index, args_json } => {
                                        yield Ok(CompletionEvent::ToolCallArgsDelta {
                                            index,
                                            args_delta: args_json,
                                        });
                                    }
                                    crate::qwen3::ParserEvent::Malformed { raw } => {
                                        tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; passing through as text");
                                        yield Ok(CompletionEvent::TextDelta(format!(
                                            "<tool_call>{raw}</tool_call>"
                                        )));
                                    }
                                }
                            }
                        }
                        if let Some(reasoning) = choice.delta.reasoning_content
                            && !reasoning.is_empty()
                        {
                            yield Ok(CompletionEvent::ReasoningDelta(reasoning));
                        }
                        // Pass-through for backends that *do* emit
                        // structured tool_calls (a future strict
                        // OpenAI endpoint). Today cortex never
                        // populates this, so this branch stays cold.
                        for tc in choice.delta.tool_calls {
                            let idx = tc.index;
                            if announced.insert(idx) {
@@ -639,6 +738,36 @@ where
                            }
                        }
                        if let Some(reason) = choice.finish_reason {
                            // Flush any tail bytes from the qwen
                            // parser before announcing the finish so
                            // the agent's stop-reason logic sees the
                            // complete picture (in particular, any
                            // trailing <tool_call> block that
                            // arrived without a close tag).
                            for ev in qwen_parser.finish() {
                                match ev {
                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
                                        yield Ok(CompletionEvent::TextDelta(t));
                                    }
                                    crate::qwen3::ParserEvent::Text(_) => {}
                                    crate::qwen3::ParserEvent::Start { index, name } => {
                                        yield Ok(CompletionEvent::ToolCallStart {
                                            index,
                                            id: format!("call_{index}"),
                                            name,
                                        });
                                    }
                                    crate::qwen3::ParserEvent::Args { index, args_json } => {
                                        yield Ok(CompletionEvent::ToolCallArgsDelta {
                                            index,
                                            args_delta: args_json,
                                        });
                                    }
                                    crate::qwen3::ParserEvent::Malformed { raw } => {
                                        tracing::warn!(raw = %raw, "qwen3: unterminated <tool_call> at stream end");
                                    }
                                }
                            }
                            yield Ok(CompletionEvent::Finish { reason: Some(reason) });
                        }
                    }
--- a/crates/helexa-acp/src/qwen3.rs
+++ b/crates/helexa-acp/src/qwen3.rs
@@ -0,0 +1,622 @@
 //! Qwen3 tool-call wire conventions.
 //!
 //! Qwen3 (and the Hermes-derived chat templates it inherits) wires tool
 //! use entirely through the prompt and the model's text output —
 //! nothing on the server cares about the OpenAI `tools` API field.
 //! This module owns both sides of that convention so the rest of
 //! helexa-acp can stay generic.
 //!
 //! **System prompt** — a `# Tools` block is appended to the system
 //! message describing every available function. Models trained on
 //! this template recognise it and emit calls as
 //! `<tool_call>{"name":"…","arguments":{…}}</tool_call>` inside the
 //! normal content stream.
 //!
 //! **Streaming parse** — [`ToolCallParser`] is a small state machine
 //! fed SSE content chunks. It emits a sequence of
 //! [`ParserEvent`]s — plain text outside tool calls; `Start` + `Args`
 //! events for each `<tool_call>` block. Marker detection is split-safe:
 //! a trailing fragment such as `<tool` is held back until the next
 //! chunk arrives, so even a one-byte-at-a-time stream yields the same
 //! tool calls and the same concatenated text as parsing the whole
 //! output at once (only the way text is split into events differs).
 //!
 //! **Multi-turn replay** — when helexa-acp re-sends the conversation
 //! after a tool dispatch, the assistant turn that called the tool and
 //! the tool result need to go back to the model in Qwen3 wire shape:
 //! the assistant turn carries `<tool_call>` blocks inline in its
 //! content, and the tool result rides in a user turn wrapped in
 //! `<tool_response>…</tool_response>`. [`render_assistant_with_tool_calls`]
 //! and [`render_tool_response`] handle those.
 use serde_json::json;
 use crate::provider::{ToolCall, ToolSpec};
 /// Literal tag that opens a tool-call block in the model's text
 /// output. 11 ASCII bytes long.
 const TOOL_CALL_OPEN: &str = "<tool_call>";
 /// Literal tag that closes a tool-call block. 12 ASCII bytes long.
 const TOOL_CALL_CLOSE: &str = "</tool_call>";
 // ── System-prompt-side rendering ────────────────────────────────────
 /// Render the `# Tools` section that gets appended to the system
 /// prompt so the model knows which functions it may call.
 ///
 /// The section consists of a fixed preamble, a `<tools>` list holding
 /// one compact JSON function signature per line, and a fixed epilogue
 /// showing the `<tool_call>` shape the model should answer with. It
 /// starts with a blank-line separator and has no trailing newline.
 ///
 /// Returns an empty string when `tools` is empty, so callers can
 /// append the result unconditionally.
 pub fn render_tool_block(tools: &[ToolSpec]) -> String {
    const PREAMBLE: &str = concat!(
        "\n\n# Tools\n\n",
        "You may call one or more functions to assist with the user query.\n\n",
        "You are provided with function signatures within <tools></tools> XML tags:\n",
        "<tools>\n",
    );
    const EPILOGUE: &str = concat!(
        "</tools>\n\n",
        "For each function call, return a json object with function name ",
        "and arguments within <tool_call></tool_call> XML tags:\n",
        "<tool_call>\n",
        "{\"name\": <function-name>, \"arguments\": <args-json-object>}\n",
        "</tool_call>",
    );

    if tools.is_empty() {
        return String::new();
    }

    // Newline-delimited JSON: one signature object per line, with no
    // commas between entries.
    let signatures: String = tools
        .iter()
        .map(|tool| {
            let signature = json!({
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                }
            });
            let mut line = serde_json::to_string(&signature).unwrap_or_default();
            line.push('\n');
            line
        })
        .collect();

    [PREAMBLE, signatures.as_str(), EPILOGUE].concat()
 }
 // ── Multi-turn replay rendering ─────────────────────────────────────
 /// Build the assistant-turn content used when replaying a turn that
 /// included tool calls.
 ///
 /// Layout: the visible `text` first (if present and non-empty), then
 /// one block per call, all joined by single newlines. Each block is
 /// `<tool_call>`, a line of compact JSON
 /// `{"name": …, "arguments": …}`, and `</tool_call>`. The result has
 /// no trailing newline.
 ///
 /// `ToolCall::arguments` is a JSON-encoded string and is inlined as
 /// follows:
 ///
 /// - valid JSON is embedded as-is (normally an object);
 /// - an empty or whitespace-only string, as left behind by a call
 ///   that took no arguments, becomes the empty object `{}`, so the
 ///   replay keeps the `"arguments": <object>` shape advertised in the
 ///   system prompt;
 /// - anything else is passed through as a JSON string so the emitted
 ///   block stays well-formed.
 pub fn render_assistant_with_tool_calls(text: Option<&str>, calls: &[ToolCall]) -> String {
    let mut out = String::new();
    if let Some(t) = text.filter(|t| !t.is_empty()) {
        out.push_str(t);
        // Separate prose from the first block only when a block follows.
        if !calls.is_empty() {
            out.push('\n');
        }
    }
    for (i, call) in calls.iter().enumerate() {
        if i > 0 {
            out.push('\n');
        }
        let raw_args = call.arguments.trim();
        let args_value: serde_json::Value = if raw_args.is_empty() {
            // No arguments were recorded for this call; an empty object
            // matches the documented call shape, an empty string does not.
            json!({})
        } else {
            // Best-effort parse: unparseable input is kept verbatim as
            // a JSON string rather than being dropped.
            serde_json::from_str(raw_args)
                .unwrap_or_else(|_| serde_json::Value::String(call.arguments.clone()))
        };
        let body = json!({ "name": call.name, "arguments": args_value });
        out.push_str(TOOL_CALL_OPEN);
        out.push('\n');
        out.push_str(&serde_json::to_string(&body).unwrap_or_default());
        out.push('\n');
        out.push_str(TOOL_CALL_CLOSE);
    }
    out
 }
 /// Wrap a tool result in the `<tool_response>` envelope that is sent
 /// back inside a `user` role message on the next turn.
 ///
 /// `content` is inserted verbatim on its own line(s) between the
 /// opening and closing tags; no escaping is applied.
 pub fn render_tool_response(content: &str) -> String {
    const OPEN: &str = "<tool_response>\n";
    const CLOSE: &str = "\n</tool_response>";

    let mut wrapped = String::with_capacity(OPEN.len() + content.len() + CLOSE.len());
    wrapped.push_str(OPEN);
    wrapped.push_str(content);
    wrapped.push_str(CLOSE);
    wrapped
 }
 // ── Streaming parser ────────────────────────────────────────────────
 /// Events produced by [`ToolCallParser`].
 ///
 /// Kept separate from the `CompletionEvent` enum because the parser is
 /// provider-agnostic — the caller decides how to translate these into
 /// `CompletionEvent::ToolCall*` and `TextDelta`.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ParserEvent {
    /// Plain text that lives outside any tool_call block. One logical
    /// run of text may be delivered as several `Text` events, depending
    /// on how the input was chunked.
    Text(String),
    /// Beginning of a tool call. The index increments per call within
    /// the same parser lifetime.
    Start { index: usize, name: String },
    /// JSON-encoded arguments for the most recent `Start`. Always
    /// follows its `Start` immediately; never split across multiple
    /// `Args` events for a single call (the parser buffers the whole
    /// `<tool_call>` body before emitting).
    Args { index: usize, args_json: String },
    /// The parser could not turn a `<tool_call>` block into a call:
    /// either the body was malformed, or the stream ended before the
    /// closing tag arrived. `raw` is the text collected after the
    /// opening tag, without the surrounding markers. Emitted so the
    /// agent can log and continue rather than crashing the
    /// conversation.
    Malformed { raw: String },
 }
 /// Streaming parser for Qwen3 tool calls embedded in the model's text
 /// output. Feed it chunks via [`feed`](Self::feed); call
 /// [`finish`](Self::finish) at end-of-stream to drain any trailing
 /// buffered bytes.
 ///
 /// The derived `Default` is the initial state: empty buffers, not
 /// inside a tool call, next index 0.
 ///
 /// Design notes:
 ///
 /// - Markers (`<tool_call>` / `</tool_call>`) can be split across
 ///   chunks at any byte. The parser holds back exactly as much suffix
 ///   as could be the start of the marker it's currently looking for,
 ///   and no more.
 /// - JSON inside a tool_call is held in a separate buffer until the
 ///   closing marker arrives. We don't try to stream-parse JSON; the
 ///   bodies are tiny (one function call) and assembling first
 ///   yields a much simpler implementation.
 /// - Index is monotonic across the parser's lifetime — one
 ///   conversation turn can contain multiple `<tool_call>` blocks and
 ///   each gets its own index.
 #[derive(Debug, Default)]
 pub struct ToolCallParser {
    /// Unprocessed input bytes carried over between feeds. After a
    /// drain this holds at most a suffix that might be the beginning
    /// of the marker currently being searched for.
    buffer: String,
    /// True while we're between `<tool_call>` and `</tool_call>`.
    in_tool_call: bool,
    /// Bytes accumulated inside the current `<tool_call>` block
    /// (markers excluded).
    tool_call_buf: String,
    /// Next tool-call index to assign.
    next_index: usize,
 }
 impl ToolCallParser {
    pub fn new() -> Self {
        Self::default()
    }
    /// Append `chunk` to the pending input and return every event that
    /// can be decided so far. Bytes that might still turn out to be
    /// part of a marker stay buffered until a later `feed` or `finish`.
    pub fn feed(&mut self, chunk: &str) -> Vec<ParserEvent> {
        self.buffer += chunk;
        self.drain()
    }
    /// End-of-stream flush. Runs one last drain, then empties the
    /// buffers:
    ///
    /// - inside an unterminated tool call, everything collected since
    ///   the opening tag is reported as `Malformed`, leaving it to the
    ///   caller to decide what to surface to the user;
    /// - otherwise any held-back text is emitted as a final `Text`.
    ///
    /// Afterwards the parser is outside any tool call with both
    /// buffers empty.
    pub fn finish(&mut self) -> Vec<ParserEvent> {
        let mut events = self.drain();
        let leftover = std::mem::take(&mut self.buffer);
        if std::mem::replace(&mut self.in_tool_call, false) {
            let mut raw = std::mem::take(&mut self.tool_call_buf);
            raw.push_str(&leftover);
            events.push(ParserEvent::Malformed { raw });
        } else if !leftover.is_empty() {
            events.push(ParserEvent::Text(leftover));
        }
        events
    }
    /// Consume as much of `buffer` as can be decided right now and
    /// return the resulting events.
    ///
    /// Each pass looks for the marker relevant to the current state:
    /// the closing tag while inside a tool call, the opening tag
    /// otherwise. If the marker is present, everything before it is
    /// routed (call body or plain text), the marker is discarded and
    /// the state flips. If it is absent, everything except a suffix
    /// that could still grow into the marker is routed, and the
    /// function returns to wait for more input.
    fn drain(&mut self) -> Vec<ParserEvent> {
        let mut out = Vec::new();
        loop {
            let marker = if self.in_tool_call {
                TOOL_CALL_CLOSE
            } else {
                TOOL_CALL_OPEN
            };
            // `cut` is the number of leading bytes that are certainly
            // not part of the marker being searched for.
            let (cut, marker_found) = match self.buffer.find(marker) {
                Some(pos) => (pos, true),
                None => {
                    let held_back = longest_marker_prefix_suffix(&self.buffer, marker);
                    (self.buffer.len() - held_back, false)
                }
            };

            let head: String = self.buffer.drain(..cut).collect();
            if self.in_tool_call {
                self.tool_call_buf.push_str(&head);
            } else if !head.is_empty() {
                out.push(ParserEvent::Text(head));
            }

            if !marker_found {
                return out;
            }

            // Drop the marker itself, then switch state.
            self.buffer.drain(..marker.len());
            if self.in_tool_call {
                self.emit_completed_tool_call(&mut out);
                self.in_tool_call = false;
            } else {
                self.in_tool_call = true;
            }
        }
    }
    fn emit_completed_tool_call(&mut self, events: &mut Vec<ParserEvent>) {
        let body = std::mem::take(&mut self.tool_call_buf);
        let trimmed = body.trim();
        let parsed: Result<ToolCallBody, _> = serde_json::from_str(trimmed);
        match parsed {
            Ok(call) => {
                let index = self.next_index;
                self.next_index += 1;
                let name = call.name;
                let args_json =
                    serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string());
                events.push(ParserEvent::Start { index, name });
                events.push(ParserEvent::Args { index, args_json });
            }
            Err(_) => {
                events.push(ParserEvent::Malformed { raw: body });
            }
        }
    }
 }
 /// Length in bytes of the longest non-empty prefix of `needle` that
 /// `haystack` ends with, or 0 when there is none.
 ///
 /// This is the number of trailing bytes a streaming scan for `needle`
 /// has to keep back: they might be the beginning of a marker whose
 /// remainder arrives in a later chunk, while everything in front of
 /// them can be released. If `haystack` ends with the complete
 /// `needle`, the full needle length is returned.
 fn longest_marker_prefix_suffix(haystack: &str, needle: &str) -> usize {
    // Candidate lengths are tried longest first, so the first hit is
    // the answer. Quadratic in the marker length, which is irrelevant
    // for markers this short.
    let limit = haystack.len().min(needle.len());
    (1..=limit)
        .rev()
        // Only cut either string on a UTF-8 character boundary.
        .filter(|&len| {
            needle.is_char_boundary(len) && haystack.is_char_boundary(haystack.len() - len)
        })
        .find(|&len| haystack.ends_with(&needle[..len]))
        .unwrap_or(0)
 }
 /// JSON payload expected between `<tool_call>` and `</tool_call>`,
 /// e.g. `{"name":"read_file","arguments":{"path":"/x"}}`.
 ///
 /// Deserialization fails when `name` is missing; a missing
 /// `arguments` field falls back to the default `Value`, i.e. JSON
 /// `null`.
 #[derive(Debug, serde::Deserialize)]
 struct ToolCallBody {
    /// Name of the function the model wants to call.
    name: String,
    // The model is supposed to emit a JSON object here; in practice
    // some Qwen3 variants stringify it. Deserializing into a generic
    // `Value` accepts both shapes (and any other JSON value) without
    // failing the parse.
    #[serde(default)]
    arguments: serde_json::Value,
 }
 // ── Tests ───────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::*;
    use serde_json::json;

    // ── Shared fixtures and helpers ─────────────────────────────────

    /// Tool spec called `name` with one required string parameter
    /// `path`.
    fn tool(name: &str) -> ToolSpec {
        let parameters = json!({
            "type": "object",
            "properties": {"path": {"type": "string"}},
            "required": ["path"]
        });
        ToolSpec {
            name: name.to_string(),
            description: format!("desc of {name}"),
            parameters,
        }
    }

    /// Builds a `ToolCall` from string literals.
    fn call(id: &'static str, name: &'static str, arguments: &'static str) -> ToolCall {
        ToolCall {
            id: id.into(),
            name: name.into(),
            arguments: arguments.into(),
        }
    }

    /// Feeds every chunk to `parser` in order, finishes the stream and
    /// returns all events produced along the way.
    fn drive(parser: &mut ToolCallParser, chunks: &[&str]) -> Vec<ParserEvent> {
        let mut events: Vec<ParserEvent> = chunks
            .iter()
            .flat_map(|chunk| parser.feed(chunk))
            .collect();
        events.extend(parser.finish());
        events
    }

    /// Same as `drive`, but on a fresh parser.
    fn parse(chunks: &[&str]) -> Vec<ParserEvent> {
        drive(&mut ToolCallParser::new(), chunks)
    }

    /// Concatenation of all `Text` payloads, in order.
    fn joined_text(events: &[ParserEvent]) -> String {
        events
            .iter()
            .filter_map(|event| match event {
                ParserEvent::Text(text) => Some(text.as_str()),
                _ => None,
            })
            .collect()
    }

    /// Names carried by the `Start` events, in order.
    fn start_names(events: &[ParserEvent]) -> Vec<&str> {
        events
            .iter()
            .filter_map(|event| match event {
                ParserEvent::Start { name, .. } => Some(name.as_str()),
                _ => None,
            })
            .collect()
    }

    /// True when some `Start` event carries the name `expected`.
    fn has_start_named(events: &[ParserEvent], expected: &str) -> bool {
        events
            .iter()
            .any(|event| matches!(event, ParserEvent::Start { name, .. } if name == expected))
    }

    /// True when some `Text` event is exactly `expected`.
    fn has_text(events: &[ParserEvent], expected: &str) -> bool {
        events
            .iter()
            .any(|event| matches!(event, ParserEvent::Text(text) if text == expected))
    }

    /// True when at least one `Malformed` event was produced.
    fn has_malformed(events: &[ParserEvent]) -> bool {
        events
            .iter()
            .any(|event| matches!(event, ParserEvent::Malformed { .. }))
    }

    // ── render_tool_block ───────────────────────────────────────────

    #[test]
    fn empty_tools_renders_empty() {
        assert_eq!(render_tool_block(&[]), "");
    }

    #[test]
    fn tool_block_contains_hermes_markers_and_each_function() {
        let block = render_tool_block(&[tool("read_file"), tool("write_file")]);
        let expected_fragments = [
            "# Tools",
            "<tools>",
            "</tools>",
            "\"name\":\"read_file\"",
            "\"name\":\"write_file\"",
            "<tool_call>",
            "</tool_call>",
        ];
        for fragment in expected_fragments {
            assert!(block.contains(fragment), "missing fragment: {fragment}");
        }
    }

    // ── render_assistant_with_tool_calls ────────────────────────────

    #[test]
    fn renders_pure_text_when_no_calls() {
        assert_eq!(render_assistant_with_tool_calls(Some("hi"), &[]), "hi");
    }

    #[test]
    fn renders_text_then_tool_call_block() {
        let calls = vec![call("call_0", "read_file", r#"{"path":"/etc/hostname"}"#)];
        let rendered = render_assistant_with_tool_calls(Some("reading"), &calls);
        assert!(rendered.starts_with("reading\n<tool_call>"));
        assert!(rendered.contains(r#""name":"read_file""#));
        assert!(rendered.contains(r#""path":"/etc/hostname""#));
        assert!(rendered.ends_with("</tool_call>"));
    }

    #[test]
    fn multiple_calls_separated_by_newlines() {
        let calls = vec![call("call_0", "a", "{}"), call("call_1", "b", "{}")];
        let rendered = render_assistant_with_tool_calls(None, &calls);
        for marker in ["<tool_call>", "</tool_call>"] {
            assert_eq!(rendered.matches(marker).count(), 2);
        }
    }

    #[test]
    fn invalid_arguments_json_is_wrapped_as_string() {
        let calls = vec![call("call_0", "x", "not even json")];
        let rendered = render_assistant_with_tool_calls(None, &calls);
        // The unparsable arguments end up as a JSON string so the
        // surrounding envelope stays valid.
        assert!(rendered.contains(r#""arguments":"not even json""#));
    }

    // ── render_tool_response ────────────────────────────────────────

    #[test]
    fn tool_response_wraps_content() {
        assert_eq!(
            render_tool_response("hello world"),
            "<tool_response>\nhello world\n</tool_response>"
        );
    }

    // ── longest_marker_prefix_suffix ────────────────────────────────

    #[test]
    fn marker_prefix_suffix_returns_longest_match() {
        // (haystack, expected hold-back length) against the opening
        // marker; the last row is the longest possible partial match.
        let cases = [
            ("foo<tool", 5),
            ("foo<", 1),
            ("foo<bar", 0),
            ("foo", 0),
            ("", 0),
            ("foo<tool_call", 10),
        ];
        for (haystack, expected) in cases {
            assert_eq!(
                longest_marker_prefix_suffix(haystack, "<tool_call>"),
                expected,
                "haystack: {haystack:?}"
            );
        }
    }

    // ── ToolCallParser ──────────────────────────────────────────────

    #[test]
    fn plain_text_passes_through() {
        let events = parse(&["hello ", "world"]);
        assert_eq!(
            events,
            vec![
                ParserEvent::Text("hello ".to_string()),
                ParserEvent::Text("world".to_string()),
            ]
        );
    }

    #[test]
    fn single_complete_tool_call() {
        let input =
            r#"before <tool_call>{"name":"read_file","arguments":{"path":"/x"}}</tool_call> after"#;
        let events = parse(&[input]);
        // Expected order: "before " (text) → Start → Args → " after" (text)
        match events.as_slice() {
            [
                ParserEvent::Text(before),
                ParserEvent::Start { index: 0, name },
                ParserEvent::Args {
                    index: 0,
                    args_json,
                },
                ParserEvent::Text(after),
                ..,
            ] => {
                assert_eq!(before, "before ");
                assert_eq!(name, "read_file");
                assert!(args_json.contains(r#""path":"/x""#));
                assert_eq!(after, " after");
            }
            other => panic!("unexpected event sequence: {other:?}"),
        }
    }

    #[test]
    fn open_marker_split_across_chunks_is_buffered() {
        // The first chunk ends in the middle of the opening marker.
        let events = parse(&[
            "before <",
            "tool_call>",
            r#"{"name":"a","arguments":{}}"#,
            "</tool_call> after",
        ]);
        // The dangling "<" must have been held back rather than
        // emitted, so the text adds up to exactly the two outer parts
        // and the call in between is still recognised.
        assert_eq!(joined_text(&events), "before  after");
        assert!(has_start_named(&events, "a"));
        assert!(
            events
                .iter()
                .any(|event| matches!(event, ParserEvent::Args { .. }))
        );
    }

    #[test]
    fn close_marker_split_across_chunks() {
        let events = parse(&[
            r#"<tool_call>{"name":"a","arguments":{}}<"#,
            "/tool_",
            "call>tail",
        ]);
        assert!(has_start_named(&events, "a"));
        // Whatever follows the closing marker arrives as text once the
        // call has been parsed.
        let final_text = events.iter().rev().find_map(|event| match event {
            ParserEvent::Text(text) => Some(text.as_str()),
            _ => None,
        });
        assert_eq!(final_text, Some("tail"));
    }

    #[test]
    fn one_byte_at_a_time_produces_same_events_as_one_chunk() {
        let input = r#"a<tool_call>{"name":"f","arguments":{"k":1}}</tool_call>b"#;
        let whole_events = parse(&[input]);

        let pieces: Vec<String> = input.chars().map(|c| c.to_string()).collect();
        let piece_refs: Vec<&str> = pieces.iter().map(String::as_str).collect();
        let piecewise_events = parse(&piece_refs);

        // The concatenated text is the same on both paths.
        assert_eq!(joined_text(&whole_events), joined_text(&piecewise_events));

        // The piecewise path sees exactly one Start and one Args, with
        // the expected name and arguments payload.
        assert_eq!(start_names(&piecewise_events), vec!["f"]);
        let payloads: Vec<&str> = piecewise_events
            .iter()
            .filter_map(|event| match event {
                ParserEvent::Args { args_json, .. } => Some(args_json.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(payloads.len(), 1);
        assert!(payloads[0].contains(r#""k":1"#));
    }

    #[test]
    fn multiple_tool_calls_get_distinct_indices() {
        let input = concat!(
            "lead ",
            r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
            " mid ",
            r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
            " tail",
        );
        let events = parse(&[input]);
        let indexed: Vec<(usize, &str)> = events
            .iter()
            .filter_map(|event| match event {
                ParserEvent::Start { index, name } => Some((*index, name.as_str())),
                _ => None,
            })
            .collect();
        assert_eq!(indexed, vec![(0, "a"), (1, "b")]);
    }

    #[test]
    fn malformed_tool_call_does_not_crash() {
        let events = parse(&[r#"x<tool_call>not valid json</tool_call>y"#]);
        assert!(has_malformed(&events));
        // The text on either side of the broken call still flows.
        assert!(has_text(&events, "x"));
        assert!(has_text(&events, "y"));
    }

    #[test]
    fn unterminated_tool_call_is_reported_on_finish() {
        let events = parse(&[r#"x<tool_call>{"name":"a""#]);
        assert!(has_malformed(&events));
    }

    #[test]
    fn quoted_lt_inside_args_does_not_trigger_marker() {
        // Sanity: a string value that merely contains "<tool" is not a
        // marker. The search is for the literal sequences
        // "<tool_call>" / "</tool_call>", so this could only break if
        // a literal "</tool_call>" showed up inside the arguments,
        // which the model has no reason to emit.
        let input = r#"<tool_call>{"name":"f","arguments":{"q":"why <tool emit?"}}</tool_call>"#;
        let events = parse(&[input]);
        assert_eq!(start_names(&events), vec!["f"]);
    }
 }