diff --git a/crates/helexa-acp/src/agent.rs b/crates/helexa-acp/src/agent.rs
index 4e7b970..189a697 100644
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -322,7 +322,8 @@ async fn drive_prompt(
         )
     };
 
-    let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref())
+    let tool_specs = tools::all_tools();
+    let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref(), &tool_specs)
         .map_err(|e| anyhow::anyhow!("build system prompt: {e:#}"))?;
 
     let (provider, local_model) =
@@ -361,7 +362,6 @@ async fn drive_prompt(
     // future prompts see them.
     let mut new_turns: Vec<Message> = Vec::new();
 
-    let tool_specs = tools::all_tools();
     let mut stop_reason = StopReason::EndTurn;
 
     for round in 0..MAX_TOOL_ROUNDS {
@@ -370,10 +370,15 @@ async fn drive_prompt(
             break;
         }
 
+        // Tool descriptions reach the model via the Qwen3 `# Tools`
+        // block in the system prompt, not via the OpenAI `tools`
+        // request field — cortex/neuron pass that field through to
+        // the encoder unread, and including it would double-describe
+        // tools once a strict-OpenAI backend lands. Leave empty.
         let completion_req = CompletionRequest {
             model: local_model.clone(),
             messages: messages.clone(),
-            tools: tool_specs.clone(),
+            tools: vec![],
             temperature: None,
             top_p: None,
             max_tokens: None,
diff --git a/crates/helexa-acp/src/main.rs b/crates/helexa-acp/src/main.rs
index 9ca9523..c819a58 100644
--- a/crates/helexa-acp/src/main.rs
+++ b/crates/helexa-acp/src/main.rs
@@ -19,6 +19,7 @@ mod agent;
 mod config;
 mod prompt;
 mod provider;
+mod qwen3;
 mod session;
 mod tool_runner;
 mod tools;
diff --git a/crates/helexa-acp/src/prompt.rs b/crates/helexa-acp/src/prompt.rs
index 2613366..4b9b1b3 100644
--- a/crates/helexa-acp/src/prompt.rs
+++ b/crates/helexa-acp/src/prompt.rs
@@ -1,53 +1,59 @@
 //! System prompt assembly.
 //!
-//! The built-in prompt tells the model the working directory and
-//! enumerates the tools it actually has — without this, models trained
-//! to "be safe when you don't know your environment" tend to refuse
-//! tool use and ask the user to paste content instead. Override with
-//! `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) or `system_prompt_path`
-//! (TOML); the literal token `{cwd}` in a user-supplied file is
-//! substituted with the session's working directory.
+//! The system message has two parts:
+//!
+//! 1. A short human-readable preamble (working directory, style
+//!    instructions). Either the built-in [`DEFAULT_PROMPT`] or a
+//!    user-supplied file at `HELEXA_ACP_SYSTEM_PROMPT_PATH` /
+//!    `system_prompt_path`. `{cwd}` is substituted in both.
+//! 2. A `# Tools` block in Qwen3 Hermes format (see [`crate::qwen3`])
+//!    describing the available functions. This is what makes the
+//!    model actually call them — neuron/cortex don't honour the
+//!    OpenAI `tools` API field, so the tool list has to live in the
+//!    prompt itself.
 
 use anyhow::Context;
 use std::path::Path;
 
+use crate::provider::ToolSpec;
+use crate::qwen3;
+
 const DEFAULT_PROMPT: &str = "\
 You are helexa-acp, a coding assistant working inside an editor.
 
 Working directory: {cwd}
 
-You have the following tools. Call them whenever the user's request
-involves looking at or modifying files, or running commands — do not
-ask the user to paste file contents you could read yourself.
-
-- read_file(path, line?, limit?) — Read a text file's contents.
-- write_file(path, content) — Create or overwrite a file.
-- edit_file(path, old_text, new_text) — Replace one unique substring
-  in a file. Fails if old_text is not unique; call multiple times for
-  multiple edits.
-- list_dir(path) — List a directory's entries.
-- bash(command, cwd?) — Run a shell command via `sh -c`. Returns
-  combined stdout+stderr and the exit status.
-
-All file paths must be absolute. Writes and shell commands may
-prompt the user for permission depending on the session mode.
+Use the tools described below whenever the user's request involves
+looking at or modifying files, or running commands. Do not ask the
+user to paste file contents you could read yourself. All file paths
+must be absolute. Writes and shell commands may prompt the user for
+permission depending on the session mode.
 
 Be concise; the user is reading your output in an editor pane.";
 
 /// Build the system prompt for a session.
 ///
-/// `cwd` is the session's working directory (substituted for `{cwd}`
-/// in both the default prompt and any user-supplied template).
-/// `override_path` is the user's `system_prompt_path` (TOML) or
-/// `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) value, already resolved by
-/// [`crate::config::Config`].
-pub fn build_system_prompt(cwd: &Path, override_path: Option<&Path>) -> anyhow::Result<String> {
+/// - `cwd`: session working directory (substituted for `{cwd}` in
+///   the preamble — both the default and any user-supplied template).
+/// - `override_path`: path to a user-supplied template, already
+///   resolved by [`crate::config::Config`]. The `# Tools` block is
+///   appended *after* the user's template so a custom preamble
+///   still gets the tool descriptions the model needs.
+/// - `tools`: the tools to advertise. Empty list → no `# Tools`
+///   block is appended at all.
+pub fn build_system_prompt(
+    cwd: &Path,
+    override_path: Option<&Path>,
+    tools: &[ToolSpec],
+) -> anyhow::Result<String> {
     let template = match override_path {
         Some(path) => std::fs::read_to_string(path)
             .with_context(|| format!("read system prompt from {}", path.display()))?,
         None => DEFAULT_PROMPT.to_string(),
     };
-    Ok(template.replace("{cwd}", &cwd.display().to_string()))
+    let mut prompt = template.replace("{cwd}", &cwd.display().to_string());
+    prompt.push_str(&qwen3::render_tool_block(tools));
+    Ok(prompt)
 }
 
 #[cfg(test)]
@@ -57,7 +63,7 @@ mod tests {
 
     #[test]
     fn default_prompt_substitutes_cwd() {
-        let prompt = build_system_prompt(Path::new("/home/me/proj"), None).unwrap();
+        let prompt = build_system_prompt(Path::new("/home/me/proj"), None, &[]).unwrap();
         assert!(
             prompt.contains("/home/me/proj"),
             "cwd not interpolated: {prompt}"
@@ -67,6 +73,22 @@ mod tests {
             !prompt.contains("{cwd}"),
             "left-over placeholder in default prompt"
         );
+        // With no tools, the # Tools block is absent.
+        assert!(!prompt.contains("# Tools"));
+    }
+
+    #[test]
+    fn tools_are_appended_in_hermes_format() {
+        let spec = ToolSpec {
+            name: "read_file".into(),
+            description: "Read a file.".into(),
+            parameters: serde_json::json!({"type":"object","properties":{}, "required":[]}),
+        };
+        let prompt = build_system_prompt(Path::new("/x"), None, &[spec]).unwrap();
+        assert!(prompt.contains("# Tools"));
+        assert!(prompt.contains("<tools>"));
+        assert!(prompt.contains("\"name\":\"read_file\""));
+        assert!(prompt.contains("<tool_call>"));
     }
 
     #[test]
@@ -78,8 +100,8 @@ mod tests {
         let path = tmp.path().to_path_buf();
         drop(tmp);
 
-        let prompt =
-            build_system_prompt(Path::new("/etc"), Some(path.as_path())).expect("read override");
+        let prompt = build_system_prompt(Path::new("/etc"), Some(path.as_path()), &[])
+            .expect("read override");
         assert_eq!(prompt, "custom prompt for /etc only");
 
         let _ = std::fs::remove_file(&path);
@@ -90,6 +112,7 @@ mod tests {
         let err = build_system_prompt(
             Path::new("/tmp"),
             Some(Path::new("/definitely/not/a/real/path")),
+            &[],
         )
         .unwrap_err();
         assert!(format!("{err:#}").contains("read system prompt"));
diff --git a/crates/helexa-acp/src/provider/mod.rs b/crates/helexa-acp/src/provider/mod.rs
index 7aba5e6..3335058 100644
--- a/crates/helexa-acp/src/provider/mod.rs
+++ b/crates/helexa-acp/src/provider/mod.rs
@@ -115,7 +115,14 @@ pub enum MessageContent {
 
 #[derive(Debug, Clone)]
 pub struct ToolCall {
-    /// Provider-assigned id that ties the call to its result.
+    /// Provider-assigned id that ties the call to its result. The
+    /// Qwen3 wire format we use today doesn't carry this on the
+    /// model side (calls and results are matched positionally inside
+    /// a turn), so the field looks unused in the prod build — but it
+    /// flows through to `MessageContent::ToolResult.tool_call_id` for
+    /// history bookkeeping and a future strict-OpenAI backend will
+    /// consume it directly.
+    #[allow(dead_code)]
     pub id: String,
     pub name: String,
     /// JSON-encoded arguments. Kept as a string because providers
diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs
index 0102f6e..77c33d2 100644
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -219,19 +219,40 @@ mod tests {
             max_tokens: None,
         };
         let body = encode_request(&req);
-        // Tool defs flow through:
+        // Tool defs flow through as a courtesy to any future
+        // strict-OpenAI backend; today's Qwen3 path puts them in
+        // the prompt instead.
         let tools = body["tools"].as_array().unwrap();
         assert_eq!(tools[0]["function"]["name"], "read_file");
-        // Assistant tool_calls flow through:
+
+        // Qwen3 wire shape for the assistant turn: tool calls are
+        // inline in `content` as `<tool_call>{…}</tool_call>` blocks,
+        // *not* in a structured `tool_calls` field.
         let asst = &body["messages"][0];
         assert_eq!(asst["role"], "assistant");
-        assert_eq!(asst["tool_calls"][0]["id"], "call_1");
-        assert_eq!(asst["tool_calls"][0]["function"]["name"], "read_file");
-        // Tool result flows through:
+        assert!(
+            asst.get("tool_calls").is_none(),
+            "tool_calls should not be set"
+        );
+        let content = asst["content"].as_str().expect("content is a string");
+        assert!(
+            content.starts_with("calling read_file\n<tool_call>"),
+            "content was: {content}"
+        );
+        assert!(content.contains(r#""name":"read_file""#));
+        assert!(content.contains(r#""path":"/tmp/a.txt""#));
+        assert!(content.ends_with("</tool_call>"));
+
+        // Qwen3 wire shape for the tool result: a user-role turn
+        // wrapped in `<tool_response>`. No `role: "tool"`.
         let tool = &body["messages"][1];
-        assert_eq!(tool["role"], "tool");
-        assert_eq!(tool["tool_call_id"], "call_1");
-        assert_eq!(tool["content"], "file contents");
+        assert_eq!(tool["role"], "user");
+        assert!(tool.get("tool_call_id").is_none());
+        let tool_content = tool["content"].as_str().expect("content is a string");
+        assert_eq!(
+            tool_content,
+            "<tool_response>\nfile contents\n</tool_response>"
+        );
     }
 
     /// Build a fake eventsource stream from canned SSE `data:` lines.
@@ -275,6 +296,56 @@ mod tests {
         assert_eq!(events.len(), 4);
     }
 
+    #[tokio::test]
+    async fn decodes_qwen3_inline_tool_call_from_content_stream() {
+        // Qwen3-shaped output: `<tool_call>{…}</tool_call>` inside
+        // ordinary `delta.content`, split across multiple chunks at
+        // arbitrary byte boundaries.
+        let sse = fake_sse(vec![
+            r#"{"choices":[{"delta":{"content":"sure, let me read it.\n<too"}}]}"#,
+            r#"{"choices":[{"delta":{"content":"l_call>\n{\"name\":\"read_file\","}}]}"#,
+            r#"{"choices":[{"delta":{"content":"\"arguments\":{\"path\":\"/etc/hostname\"}}\n</tool_call>"}}]}"#,
+            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
+            "[DONE]",
+        ]);
+        let events: Vec<_> = decode_stream(sse, CancellationToken::new())
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .map(|r| r.unwrap())
+            .collect();
+
+        // Concatenated text deltas should equal the leading prose
+        // (everything before `<tool_call>`).
+        let text: String = events
+            .iter()
+            .filter_map(|e| match e {
+                CompletionEvent::TextDelta(t) => Some(t.as_str()),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(text, "sure, let me read it.\n");
+        // Exactly one structured tool call.
+        assert!(matches!(
+            events.iter().find(|e| matches!(e, CompletionEvent::ToolCallStart { .. })),
+            Some(CompletionEvent::ToolCallStart { index: 0, name, .. }) if name == "read_file"
+        ));
+        let args: Vec<&str> = events
+            .iter()
+            .filter_map(|e| match e {
+                CompletionEvent::ToolCallArgsDelta { args_delta, .. } => Some(args_delta.as_str()),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(args.len(), 1);
+        assert!(args[0].contains(r#""path":"/etc/hostname""#));
+        // Finish reason still propagates.
+        assert!(matches!(
+            events.last(),
+            Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop")
+        ));
+    }
+
     #[tokio::test]
     async fn decodes_tool_call_progressively() {
         let sse = fake_sse(vec![
@@ -391,41 +462,31 @@ fn encode_message(m: &Message) -> Value {
         (Role::System, MessageContent::Text(s)) => json!({"role": "system", "content": s}),
         (Role::User, MessageContent::Text(s)) => json!({"role": "user", "content": s}),
         (Role::Assistant, MessageContent::Text(s)) => json!({"role": "assistant", "content": s}),
+        // Qwen3 wire shape: assistant turns that called tools come
+        // back to the model with `<tool_call>{…}</tool_call>` blocks
+        // inline in `content`, *not* via the structured `tool_calls`
+        // field. Using the OpenAI shape here would invisibly drop
+        // the tool calls from the model's context the next round,
+        // because neuron's chat template only renders `content`.
         (Role::Assistant, MessageContent::ToolCalls { text, calls }) => {
-            let calls_json: Vec<Value> = calls
-                .iter()
-                .map(|c| {
-                    json!({
-                        "id": c.id,
-                        "type": "function",
-                        "function": {
-                            "name": c.name,
-                            "arguments": c.arguments,
-                        }
-                    })
-                })
-                .collect();
             json!({
                 "role": "assistant",
-                "content": text.clone().unwrap_or_default(),
-                "tool_calls": calls_json,
+                "content": crate::qwen3::render_assistant_with_tool_calls(text.as_deref(), calls),
             })
         }
+        // Qwen3 convention: tool results live in a *user* turn
+        // wrapped in `<tool_response>…</tool_response>`. The model
+        // wasn't trained on a separate `role: "tool"`.
         (
             Role::Tool,
             MessageContent::ToolResult {
-                tool_call_id,
+                tool_call_id: _,
                 content,
             },
         ) => json!({
-            "role": "tool",
-            "tool_call_id": tool_call_id,
-            "content": content,
+            "role": "user",
+            "content": crate::qwen3::render_tool_response(content),
         }),
-        // Mismatched (role, content) combinations shouldn't happen
-        // — the agent constructs them in pairs. If they do, degrade
-        // gracefully to a plain text turn so the request still goes
-        // out rather than crashing the conversation.
         (role, content) => {
             tracing::warn!(
                 ?role,
@@ -562,17 +623,25 @@ where
 {
     async_stream::stream! {
         // Track which (index) tool calls we've already announced. The
-        // OpenAI stream emits the id and name only on the first delta
-        // for each tool call; later deltas just carry argument bytes.
+        // structured OpenAI stream (the canonical wire format)
+        // carries the id and name only on the first chunk for a
+        // given index, so ToolCallStart events are deduped per
+        // index. This stays alongside the qwen3 text-stream
+        // parser below; backends that *do* emit structured
+        // tool_calls (a future strict-OpenAI endpoint) just keep
+        // working without going through the Qwen3 path.
         let mut announced: std::collections::HashSet<usize> = Default::default();
 
+        // Qwen3 wire path: tool calls come through `delta.content` as
+        // literal `<tool_call>{…}</tool_call>` blocks. The parser
+        // splits content into plain-text passthrough and
+        // structured tool-call events, holding back only the suffix
+        // bytes that could be the start of a marker.
+        let mut qwen_parser = crate::qwen3::ToolCallParser::new();
+
         let mut sse = Box::pin(sse);
         loop {
             tokio::select! {
-                // `biased;` checks `cancel.cancelled()` first on every
-                // poll — without it, a pre-cancelled token loses to a
-                // ready SSE chunk, and a mid-stream cancellation could
-                // still consume one more chunk before noticing.
                 biased;
                 _ = cancel.cancelled() => {
                     tracing::debug!("openai_chat: cancellation requested, ending stream");
@@ -606,13 +675,43 @@ where
                         if let Some(text) = choice.delta.content
                             && !text.is_empty()
                         {
-                            yield Ok(CompletionEvent::TextDelta(text));
+                            for ev in qwen_parser.feed(&text) {
+                                match ev {
+                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
+                                        yield Ok(CompletionEvent::TextDelta(t));
+                                    }
+                                    crate::qwen3::ParserEvent::Text(_) => {}
+                                    crate::qwen3::ParserEvent::Start { index, name } => {
+                                        yield Ok(CompletionEvent::ToolCallStart {
+                                            index,
+                                            id: format!("call_{index}"),
+                                            name,
+                                        });
+                                    }
+                                    crate::qwen3::ParserEvent::Args { index, args_json } => {
+                                        yield Ok(CompletionEvent::ToolCallArgsDelta {
+                                            index,
+                                            args_delta: args_json,
+                                        });
+                                    }
+                                    crate::qwen3::ParserEvent::Malformed { raw } => {
+                                        tracing::warn!(raw = %raw, "qwen3: malformed <tool_call> block; passing through as text");
+                                        yield Ok(CompletionEvent::TextDelta(format!(
+                                            "<tool_call>{raw}</tool_call>"
+                                        )));
+                                    }
+                                }
+                            }
                         }
                         if let Some(reasoning) = choice.delta.reasoning_content
                             && !reasoning.is_empty()
                         {
                             yield Ok(CompletionEvent::ReasoningDelta(reasoning));
                         }
+                        // Pass-through for backends that *do* emit
+                        // structured tool_calls (a future strict
+                        // OpenAI endpoint). Today cortex never
+                        // populates this, so this branch stays cold.
                         for tc in choice.delta.tool_calls {
                             let idx = tc.index;
                             if announced.insert(idx) {
@@ -639,6 +738,36 @@ where
                             }
                         }
                         if let Some(reason) = choice.finish_reason {
+                            // Flush any tail bytes from the qwen
+                            // parser before announcing the finish so
+                            // held-back text reaches the agent ahead
+                            // of the Finish event. A trailing
+                            // <tool_call> block with no close tag is
+                            // only logged below, never surfaced.
+                            for ev in qwen_parser.finish() {
+                                match ev {
+                                    crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => {
+                                        yield Ok(CompletionEvent::TextDelta(t));
+                                    }
+                                    crate::qwen3::ParserEvent::Text(_) => {}
+                                    crate::qwen3::ParserEvent::Start { index, name } => {
+                                        yield Ok(CompletionEvent::ToolCallStart {
+                                            index,
+                                            id: format!("call_{index}"),
+                                            name,
+                                        });
+                                    }
+                                    crate::qwen3::ParserEvent::Args { index, args_json } => {
+                                        yield Ok(CompletionEvent::ToolCallArgsDelta {
+                                            index,
+                                            args_delta: args_json,
+                                        });
+                                    }
+                                    crate::qwen3::ParserEvent::Malformed { raw } => {
+                                        tracing::warn!(raw = %raw, "qwen3: unterminated <tool_call> at stream end");
+                                    }
+                                }
+                            }
                             yield Ok(CompletionEvent::Finish { reason: Some(reason) });
                         }
                     }
diff --git a/crates/helexa-acp/src/qwen3.rs b/crates/helexa-acp/src/qwen3.rs
new file mode 100644
index 0000000..474e8e7
--- /dev/null
+++ b/crates/helexa-acp/src/qwen3.rs
@@ -0,0 +1,622 @@
+//! Qwen3 tool-call wire conventions.
+//!
+//! Qwen3 (and the Hermes-derived chat templates it inherits) wires tool
+//! use entirely through the prompt and the model's text output —
+//! nothing on the server cares about the OpenAI `tools` API field.
+//! This module owns both sides of that convention so the rest of
+//! helexa-acp can stay generic.
+//!
+//! **System prompt** — a `# Tools` block is appended to the system
+//! message describing every available function. Models trained on
+//! this template recognise it and emit calls as
+//! `<tool_call>{"name":"…","arguments":{…}}</tool_call>` inside the
+//! normal content stream.
+//!
+//! **Streaming parse** — [`ToolCallParser`] is a small state machine
+//! fed SSE content chunks. It emits a sequence of
+//! [`ParserEvent`]s — plain text outside tool calls; `Start` + `Args`
+//! events for each `<tool_call>` block. Marker detection is split-safe:
+//! a chunk that ends with `<tool` is buffered until the next chunk
+//! arrives, so even a one-byte-at-a-time stream yields the same tool
+//! calls and the same concatenated text as a single-buffer parse would.
+//!
+//! **Multi-turn replay** — when helexa-acp re-sends the conversation
+//! after a tool dispatch, the assistant turn that called the tool and
+//! the tool result need to go back to the model in Qwen3 wire shape:
+//! the assistant turn carries `<tool_call>` blocks inline in its
+//! content, and the tool result rides in a user turn wrapped in
+//! `<tool_response>…</tool_response>`. [`render_assistant_with_tool_calls`]
+//! and [`render_tool_response`] handle those.
+
+use serde_json::json;
+
+use crate::provider::{ToolCall, ToolSpec};
+
+/// One opening marker. Length 11.
+const TOOL_CALL_OPEN: &str = "<tool_call>";
+/// One closing marker. Length 12.
+const TOOL_CALL_CLOSE: &str = "</tool_call>";
+
+// ── System-prompt-side rendering ────────────────────────────────────
+
+/// Render the `# Tools` block appended to the system prompt: one JSON
+/// function signature per tool inside `<tools>` tags, in Qwen3's
+/// expected format. Returns the empty string if `tools` is empty (no
+/// separator, no `# Tools` header, so the prompt stays clean).
+pub fn render_tool_block(tools: &[ToolSpec]) -> String {
+    if tools.is_empty() {
+        return String::new();
+    }
+    let mut out = String::new();
+    out.push_str("\n\n# Tools\n\n");
+    out.push_str(
+        "You may call one or more functions to assist with the user query.\n\n\
+         You are provided with function signatures within <tools></tools> XML tags:\n",
+    );
+    out.push_str("<tools>\n");
+    for spec in tools {
+        // Each entry is one JSON object on its own line — newline-
+        // delimited, no commas between entries. This is the format
+        // Qwen3's training tokenisation expects.
+        let entry = json!({
+            "type": "function",
+            "function": {
+                "name": spec.name,
+                "description": spec.description,
+                "parameters": spec.parameters,
+            }
+        });
+        out.push_str(&serde_json::to_string(&entry).unwrap_or_default());
+        out.push('\n');
+    }
+    out.push_str("</tools>\n\n");
+    out.push_str(
+        "For each function call, return a json object with function name \
+         and arguments within <tool_call></tool_call> XML tags:\n\
+         <tool_call>\n\
+         {\"name\": <function-name>, \"arguments\": <args-json-object>}\n\
+         </tool_call>",
+    );
+    out
+}
+
+// ── Multi-turn replay rendering ─────────────────────────────────────
+
+/// Build the assistant-turn content the model expects when we replay
+/// a turn that included tool calls. Format: any non-empty text first,
+/// then one `<tool_call>\n{json}\n</tool_call>` block per call, with
+/// a single newline between the text and each successive block.
+pub fn render_assistant_with_tool_calls(text: Option<&str>, calls: &[ToolCall]) -> String {
+    let mut out = String::new();
+    if let Some(t) = text
+        && !t.is_empty()
+    {
+        out.push_str(t);
+        if !calls.is_empty() {
+            out.push('\n');
+        }
+    }
+    for (i, call) in calls.iter().enumerate() {
+        if i > 0 {
+            out.push('\n');
+        }
+        // The arguments field on a `ToolCall` is a JSON-encoded
+        // string; we want it inlined as an object inside the
+        // tool_call body. Best-effort parse; anything that isn't
+        // valid JSON (an empty string included) is embedded as a
+        // JSON string value so the emission stays well-formed.
+        let args_value: serde_json::Value = serde_json::from_str(&call.arguments)
+            .unwrap_or_else(|_| serde_json::Value::String(call.arguments.clone()));
+        let body = json!({ "name": call.name, "arguments": args_value });
+        out.push_str(TOOL_CALL_OPEN);
+        out.push('\n');
+        out.push_str(&serde_json::to_string(&body).unwrap_or_default());
+        out.push('\n');
+        out.push_str(TOOL_CALL_CLOSE);
+    }
+    out
+}
+
+/// Wrap a tool-result string in the Qwen3 `<tool_response>` block
+/// that goes inside a `user` role message on the next turn.
+pub fn render_tool_response(content: &str) -> String {
+    format!("<tool_response>\n{content}\n</tool_response>")
+}
+
+// ── Streaming parser ────────────────────────────────────────────────
+
+/// Events produced by [`ToolCallParser`]. Distinct from the
+/// `CompletionEvent` enum because the parser is provider-agnostic —
+/// the caller decides how to translate these into
+/// `CompletionEvent::ToolCall*` and `TextDelta`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ParserEvent {
+    /// Plain text that lives outside any tool_call block.
+    Text(String),
+    /// Beginning of a tool call. The index increments per call within
+    /// the same parser lifetime.
+    Start { index: usize, name: String },
+    /// JSON-encoded arguments for the most recent `Start`. Always
+    /// follows its `Start` immediately; never split across multiple
+    /// `Args` events for a single call (the parser buffers the whole
+    /// `<tool_call>` body before emitting).
+    Args { index: usize, args_json: String },
+    /// Parser encountered a malformed `<tool_call>` body. Emitted so
+    /// the agent can log and continue rather than crashing the
+    /// conversation.
+    Malformed { raw: String },
+}
+
+/// Streaming parser for Qwen3 tool calls embedded in the model's text
+/// output. Feed it chunks via [`feed`](Self::feed); call
+/// [`finish`](Self::finish) at end-of-stream to drain any trailing
+/// buffered bytes.
+///
+/// Design notes:
+///
+/// - Markers (`<tool_call>` / `</tool_call>`) can be split across
+///   chunks at any byte. The parser holds back exactly as much suffix
+///   as could be the start of the marker it's currently looking for,
+///   and no more.
+/// - JSON inside a tool_call is held in a separate buffer until the
+///   closing marker arrives. We don't try to stream-parse JSON; the
+///   bodies are tiny (one function call) and assembling first
+///   yields a much simpler implementation.
+/// - Index is monotonic across the parser's lifetime — one
+///   conversation turn can contain multiple `<tool_call>` blocks and
+///   each gets its own index.
+#[derive(Debug, Default)]
+pub struct ToolCallParser {
+    /// Unprocessed input bytes carried over between feeds.
+    buffer: String,
+    /// True while we're between `<tool_call>` and `</tool_call>`.
+    in_tool_call: bool,
+    /// Bytes accumulated inside the current `<tool_call>` block.
+    tool_call_buf: String,
+    /// Next tool-call index to assign.
+    next_index: usize,
+}
+
+impl ToolCallParser {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn feed(&mut self, chunk: &str) -> Vec<ParserEvent> {
+        self.buffer.push_str(chunk);
+        self.drain()
+    }
+
+    /// End-of-stream: emit anything still in the buffers. An
+    /// unterminated tool_call is reported as `Malformed` (its `raw`
+    /// omits the opening marker) so the caller decides what to show.
+    pub fn finish(&mut self) -> Vec<ParserEvent> {
+        let mut events = self.drain();
+        if self.in_tool_call {
+            let raw = std::mem::take(&mut self.tool_call_buf) + &std::mem::take(&mut self.buffer);
+            events.push(ParserEvent::Malformed { raw });
+            self.in_tool_call = false;
+        } else if !self.buffer.is_empty() {
+            events.push(ParserEvent::Text(std::mem::take(&mut self.buffer)));
+        }
+        events
+    }
+
+    fn drain(&mut self) -> Vec<ParserEvent> {
+        let mut events = Vec::new();
+        loop {
+            if self.in_tool_call {
+                if let Some(end) = self.buffer.find(TOOL_CALL_CLOSE) {
+                    let body = &self.buffer[..end];
+                    self.tool_call_buf.push_str(body);
+                    self.buffer.drain(..end + TOOL_CALL_CLOSE.len());
+                    self.emit_completed_tool_call(&mut events);
+                    self.in_tool_call = false;
+                } else {
+                    // Hold back exactly the suffix that could be the
+                    // start of `</tool_call>`. Everything before it
+                    // is safely part of the call body.
+                    let hold = longest_marker_prefix_suffix(&self.buffer, TOOL_CALL_CLOSE);
+                    let safe = self.buffer.len() - hold;
+                    if safe > 0 {
+                        self.tool_call_buf.push_str(&self.buffer[..safe]);
+                        self.buffer.drain(..safe);
+                    }
+                    return events;
+                }
+            } else if let Some(start) = self.buffer.find(TOOL_CALL_OPEN) {
+                let text = &self.buffer[..start];
+                if !text.is_empty() {
+                    events.push(ParserEvent::Text(text.to_string()));
+                }
+                self.buffer.drain(..start + TOOL_CALL_OPEN.len());
+                self.in_tool_call = true;
+            } else {
+                let hold = longest_marker_prefix_suffix(&self.buffer, TOOL_CALL_OPEN);
+                let safe = self.buffer.len() - hold;
+                if safe > 0 {
+                    let text: String = self.buffer.drain(..safe).collect();
+                    events.push(ParserEvent::Text(text));
+                }
+                return events;
+            }
+        }
+    }
+
+    fn emit_completed_tool_call(&mut self, events: &mut Vec<ParserEvent>) {
+        let body = std::mem::take(&mut self.tool_call_buf);
+        let trimmed = body.trim();
+        let parsed: Result<ToolCallBody, _> = serde_json::from_str(trimmed);
+        match parsed {
+            Ok(call) => {
+                let index = self.next_index;
+                self.next_index += 1;
+                let name = call.name;
+                let args_json =
+                    serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string());
+                events.push(ParserEvent::Start { index, name });
+                events.push(ParserEvent::Args { index, args_json });
+            }
+            Err(_) => {
+                events.push(ParserEvent::Malformed { raw: body });
+            }
+        }
+    }
+}
+
+/// Returns the byte length of the longest suffix of `haystack` that is
+/// also a prefix of `needle` (the whole of `needle` counts), or 0 when
+/// there is no such overlap.
+///
+/// When scanning a stream for `needle`, this is the number of trailing
+/// bytes that must be held back because they might be the beginning of
+/// the marker; everything in front of them is safe to emit.
+fn longest_marker_prefix_suffix(haystack: &str, needle: &str) -> usize {
+    // Candidate lengths are probed from longest to shortest so the first
+    // hit is the answer. Quadratic in the marker length, which is fine
+    // for markers shorter than 20 bytes.
+    let limit = haystack.len().min(needle.len());
+    (1..=limit)
+        .rev()
+        // Skip lengths that would cut either string inside a multi-byte
+        // character; slicing `needle` there would panic.
+        .filter(|&len| {
+            haystack.is_char_boundary(haystack.len() - len) && needle.is_char_boundary(len)
+        })
+        .find(|&len| haystack.ends_with(&needle[..len]))
+        .unwrap_or(0)
+}
+
+/// JSON payload expected between the tool-call markers: the function
+/// name plus its arguments.
+#[derive(Debug, serde::Deserialize)]
+struct ToolCallBody {
+    /// Name of the function to call. Required: a body without it fails
+    /// to deserialize.
+    name: String,
+    // The model is supposed to emit a JSON object here; in practice
+    // some Qwen3 variants stringify it. Deserializing into `Value`
+    // accepts either shape, and `default` lets a missing key through
+    // as `Value::Null` instead of failing.
+    #[serde(default)]
+    arguments: serde_json::Value,
+}
+
+// ── Tests ───────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    /// Builds a `ToolSpec` fixture called `name` whose schema requires a
+    /// single string property `path`.
+    fn tool(name: &str) -> ToolSpec {
+        let parameters = json!({
+            "type": "object",
+            "properties": {"path": {"type": "string"}},
+            "required": ["path"]
+        });
+        let description = format!("desc of {name}");
+        ToolSpec {
+            name: name.to_owned(),
+            description,
+            parameters,
+        }
+    }
+
+    // ── render_tool_block ───────────────────────────────────────────
+
+    /// With no tools to advertise, the rendered block is the empty string.
+    #[test]
+    fn empty_tools_renders_empty() {
+        let rendered = render_tool_block(&[]);
+        assert_eq!(rendered, "");
+    }
+
+    /// The rendered block carries the heading, the `<tools>` envelope,
+    /// one entry per function and the `<tool_call>` usage markers.
+    #[test]
+    fn tool_block_contains_hermes_markers_and_each_function() {
+        let specs = [tool("read_file"), tool("write_file")];
+        let block = render_tool_block(&specs);
+        let expected_fragments = [
+            "# Tools",
+            "<tools>",
+            "</tools>",
+            "\"name\":\"read_file\"",
+            "\"name\":\"write_file\"",
+            "<tool_call>",
+            "</tool_call>",
+        ];
+        for fragment in expected_fragments {
+            assert!(
+                block.contains(fragment),
+                "tool block is missing {fragment:?}"
+            );
+        }
+    }
+
+    // ── render_assistant_with_tool_calls ────────────────────────────
+
+    /// Without tool calls the assistant text is rendered unchanged.
+    #[test]
+    fn renders_pure_text_when_no_calls() {
+        let rendered = render_assistant_with_tool_calls(Some("hi"), &[]);
+        assert_eq!(rendered, "hi");
+    }
+
+    /// Text comes first, then a newline, then the `<tool_call>` block
+    /// holding the call's name and arguments.
+    #[test]
+    fn renders_text_then_tool_call_block() {
+        let call = ToolCall {
+            id: "call_0".into(),
+            name: "read_file".into(),
+            arguments: r#"{"path":"/etc/hostname"}"#.into(),
+        };
+        let rendered = render_assistant_with_tool_calls(Some("reading"), &[call]);
+        assert!(rendered.starts_with("reading\n<tool_call>"));
+        assert!(rendered.ends_with("</tool_call>"));
+        for fragment in [r#""name":"read_file""#, r#""path":"/etc/hostname""#] {
+            assert!(
+                rendered.contains(fragment),
+                "rendered call is missing {fragment:?}"
+            );
+        }
+    }
+
+    /// Two calls produce two complete `<tool_call>` envelopes.
+    #[test]
+    fn multiple_calls_separated_by_newlines() {
+        let calls: Vec<ToolCall> = [("call_0", "a"), ("call_1", "b")]
+            .into_iter()
+            .map(|(id, name)| ToolCall {
+                id: id.into(),
+                name: name.into(),
+                arguments: "{}".into(),
+            })
+            .collect();
+        let rendered = render_assistant_with_tool_calls(None, &calls);
+        for marker in ["<tool_call>", "</tool_call>"] {
+            assert_eq!(rendered.matches(marker).count(), 2);
+        }
+    }
+
+    /// Arguments that are not valid JSON are embedded as a JSON string
+    /// so the surrounding envelope stays well-formed.
+    #[test]
+    fn invalid_arguments_json_is_wrapped_as_string() {
+        let bogus = ToolCall {
+            id: "call_0".into(),
+            name: "x".into(),
+            arguments: "not even json".into(),
+        };
+        let rendered = render_assistant_with_tool_calls(None, &[bogus]);
+        let expected = r#""arguments":"not even json""#;
+        assert!(rendered.contains(expected));
+    }
+
+    // ── render_tool_response ────────────────────────────────────────
+
+    /// The content is placed on its own line between the
+    /// `<tool_response>` markers.
+    #[test]
+    fn tool_response_wraps_content() {
+        let wrapped = render_tool_response("hello world");
+        assert_eq!(wrapped, "<tool_response>\nhello world\n</tool_response>");
+    }
+
+    // ── longest_marker_prefix_suffix ────────────────────────────────
+
+    /// Table of haystacks against the opening marker and the number of
+    /// trailing bytes that must be held back for each.
+    #[test]
+    fn marker_prefix_suffix_returns_longest_match() {
+        const MARKER: &str = "<tool_call>";
+        let cases = [
+            ("foo<tool", 5),
+            ("foo<", 1),
+            ("foo<bar", 0),
+            ("foo", 0),
+            ("", 0),
+            // Everything but the final `>` has arrived.
+            ("foo<tool_call", 10),
+        ];
+        for (haystack, expected) in cases {
+            assert_eq!(
+                longest_marker_prefix_suffix(haystack, MARKER),
+                expected,
+                "haystack = {haystack:?}"
+            );
+        }
+    }
+
+    // ── ToolCallParser ──────────────────────────────────────────────
+
+    /// Feeds every chunk to `parser` in order, then finishes it, and
+    /// returns all events in the order they were emitted.
+    fn drive(parser: &mut ToolCallParser, chunks: &[&str]) -> Vec<ParserEvent> {
+        let mut collected: Vec<ParserEvent> = chunks
+            .iter()
+            .flat_map(|chunk| parser.feed(chunk))
+            .collect();
+        collected.extend(parser.finish());
+        collected
+    }
+
+    /// Marker-free input is emitted chunk by chunk as `Text`, with
+    /// nothing held back.
+    #[test]
+    fn plain_text_passes_through() {
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &["hello ", "world"]);
+        let expected = vec![
+            ParserEvent::Text("hello ".to_string()),
+            ParserEvent::Text("world".to_string()),
+        ];
+        assert_eq!(events, expected);
+    }
+
+    /// A call embedded in text yields: leading text, `Start`, `Args`,
+    /// trailing text.
+    #[test]
+    fn single_complete_tool_call() {
+        let input =
+            r#"before <tool_call>{"name":"read_file","arguments":{"path":"/x"}}</tool_call> after"#;
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &[input]);
+        match events.as_slice() {
+            [lead, start, args, trail, ..] => {
+                assert_eq!(lead, &ParserEvent::Text("before ".to_string()));
+                assert!(matches!(
+                    start,
+                    ParserEvent::Start { index: 0, name } if name == "read_file"
+                ));
+                assert!(matches!(
+                    args,
+                    ParserEvent::Args { index: 0, args_json }
+                        if args_json.contains(r#""path":"/x""#)
+                ));
+                assert_eq!(trail, &ParserEvent::Text(" after".to_string()));
+            }
+            other => panic!("expected at least four events, got {other:?}"),
+        }
+    }
+
+    /// An opening marker split over two chunks is still recognized: the
+    /// lone `<` is held back instead of leaking into the text.
+    #[test]
+    fn open_marker_split_across_chunks_is_buffered() {
+        // The first chunk ends one byte into the opening marker.
+        let mut parser = ToolCallParser::new();
+        let events = drive(
+            &mut parser,
+            &[
+                "before <",
+                "tool_call>",
+                r#"{"name":"a","arguments":{}}"#,
+                "</tool_call> after",
+            ],
+        );
+
+        // Single pass: gather the visible text and note which call
+        // events showed up.
+        let mut visible = String::new();
+        let mut saw_start_a = false;
+        let mut saw_args = false;
+        for event in &events {
+            match event {
+                ParserEvent::Text(t) => visible.push_str(t),
+                ParserEvent::Start { name, .. } => saw_start_a |= name == "a",
+                ParserEvent::Args { .. } => saw_args = true,
+                _ => {}
+            }
+        }
+
+        assert_eq!(visible, "before  after");
+        assert!(saw_start_a);
+        assert!(saw_args);
+    }
+
+    /// A closing marker spread over three chunks still terminates the
+    /// call, and the text behind it is emitted afterwards.
+    #[test]
+    fn close_marker_split_across_chunks() {
+        let mut parser = ToolCallParser::new();
+        let events = drive(
+            &mut parser,
+            &[
+                r#"<tool_call>{"name":"a","arguments":{}}<"#,
+                "/tool_",
+                "call>tail",
+            ],
+        );
+
+        let started_a = events
+            .iter()
+            .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "a"));
+        assert!(started_a);
+
+        // The last text event is whatever followed the closing marker.
+        let final_text = events
+            .iter()
+            .filter_map(|e| match e {
+                ParserEvent::Text(t) => Some(t.as_str()),
+                _ => None,
+            })
+            .last();
+        assert_eq!(final_text, Some("tail"));
+    }
+
+    /// Chunking must not change the outcome: feeding the input one byte
+    /// at a time yields the same text and the same call events as
+    /// feeding it in one piece.
+    #[test]
+    fn one_byte_at_a_time_produces_same_events_as_one_chunk() {
+        /// Every event that is not plain text, in emission order.
+        fn non_text(evs: &[ParserEvent]) -> Vec<&ParserEvent> {
+            evs.iter()
+                .filter(|e| !matches!(e, ParserEvent::Text(_)))
+                .collect()
+        }
+
+        let input = r#"a<tool_call>{"name":"f","arguments":{"k":1}}</tool_call>b"#;
+
+        let mut single = ToolCallParser::new();
+        let single_events = drive(&mut single, &[input]);
+
+        // The input is pure ASCII, so one `char` per chunk is one byte
+        // per chunk.
+        let chunks: Vec<String> = input.chars().map(|c| c.to_string()).collect();
+        let chunk_refs: Vec<&str> = chunks.iter().map(|s| s.as_str()).collect();
+        let mut byte = ToolCallParser::new();
+        let byte_events = drive(&mut byte, &chunk_refs);
+
+        // Text may be split into different pieces, so compare the
+        // concatenation rather than the individual events.
+        let text = |evs: &[ParserEvent]| -> String {
+            evs.iter()
+                .filter_map(|e| match e {
+                    ParserEvent::Text(t) => Some(t.as_str()),
+                    _ => None,
+                })
+                .collect()
+        };
+        assert_eq!(text(&single_events), text(&byte_events));
+
+        // The call events themselves must be identical on both paths;
+        // with that established, inspecting one path covers both.
+        assert_eq!(non_text(&single_events), non_text(&byte_events));
+
+        // Exactly one Start and one Args, with the expected name and
+        // arguments payload.
+        let starts: Vec<&str> = byte_events
+            .iter()
+            .filter_map(|e| match e {
+                ParserEvent::Start { name, .. } => Some(name.as_str()),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(starts, vec!["f"]);
+        let args: Vec<&str> = byte_events
+            .iter()
+            .filter_map(|e| match e {
+                ParserEvent::Args { args_json, .. } => Some(args_json.as_str()),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(args.len(), 1);
+        assert!(args[0].contains(r#""k":1"#));
+    }
+
+    /// Consecutive calls within one stream are numbered 0, 1, ... in the
+    /// order they complete.
+    #[test]
+    fn multiple_tool_calls_get_distinct_indices() {
+        let input = [
+            "lead ",
+            r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
+            " mid ",
+            r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
+            " tail",
+        ]
+        .concat();
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &[input.as_str()]);
+
+        let mut starts = Vec::new();
+        for event in &events {
+            if let ParserEvent::Start { index, name } = event {
+                starts.push((*index, name.as_str()));
+            }
+        }
+        assert_eq!(starts, vec![(0, "a"), (1, "b")]);
+    }
+
+    /// A call whose body is not JSON is reported as `Malformed`, and the
+    /// text on either side of it is still delivered.
+    #[test]
+    fn malformed_tool_call_does_not_crash() {
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &["x<tool_call>not valid json</tool_call>y"]);
+
+        let has_malformed = events
+            .iter()
+            .any(|e| matches!(e, ParserEvent::Malformed { .. }));
+        assert!(has_malformed);
+
+        // Bracketing text still flows.
+        for expected in ["x", "y"] {
+            let found = events
+                .iter()
+                .any(|e| matches!(e, ParserEvent::Text(t) if t == expected));
+            assert!(found, "missing text event {expected:?}");
+        }
+    }
+
+    /// A stream that ends inside a tool call yields a `Malformed` event
+    /// once the parser is finished.
+    #[test]
+    fn unterminated_tool_call_is_reported_on_finish() {
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &[r#"x<tool_call>{"name":"a""#]);
+        let malformed_count = events
+            .iter()
+            .filter(|e| matches!(e, ParserEvent::Malformed { .. }))
+            .count();
+        assert!(malformed_count > 0);
+    }
+
+    /// A string argument that merely contains `<tool` must not be
+    /// mistaken for a marker.
+    #[test]
+    fn quoted_lt_inside_args_does_not_trigger_marker() {
+        // Markers are matched as the complete literal sequences
+        // "<tool_call>" / "</tool_call>", so a partial look-alike inside
+        // the arguments is harmless. Only a literal "</tool_call>"
+        // inside args would break this, and the model has no reason to
+        // emit one.
+        let input = r#"<tool_call>{"name":"f","arguments":{"q":"why <tool emit?"}}</tool_call>"#;
+        let mut parser = ToolCallParser::new();
+        let events = drive(&mut parser, &[input]);
+
+        let mut starts: Vec<&str> = Vec::new();
+        for event in &events {
+            if let ParserEvent::Start { name, .. } = event {
+                starts.push(name.as_str());
+            }
+        }
+        assert_eq!(starts, vec!["f"]);
+    }
+}