diff --git a/crates/helexa-acp/src/agent.rs b/crates/helexa-acp/src/agent.rs index 4e7b970..189a697 100644 --- a/crates/helexa-acp/src/agent.rs +++ b/crates/helexa-acp/src/agent.rs @@ -322,7 +322,8 @@ async fn drive_prompt( ) }; - let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref()) + let tool_specs = tools::all_tools(); + let system_prompt = build_system_prompt(&cwd, inner.system_prompt_path.as_deref(), &tool_specs) .map_err(|e| anyhow::anyhow!("build system prompt: {e:#}"))?; let (provider, local_model) = @@ -361,7 +362,6 @@ async fn drive_prompt( // future prompts see them. let mut new_turns: Vec = Vec::new(); - let tool_specs = tools::all_tools(); let mut stop_reason = StopReason::EndTurn; for round in 0..MAX_TOOL_ROUNDS { @@ -370,10 +370,15 @@ async fn drive_prompt( break; } + // Tool descriptions reach the model via the Qwen3 `# Tools` + // block in the system prompt, not via the OpenAI `tools` + // request field — cortex/neuron pass that field through to + // the encoder unread, and including it would double-describe + // tools once a strict-OpenAI backend lands. Leave empty. let completion_req = CompletionRequest { model: local_model.clone(), messages: messages.clone(), - tools: tool_specs.clone(), + tools: vec![], temperature: None, top_p: None, max_tokens: None, diff --git a/crates/helexa-acp/src/main.rs b/crates/helexa-acp/src/main.rs index 9ca9523..c819a58 100644 --- a/crates/helexa-acp/src/main.rs +++ b/crates/helexa-acp/src/main.rs @@ -19,6 +19,7 @@ mod agent; mod config; mod prompt; mod provider; +mod qwen3; mod session; mod tool_runner; mod tools; diff --git a/crates/helexa-acp/src/prompt.rs b/crates/helexa-acp/src/prompt.rs index 2613366..4b9b1b3 100644 --- a/crates/helexa-acp/src/prompt.rs +++ b/crates/helexa-acp/src/prompt.rs @@ -1,53 +1,59 @@ //! System prompt assembly. //! -//! The built-in prompt tells the model the working directory and -//! enumerates the tools it actually has — without this, models trained -//! to "be safe when you don't know your environment" tend to refuse -//! tool use and ask the user to paste content instead. Override with -//! `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) or `system_prompt_path` -//! (TOML); the literal token `{cwd}` in a user-supplied file is -//! substituted with the session's working directory. +//! The system message has two parts: +//! +//! 1. A short human-readable preamble (working directory, style +//! instructions). Either the built-in [`DEFAULT_PROMPT`] or a +//! user-supplied file at `HELEXA_ACP_SYSTEM_PROMPT_PATH` / +//! `system_prompt_path`. `{cwd}` is substituted in both. +//! 2. A `# Tools` block in Qwen3 Hermes format (see [`crate::qwen3`]) +//! describing the available functions. This is what makes the +//! model actually call them — neuron/cortex don't honour the +//! OpenAI `tools` API field, so the tool list has to live in the +//! prompt itself. use anyhow::Context; use std::path::Path; +use crate::provider::ToolSpec; +use crate::qwen3; + const DEFAULT_PROMPT: &str = "\ You are helexa-acp, a coding assistant working inside an editor. Working directory: {cwd} -You have the following tools. Call them whenever the user's request -involves looking at or modifying files, or running commands — do not -ask the user to paste file contents you could read yourself. - -- read_file(path, line?, limit?) — Read a text file's contents. -- write_file(path, content) — Create or overwrite a file. -- edit_file(path, old_text, new_text) — Replace one unique substring - in a file. Fails if old_text is not unique; call multiple times for - multiple edits. -- list_dir(path) — List a directory's entries. -- bash(command, cwd?) — Run a shell command via `sh -c`. Returns - combined stdout+stderr and the exit status. - -All file paths must be absolute. Writes and shell commands may -prompt the user for permission depending on the session mode. +Use the tools described below whenever the user's request involves +looking at or modifying files, or running commands. Do not ask the +user to paste file contents you could read yourself. All file paths +must be absolute. Writes and shell commands may prompt the user for +permission depending on the session mode. Be concise; the user is reading your output in an editor pane."; /// Build the system prompt for a session. /// -/// `cwd` is the session's working directory (substituted for `{cwd}` -/// in both the default prompt and any user-supplied template). -/// `override_path` is the user's `system_prompt_path` (TOML) or -/// `HELEXA_ACP_SYSTEM_PROMPT_PATH` (env) value, already resolved by -/// [`crate::config::Config`]. -pub fn build_system_prompt(cwd: &Path, override_path: Option<&Path>) -> anyhow::Result { +/// - `cwd`: session working directory (substituted for `{cwd}` in +/// the preamble — both the default and any user-supplied template). +/// - `override_path`: path to a user-supplied template, already +/// resolved by [`crate::config::Config`]. The `# Tools` block is +/// appended *after* the user's template so a custom preamble +/// still gets the tool descriptions the model needs. +/// - `tools`: the tools to advertise. Empty list → no `# Tools` +/// block is appended at all. +pub fn build_system_prompt( + cwd: &Path, + override_path: Option<&Path>, + tools: &[ToolSpec], +) -> anyhow::Result { let template = match override_path { Some(path) => std::fs::read_to_string(path) .with_context(|| format!("read system prompt from {}", path.display()))?, None => DEFAULT_PROMPT.to_string(), }; - Ok(template.replace("{cwd}", &cwd.display().to_string())) + let mut prompt = template.replace("{cwd}", &cwd.display().to_string()); + prompt.push_str(&qwen3::render_tool_block(tools)); + Ok(prompt) } #[cfg(test)] @@ -57,7 +63,7 @@ mod tests { #[test] fn default_prompt_substitutes_cwd() { - let prompt = build_system_prompt(Path::new("/home/me/proj"), None).unwrap(); + let prompt = build_system_prompt(Path::new("/home/me/proj"), None, &[]).unwrap(); assert!( prompt.contains("/home/me/proj"), "cwd not interpolated: {prompt}" @@ -67,6 +73,22 @@ mod tests { !prompt.contains("{cwd}"), "left-over placeholder in default prompt" ); + // With no tools, the # Tools block is absent. + assert!(!prompt.contains("# Tools")); + } + + #[test] + fn tools_are_appended_in_hermes_format() { + let spec = ToolSpec { + name: "read_file".into(), + description: "Read a file.".into(), + parameters: serde_json::json!({"type":"object","properties":{}, "required":[]}), + }; + let prompt = build_system_prompt(Path::new("/x"), None, &[spec]).unwrap(); + assert!(prompt.contains("# Tools")); + assert!(prompt.contains("")); + assert!(prompt.contains("\"name\":\"read_file\"")); + assert!(prompt.contains("")); } #[test] @@ -78,8 +100,8 @@ mod tests { let path = tmp.path().to_path_buf(); drop(tmp); - let prompt = - build_system_prompt(Path::new("/etc"), Some(path.as_path())).expect("read override"); + let prompt = build_system_prompt(Path::new("/etc"), Some(path.as_path()), &[]) + .expect("read override"); assert_eq!(prompt, "custom prompt for /etc only"); let _ = std::fs::remove_file(&path); @@ -90,6 +112,7 @@ mod tests { let err = build_system_prompt( Path::new("/tmp"), Some(Path::new("/definitely/not/a/real/path")), + &[], ) .unwrap_err(); assert!(format!("{err:#}").contains("read system prompt")); diff --git a/crates/helexa-acp/src/provider/mod.rs b/crates/helexa-acp/src/provider/mod.rs index 7aba5e6..3335058 100644 --- a/crates/helexa-acp/src/provider/mod.rs +++ b/crates/helexa-acp/src/provider/mod.rs @@ -115,7 +115,14 @@ pub enum MessageContent { #[derive(Debug, Clone)] pub struct ToolCall { - /// Provider-assigned id that ties the call to its result. + /// Provider-assigned id that ties the call to its result. The + /// Qwen3 wire format we use today doesn't carry this on the + /// model side (calls and results are matched positionally inside + /// a turn), so the field looks unused in the prod build — but it + /// flows through to `MessageContent::ToolResult.tool_call_id` for + /// history bookkeeping and a future strict-OpenAI backend will + /// consume it directly. + #[allow(dead_code)] pub id: String, pub name: String, /// JSON-encoded arguments. Kept as a string because providers diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs index 0102f6e..77c33d2 100644 --- a/crates/helexa-acp/src/provider/openai_chat.rs +++ b/crates/helexa-acp/src/provider/openai_chat.rs @@ -219,19 +219,40 @@ mod tests { max_tokens: None, }; let body = encode_request(&req); - // Tool defs flow through: + // Tool defs flow through as a courtesy to any future + // strict-OpenAI backend; today's Qwen3 path puts them in + // the prompt instead. let tools = body["tools"].as_array().unwrap(); assert_eq!(tools[0]["function"]["name"], "read_file"); - // Assistant tool_calls flow through: + + // Qwen3 wire shape for the assistant turn: tool calls are + // inline in `content` as `{…}` blocks, + // *not* in a structured `tool_calls` field. let asst = &body["messages"][0]; assert_eq!(asst["role"], "assistant"); - assert_eq!(asst["tool_calls"][0]["id"], "call_1"); - assert_eq!(asst["tool_calls"][0]["function"]["name"], "read_file"); - // Tool result flows through: + assert!( + asst.get("tool_calls").is_none(), + "tool_calls should not be set" + ); + let content = asst["content"].as_str().expect("content is a string"); + assert!( + content.starts_with("calling read_file\n"), + "content was: {content}" + ); + assert!(content.contains(r#""name":"read_file""#)); + assert!(content.contains(r#""path":"/tmp/a.txt""#)); + assert!(content.ends_with("")); + + // Qwen3 wire shape for the tool result: a user-role turn + // wrapped in ``. No `role: "tool"`. let tool = &body["messages"][1]; - assert_eq!(tool["role"], "tool"); - assert_eq!(tool["tool_call_id"], "call_1"); - assert_eq!(tool["content"], "file contents"); + assert_eq!(tool["role"], "user"); + assert!(tool.get("tool_call_id").is_none()); + let tool_content = tool["content"].as_str().expect("content is a string"); + assert_eq!( + tool_content, + "\nfile contents\n" + ); } /// Build a fake eventsource stream from canned SSE `data:` lines. @@ -275,6 +296,56 @@ mod tests { assert_eq!(events.len(), 4); } + #[tokio::test] + async fn decodes_qwen3_inline_tool_call_from_content_stream() { + // Qwen3-shaped output: `{…}` inside + // ordinary `delta.content`, split across multiple chunks at + // arbitrary byte boundaries. + let sse = fake_sse(vec![ + r#"{"choices":[{"delta":{"content":"sure, let me read it.\n\n{\"name\":\"read_file\","}}]}"#, + r#"{"choices":[{"delta":{"content":"\"arguments\":{\"path\":\"/etc/hostname\"}}\n"}}]}"#, + r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#, + "[DONE]", + ]); + let events: Vec<_> = decode_stream(sse, CancellationToken::new()) + .collect::>() + .await + .into_iter() + .map(|r| r.unwrap()) + .collect(); + + // Concatenated text deltas should equal the leading prose + // (everything before ``). + let text: String = events + .iter() + .filter_map(|e| match e { + CompletionEvent::TextDelta(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + assert_eq!(text, "sure, let me read it.\n"); + // Exactly one structured tool call. + assert!(matches!( + events.iter().find(|e| matches!(e, CompletionEvent::ToolCallStart { .. })), + Some(CompletionEvent::ToolCallStart { index: 0, name, .. }) if name == "read_file" + )); + let args: Vec<&str> = events + .iter() + .filter_map(|e| match e { + CompletionEvent::ToolCallArgsDelta { args_delta, .. } => Some(args_delta.as_str()), + _ => None, + }) + .collect(); + assert_eq!(args.len(), 1); + assert!(args[0].contains(r#""path":"/etc/hostname""#)); + // Finish reason still propagates. + assert!(matches!( + events.last(), + Some(CompletionEvent::Finish { reason }) if reason.as_deref() == Some("stop") + )); + } + #[tokio::test] async fn decodes_tool_call_progressively() { let sse = fake_sse(vec![ @@ -391,41 +462,31 @@ fn encode_message(m: &Message) -> Value { (Role::System, MessageContent::Text(s)) => json!({"role": "system", "content": s}), (Role::User, MessageContent::Text(s)) => json!({"role": "user", "content": s}), (Role::Assistant, MessageContent::Text(s)) => json!({"role": "assistant", "content": s}), + // Qwen3 wire shape: assistant turns that called tools come + // back to the model with `{…}` blocks + // inline in `content`, *not* via the structured `tool_calls` + // field. Using the OpenAI shape here would invisibly drop + // the tool calls from the model's context the next round, + // because neuron's chat template only renders `content`. (Role::Assistant, MessageContent::ToolCalls { text, calls }) => { - let calls_json: Vec = calls - .iter() - .map(|c| { - json!({ - "id": c.id, - "type": "function", - "function": { - "name": c.name, - "arguments": c.arguments, - } - }) - }) - .collect(); json!({ "role": "assistant", - "content": text.clone().unwrap_or_default(), - "tool_calls": calls_json, + "content": crate::qwen3::render_assistant_with_tool_calls(text.as_deref(), calls), }) } + // Qwen3 convention: tool results live in a *user* turn + // wrapped in ``. The model + // wasn't trained on a separate `role: "tool"`. ( Role::Tool, MessageContent::ToolResult { - tool_call_id, + tool_call_id: _, content, }, ) => json!({ - "role": "tool", - "tool_call_id": tool_call_id, - "content": content, + "role": "user", + "content": crate::qwen3::render_tool_response(content), }), - // Mismatched (role, content) combinations shouldn't happen - // — the agent constructs them in pairs. If they do, degrade - // gracefully to a plain text turn so the request still goes - // out rather than crashing the conversation. (role, content) => { tracing::warn!( ?role, @@ -562,17 +623,25 @@ where { async_stream::stream! { // Track which (index) tool calls we've already announced. The - // OpenAI stream emits the id and name only on the first delta - // for each tool call; later deltas just carry argument bytes. + // For structured OpenAI tool calls (the canonical wire + // format) we still want to dedupe ToolCallStart events per + // index — only the first chunk for a given index carries the + // id and name. This stays alongside the qwen3 text-stream + // parser below; backends that *do* emit structured + // tool_calls (a future strict-OpenAI endpoint) just keep + // working without going through the Qwen3 path. let mut announced: std::collections::HashSet = Default::default(); + // Qwen3 wire path: tool calls come through `delta.content` as + // literal `{…}` blocks. The parser + // splits content into plain-text passthrough and + // structured tool-call events, holding back only the suffix + // bytes that could be the start of a marker. + let mut qwen_parser = crate::qwen3::ToolCallParser::new(); + let mut sse = Box::pin(sse); loop { tokio::select! { - // `biased;` checks `cancel.cancelled()` first on every - // poll — without it, a pre-cancelled token loses to a - // ready SSE chunk, and a mid-stream cancellation could - // still consume one more chunk before noticing. biased; _ = cancel.cancelled() => { tracing::debug!("openai_chat: cancellation requested, ending stream"); @@ -606,13 +675,43 @@ where if let Some(text) = choice.delta.content && !text.is_empty() { - yield Ok(CompletionEvent::TextDelta(text)); + for ev in qwen_parser.feed(&text) { + match ev { + crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { + yield Ok(CompletionEvent::TextDelta(t)); + } + crate::qwen3::ParserEvent::Text(_) => {} + crate::qwen3::ParserEvent::Start { index, name } => { + yield Ok(CompletionEvent::ToolCallStart { + index, + id: format!("call_{index}"), + name, + }); + } + crate::qwen3::ParserEvent::Args { index, args_json } => { + yield Ok(CompletionEvent::ToolCallArgsDelta { + index, + args_delta: args_json, + }); + } + crate::qwen3::ParserEvent::Malformed { raw } => { + tracing::warn!(raw = %raw, "qwen3: malformed block; passing through as text"); + yield Ok(CompletionEvent::TextDelta(format!( + "{raw}" + ))); + } + } + } } if let Some(reasoning) = choice.delta.reasoning_content && !reasoning.is_empty() { yield Ok(CompletionEvent::ReasoningDelta(reasoning)); } + // Pass-through for backends that *do* emit + // structured tool_calls (a future strict + // OpenAI endpoint). Today cortex never + // populates this, so this branch stays cold. for tc in choice.delta.tool_calls { let idx = tc.index; if announced.insert(idx) { @@ -639,6 +738,36 @@ where } } if let Some(reason) = choice.finish_reason { + // Flush any tail bytes from the qwen + // parser before announcing the finish so + // the agent's stop-reason logic sees the + // complete picture (in particular, any + // trailing block that + // arrived without a close tag). + for ev in qwen_parser.finish() { + match ev { + crate::qwen3::ParserEvent::Text(t) if !t.is_empty() => { + yield Ok(CompletionEvent::TextDelta(t)); + } + crate::qwen3::ParserEvent::Text(_) => {} + crate::qwen3::ParserEvent::Start { index, name } => { + yield Ok(CompletionEvent::ToolCallStart { + index, + id: format!("call_{index}"), + name, + }); + } + crate::qwen3::ParserEvent::Args { index, args_json } => { + yield Ok(CompletionEvent::ToolCallArgsDelta { + index, + args_delta: args_json, + }); + } + crate::qwen3::ParserEvent::Malformed { raw } => { + tracing::warn!(raw = %raw, "qwen3: unterminated at stream end"); + } + } + } yield Ok(CompletionEvent::Finish { reason: Some(reason) }); } } diff --git a/crates/helexa-acp/src/qwen3.rs b/crates/helexa-acp/src/qwen3.rs new file mode 100644 index 0000000..474e8e7 --- /dev/null +++ b/crates/helexa-acp/src/qwen3.rs @@ -0,0 +1,622 @@ +//! Qwen3 tool-call wire conventions. +//! +//! Qwen3 (and the Hermes-derived chat templates it inherits) wires tool +//! use entirely through the prompt and the model's text output — +//! nothing on the server cares about the OpenAI `tools` API field. +//! This module owns both sides of that convention so the rest of +//! helexa-acp can stay generic. +//! +//! **System prompt** — a `# Tools` block is appended to the system +//! message describing every available function. Models trained on +//! this template recognise it and emit calls as +//! `{"name":"…","arguments":{…}}` inside the +//! normal content stream. +//! +//! **Streaming parse** — [`ToolCallParser`] is a small state machine +//! fed SSE content chunks. It emits a sequence of +//! [`ParserEvent`]s — plain text outside tool calls; `Start` + `Args` +//! events for each `` block. Marker detection is split-safe: +//! a chunk that ends with `` blocks inline in its +//! content, and the tool result rides in a user turn wrapped in +//! ``. [`render_assistant_with_tool_calls`] +//! and [`render_tool_response`] handle those. + +use serde_json::json; + +use crate::provider::{ToolCall, ToolSpec}; + +/// One opening marker. Length 11. +const TOOL_CALL_OPEN: &str = ""; +/// One closing marker. Length 12. +const TOOL_CALL_CLOSE: &str = ""; + +// ── System-prompt-side rendering ──────────────────────────────────── + +/// Append-this-to-the-system-prompt block describing the available +/// tools in Qwen3's expected format. Returns the empty string if +/// `tools` is empty (no separator, no `# Tools` header — keeps the +/// prompt clean when tools are absent for any reason). +pub fn render_tool_block(tools: &[ToolSpec]) -> String { + if tools.is_empty() { + return String::new(); + } + let mut out = String::new(); + out.push_str("\n\n# Tools\n\n"); + out.push_str( + "You may call one or more functions to assist with the user query.\n\n\ + You are provided with function signatures within XML tags:\n", + ); + out.push_str("\n"); + for spec in tools { + // Each entry is one JSON object on its own line — newline- + // delimited, no commas between entries. This is the format + // Qwen3's training tokenisation expects. + let entry = json!({ + "type": "function", + "function": { + "name": spec.name, + "description": spec.description, + "parameters": spec.parameters, + } + }); + out.push_str(&serde_json::to_string(&entry).unwrap_or_default()); + out.push('\n'); + } + out.push_str("\n\n"); + out.push_str( + "For each function call, return a json object with function name \ + and arguments within XML tags:\n\ + \n\ + {\"name\": , \"arguments\": }\n\ + ", + ); + out +} + +// ── Multi-turn replay rendering ───────────────────────────────────── + +/// Build the assistant-turn content the model expects when we replay +/// a turn that included tool calls. Format: any visible text first, +/// then one `{json}` block per call, joined by +/// newlines. +pub fn render_assistant_with_tool_calls(text: Option<&str>, calls: &[ToolCall]) -> String { + let mut out = String::new(); + if let Some(t) = text + && !t.is_empty() + { + out.push_str(t); + if !calls.is_empty() { + out.push('\n'); + } + } + for (i, call) in calls.iter().enumerate() { + if i > 0 { + out.push('\n'); + } + // The arguments field on a `ToolCall` is a JSON-encoded + // string; we want it inlined as an object inside the + // tool_call body. Best-effort parse; if it isn't valid JSON, + // pass the raw string through wrapped in quotes so the + // emission stays well-formed. + let args_value: serde_json::Value = serde_json::from_str(&call.arguments) + .unwrap_or_else(|_| serde_json::Value::String(call.arguments.clone())); + let body = json!({ "name": call.name, "arguments": args_value }); + out.push_str(TOOL_CALL_OPEN); + out.push('\n'); + out.push_str(&serde_json::to_string(&body).unwrap_or_default()); + out.push('\n'); + out.push_str(TOOL_CALL_CLOSE); + } + out +} + +/// Wrap a tool-result string in the Qwen3 `` block +/// that goes inside a `user` role message on the next turn. +pub fn render_tool_response(content: &str) -> String { + format!("\n{content}\n") +} + +// ── Streaming parser ──────────────────────────────────────────────── + +/// Events produced by [`ToolCallParser`]. Distinct from the +/// `CompletionEvent` enum because the parser is provider-agnostic — +/// the caller decides how to translate these into +/// `CompletionEvent::ToolCall*` and `TextDelta`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ParserEvent { + /// Plain text that lives outside any tool_call block. + Text(String), + /// Beginning of a tool call. The index increments per call within + /// the same parser lifetime. + Start { index: usize, name: String }, + /// JSON-encoded arguments for the most recent `Start`. Always + /// follows its `Start` immediately; never split across multiple + /// `Args` events for a single call (the parser buffers the whole + /// `` body before emitting). + Args { index: usize, args_json: String }, + /// Parser encountered a malformed `` body. Emitted so + /// the agent can log and continue rather than crashing the + /// conversation. + Malformed { raw: String }, +} + +/// Streaming parser for Qwen3 tool calls embedded in the model's text +/// output. Feed it chunks via [`feed`](Self::feed); call +/// [`finish`](Self::finish) at end-of-stream to drain any trailing +/// buffered bytes. +/// +/// Design notes: +/// +/// - Markers (`` / ``) can be split across +/// chunks at any byte. The parser holds back exactly as much suffix +/// as could be the start of the marker it's currently looking for, +/// and no more. +/// - JSON inside a tool_call is held in a separate buffer until the +/// closing marker arrives. We don't try to stream-parse JSON; the +/// bodies are tiny (one function call) and assembling first +/// yields a much simpler implementation. +/// - Index is monotonic across the parser's lifetime — one +/// conversation turn can contain multiple `` blocks and +/// each gets its own index. +#[derive(Debug, Default)] +pub struct ToolCallParser { + /// Unprocessed input bytes carried over between feeds. + buffer: String, + /// True while we're between `` and ``. + in_tool_call: bool, + /// Bytes accumulated inside the current `` block. + tool_call_buf: String, + /// Next tool-call index to assign. + next_index: usize, +} + +impl ToolCallParser { + pub fn new() -> Self { + Self::default() + } + + pub fn feed(&mut self, chunk: &str) -> Vec { + self.buffer.push_str(chunk); + self.drain() + } + + /// End-of-stream: emit anything still in the buffers. An + /// unterminated tool_call is reported as `Malformed` so the + /// caller can decide what to surface to the user. + pub fn finish(&mut self) -> Vec { + let mut events = self.drain(); + if self.in_tool_call { + let raw = std::mem::take(&mut self.tool_call_buf) + &std::mem::take(&mut self.buffer); + events.push(ParserEvent::Malformed { raw }); + self.in_tool_call = false; + } else if !self.buffer.is_empty() { + events.push(ParserEvent::Text(std::mem::take(&mut self.buffer))); + } + events + } + + fn drain(&mut self) -> Vec { + let mut events = Vec::new(); + loop { + if self.in_tool_call { + if let Some(end) = self.buffer.find(TOOL_CALL_CLOSE) { + let body = &self.buffer[..end]; + self.tool_call_buf.push_str(body); + self.buffer.drain(..end + TOOL_CALL_CLOSE.len()); + self.emit_completed_tool_call(&mut events); + self.in_tool_call = false; + } else { + // Hold back exactly the suffix that could be the + // start of ``. Everything before it + // is safely part of the call body. + let hold = longest_marker_prefix_suffix(&self.buffer, TOOL_CALL_CLOSE); + let safe = self.buffer.len() - hold; + if safe > 0 { + self.tool_call_buf.push_str(&self.buffer[..safe]); + self.buffer.drain(..safe); + } + return events; + } + } else if let Some(start) = self.buffer.find(TOOL_CALL_OPEN) { + let text = &self.buffer[..start]; + if !text.is_empty() { + events.push(ParserEvent::Text(text.to_string())); + } + self.buffer.drain(..start + TOOL_CALL_OPEN.len()); + self.in_tool_call = true; + } else { + let hold = longest_marker_prefix_suffix(&self.buffer, TOOL_CALL_OPEN); + let safe = self.buffer.len() - hold; + if safe > 0 { + let text: String = self.buffer.drain(..safe).collect(); + events.push(ParserEvent::Text(text)); + } + return events; + } + } + } + + fn emit_completed_tool_call(&mut self, events: &mut Vec) { + let body = std::mem::take(&mut self.tool_call_buf); + let trimmed = body.trim(); + let parsed: Result = serde_json::from_str(trimmed); + match parsed { + Ok(call) => { + let index = self.next_index; + self.next_index += 1; + let name = call.name; + let args_json = + serde_json::to_string(&call.arguments).unwrap_or_else(|_| "{}".to_string()); + events.push(ParserEvent::Start { index, name }); + events.push(ParserEvent::Args { index, args_json }); + } + Err(_) => { + events.push(ParserEvent::Malformed { raw: body }); + } + } + } +} + +/// Returns the length of the longest suffix of `haystack` that is a +/// proper prefix of `needle`. Used to decide how many trailing bytes +/// to hold back when scanning for `needle`: anything that could +/// possibly be the start of `needle` is held; everything else is +/// safe to emit. +fn longest_marker_prefix_suffix(haystack: &str, needle: &str) -> usize { + // Try prefixes of needle from longest to shortest; the first one + // that matches as a suffix of haystack wins. O(|needle|^2) which + // is fine — both markers are < 20 chars. + let max = needle.len().min(haystack.len()); + for n in (1..=max).rev() { + if !haystack.is_char_boundary(haystack.len() - n) || !needle.is_char_boundary(n) { + continue; + } + if haystack.ends_with(&needle[..n]) { + return n; + } + } + 0 +} + +#[derive(Debug, serde::Deserialize)] +struct ToolCallBody { + name: String, + // The model is supposed to emit a JSON object here; in practice + // some Qwen3 variants stringify it. Deserialize-as-value handles + // both. + #[serde(default)] + arguments: serde_json::Value, +} + +// ── Tests ─────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn tool(name: &str) -> ToolSpec { + ToolSpec { + name: name.to_string(), + description: format!("desc of {name}"), + parameters: json!({ + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"] + }), + } + } + + // ── render_tool_block ─────────────────────────────────────────── + + #[test] + fn empty_tools_renders_empty() { + assert_eq!(render_tool_block(&[]), ""); + } + + #[test] + fn tool_block_contains_hermes_markers_and_each_function() { + let block = render_tool_block(&[tool("read_file"), tool("write_file")]); + assert!(block.contains("# Tools")); + assert!(block.contains("")); + assert!(block.contains("")); + assert!(block.contains("\"name\":\"read_file\"")); + assert!(block.contains("\"name\":\"write_file\"")); + assert!(block.contains("")); + assert!(block.contains("")); + } + + // ── render_assistant_with_tool_calls ──────────────────────────── + + #[test] + fn renders_pure_text_when_no_calls() { + let out = render_assistant_with_tool_calls(Some("hi"), &[]); + assert_eq!(out, "hi"); + } + + #[test] + fn renders_text_then_tool_call_block() { + let calls = vec![ToolCall { + id: "call_0".into(), + name: "read_file".into(), + arguments: r#"{"path":"/etc/hostname"}"#.into(), + }]; + let out = render_assistant_with_tool_calls(Some("reading"), &calls); + assert!(out.starts_with("reading\n")); + assert!(out.contains(r#""name":"read_file""#)); + assert!(out.contains(r#""path":"/etc/hostname""#)); + assert!(out.ends_with("")); + } + + #[test] + fn multiple_calls_separated_by_newlines() { + let calls = vec![ + ToolCall { + id: "call_0".into(), + name: "a".into(), + arguments: "{}".into(), + }, + ToolCall { + id: "call_1".into(), + name: "b".into(), + arguments: "{}".into(), + }, + ]; + let out = render_assistant_with_tool_calls(None, &calls); + assert_eq!(out.matches("").count(), 2); + assert_eq!(out.matches("").count(), 2); + } + + #[test] + fn invalid_arguments_json_is_wrapped_as_string() { + let calls = vec![ToolCall { + id: "call_0".into(), + name: "x".into(), + arguments: "not even json".into(), + }]; + let out = render_assistant_with_tool_calls(None, &calls); + // Wrapped as JSON string rather than breaking the envelope. + assert!(out.contains(r#""arguments":"not even json""#)); + } + + // ── render_tool_response ──────────────────────────────────────── + + #[test] + fn tool_response_wraps_content() { + let out = render_tool_response("hello world"); + assert_eq!(out, "\nhello world\n"); + } + + // ── longest_marker_prefix_suffix ──────────────────────────────── + + #[test] + fn marker_prefix_suffix_returns_longest_match() { + assert_eq!(longest_marker_prefix_suffix("foo"), 5); + assert_eq!(longest_marker_prefix_suffix("foo<", ""), 1); + assert_eq!(longest_marker_prefix_suffix("foo"), 0); + assert_eq!(longest_marker_prefix_suffix("foo", ""), 0); + assert_eq!(longest_marker_prefix_suffix("", ""), 0); + // Exact prefix length matches. + assert_eq!( + longest_marker_prefix_suffix("foo"), + 10 + ); + } + + // ── ToolCallParser ────────────────────────────────────────────── + + fn drive(parser: &mut ToolCallParser, chunks: &[&str]) -> Vec { + let mut events = Vec::new(); + for c in chunks { + events.extend(parser.feed(c)); + } + events.extend(parser.finish()); + events + } + + #[test] + fn plain_text_passes_through() { + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &["hello ", "world"]); + assert_eq!(events.len(), 2); + assert_eq!(events[0], ParserEvent::Text("hello ".to_string())); + assert_eq!(events[1], ParserEvent::Text("world".to_string())); + } + + #[test] + fn single_complete_tool_call() { + let mut p = ToolCallParser::new(); + let input = + r#"before {"name":"read_file","arguments":{"path":"/x"}} after"#; + let events = drive(&mut p, &[input]); + // "before " (text) → Start → Args → " after" (text) + assert_eq!(events[0], ParserEvent::Text("before ".to_string())); + assert!(matches!( + &events[1], + ParserEvent::Start { index: 0, name } if name == "read_file" + )); + assert!(matches!( + &events[2], + ParserEvent::Args { index: 0, args_json } if args_json.contains(r#""path":"/x""#) + )); + assert_eq!(events[3], ParserEvent::Text(" after".to_string())); + } + + #[test] + fn open_marker_split_across_chunks_is_buffered() { + // The chunk boundary lands inside the opening marker. + let chunks = [ + "before <", + "tool_call>", + r#"{"name":"a","arguments":{}}"#, + " after", + ]; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &chunks); + // Despite the split, we get exactly: + // - "before " as text (the "<" suffix was held) + // - Start { name: "a" } + // - Args + // - " after" + let texts: Vec<&str> = events + .iter() + .filter_map(|e| match e { + ParserEvent::Text(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + assert_eq!(texts.join(""), "before after"); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "a")) + ); + assert!(events.iter().any(|e| matches!(e, ParserEvent::Args { .. }))); + } + + #[test] + fn close_marker_split_across_chunks() { + let chunks = [ + r#"{"name":"a","arguments":{}}<"#, + "/tool_", + "call>tail", + ]; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &chunks); + // Tail should arrive as text after the call is fully parsed. + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Start { name, .. } if name == "a")) + ); + let last_text = events.iter().rev().find_map(|e| match e { + ParserEvent::Text(t) => Some(t.as_str()), + _ => None, + }); + assert_eq!(last_text, Some("tail")); + } + + #[test] + fn one_byte_at_a_time_produces_same_events_as_one_chunk() { + let input = r#"a{"name":"f","arguments":{"k":1}}b"#; + + let mut single = ToolCallParser::new(); + let single_events = drive(&mut single, &[input]); + + let chunks: Vec = input.chars().map(|c| c.to_string()).collect(); + let chunk_refs: Vec<&str> = chunks.iter().map(|s| s.as_str()).collect(); + let mut byte = ToolCallParser::new(); + let byte_events = drive(&mut byte, &chunk_refs); + + // Concatenated text equals on both paths. + let text = |evs: &[ParserEvent]| -> String { + evs.iter() + .filter_map(|e| match e { + ParserEvent::Text(t) => Some(t.as_str()), + _ => None, + }) + .collect() + }; + assert_eq!(text(&single_events), text(&byte_events)); + // Both paths see exactly one Start and one Args, with the + // same name and arguments payload. + let starts: Vec<&str> = byte_events + .iter() + .filter_map(|e| match e { + ParserEvent::Start { name, .. } => Some(name.as_str()), + _ => None, + }) + .collect(); + assert_eq!(starts, vec!["f"]); + let args: Vec<&str> = byte_events + .iter() + .filter_map(|e| match e { + ParserEvent::Args { args_json, .. } => Some(args_json.as_str()), + _ => None, + }) + .collect(); + assert_eq!(args.len(), 1); + assert!(args[0].contains(r#""k":1"#)); + } + + #[test] + fn multiple_tool_calls_get_distinct_indices() { + let input = concat!( + "lead ", + r#"{"name":"a","arguments":{}}"#, + " mid ", + r#"{"name":"b","arguments":{}}"#, + " tail", + ); + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[input]); + let starts: Vec<(usize, String)> = events + .iter() + .filter_map(|e| match e { + ParserEvent::Start { index, name } => Some((*index, name.clone())), + _ => None, + }) + .collect(); + assert_eq!(starts, vec![(0, "a".into()), (1, "b".into())]); + } + + #[test] + fn malformed_tool_call_does_not_crash() { + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[r#"xnot valid jsony"#]); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Malformed { .. })) + ); + // Bracketing text still flows. + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Text(t) if t == "x")) + ); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Text(t) if t == "y")) + ); + } + + #[test] + fn unterminated_tool_call_is_reported_on_finish() { + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[r#"x{"name":"a""#]); + assert!( + events + .iter() + .any(|e| matches!(e, ParserEvent::Malformed { .. })) + ); + } + + #[test] + fn quoted_lt_inside_args_does_not_trigger_marker() { + // Sanity: a string value that happens to contain "" / "", so this would + // only break if a literal "" appeared in args + // — which the model has no reason to emit.) + let input = r#"{"name":"f","arguments":{"q":"why "#; + let mut p = ToolCallParser::new(); + let events = drive(&mut p, &[input]); + let starts: Vec<&str> = events + .iter() + .filter_map(|e| match e { + ParserEvent::Start { name, .. } => Some(name.as_str()), + _ => None, + }) + .collect(); + assert_eq!(starts, vec!["f"]); + } +}