feat(helexa-acp): scaffold ACP bridge with provider trait + OpenAI chat

Adds a new workspace crate `helexa-acp` (binary, Apache-2.0) — the start of "the missing ACP binary" for multi-endpoint LLM setups mixing public APIs, private LAN deployments, and various wire formats. Today it speaks OpenAI /v1/chat/completions; the Provider trait is the seam that lets OpenAI Responses, Anthropic /v1/messages, and other wire formats slot in later without touching the agent loop. The crate is intentionally self-contained — no dependencies on the other workspace crates (cortex-core, cortex-gateway, neuron) — so a future migration to a dedicated GitHub repo is a Cargo.toml-only change. All deps come from crates.io. This commit lands: * `config.rs` — TOML config at $XDG_CONFIG_HOME/helexa-acp/config.toml with multi-endpoint support (each `[[endpoints]]` declares its name, base_url, wire_api, default_model, optional API key / api_key_env). Falls back to env-only single-endpoint config when no TOML exists (HELEXA_ACP_BASE_URL, HELEXA_ACP_MODEL, etc.). The `endpoint:model` selector syntax is validated and tested. * `provider/mod.rs` — `Provider` trait + provider-agnostic types (`CompletionRequest`, `CompletionEvent`, `Message`, `ToolCall`, `ToolSpec`, `Role`, `UsageStats`). Agent loop consumes these without knowing the wire format on the other side. * `provider/openai_chat.rs` — `OpenAIChatProvider` impl. Compatible with cortex, LM Studio, Ollama (compat mode), OpenRouter, OpenAI itself. Streams via reqwest + eventsource-stream + async-stream. Surfaces text deltas, reasoning deltas (for models that emit `reasoning_content`), tool-call lifecycle (start, args-delta, completion), usage, finish reason. Cancellation-token aware. * `main.rs` — tokio + stderr-only tracing-subscriber + Stdio transport. Builds a provider per configured endpoint at startup, surfacing config mistakes before the editor even initializes. Currently responds to `initialize`; everything else stubs to `not implemented yet` until the agent loop lands in the next commit. 
12 unit tests pass — encoder shape, decoder shape (text-only, tool-call progressive, cancellation, malformed-chunk recovery), config parsing (multi-endpoint TOML, env fallback, validation). The `#![allow(dead_code)]` on `provider/mod.rs` is temporary — the agent loop in the next commit reads every field. It's noted in a comment directly above the attribute so the next reader knows. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 08:13:47 +03:00
parent 249b2e5c98
commit e23d5011d0
7 changed files with 2123 additions and 15 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
    "crates/cortex-gateway",
    "crates/cortex-cli",
    "crates/neuron",
    "crates/helexa-acp",
 ]
 [workspace.package]
--- a/crates/helexa-acp/Cargo.toml
+++ b/crates/helexa-acp/Cargo.toml
@@ -0,0 +1,39 @@
 [package]
 name = "helexa-acp"
 version = "0.1.16"
 edition = "2024"
 license = "Apache-2.0"
 repository = "https://git.lair.cafe/helexa/cortex"
 description = """
 Agent Client Protocol bridge for the helexa self-hosted LLM stack.
 Speaks ACP to ACP-compatible editor clients (Zed, etc.) and forwards
 the conversation to any OpenAI-compatible HTTP endpoint — defaulting
 to cortex (helexa's reverse-proxy / fleet gateway).
 """
 # This crate is intentionally self-contained — no dependencies on other
 # workspace crates (cortex-core, cortex-gateway, neuron). The goal is
 # a painless migration to a dedicated GitHub repo in the future if the
 # project grows beyond helexa's needs. All deps are crates.io.
 [dependencies]
 agent-client-protocol = "0.12"
 tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync", "io-util", "process", "signal"] }
 reqwest = { version = "0.12", features = ["json", "stream", "rustls-tls"], default-features = false }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 toml = "0.8"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 anyhow = "1"
 thiserror = "2"
 async-trait = "0.1"
 futures = "0.3"
 tokio-stream = "0.1"
 tokio-util = { version = "0.7", features = ["rt"] }
 eventsource-stream = "0.2"
 async-stream = "0.3"
 url = { version = "2", features = ["serde"] }
 [[bin]]
 name = "helexa-acp"
 path = "src/main.rs"
--- a/crates/helexa-acp/src/config.rs
+++ b/crates/helexa-acp/src/config.rs
@@ -0,0 +1,378 @@
 //! Configuration for the helexa-acp bridge.
 //!
 //! Loaded from `$XDG_CONFIG_HOME/helexa-acp/config.toml` (or
 //! `~/.config/helexa-acp/config.toml` as a fallback). If no config file
 //! exists, falls back to building a single anonymous endpoint from env
 //! vars — that keeps "just point at one cortex" frictionless without
 //! requiring a config file on disk.
 //!
 //! The design goal is "the missing ACP binary for users with multiple
 //! API endpoints (possibly on a private LAN, possibly mixing wire
 //! types)". Hence: every endpoint is named, has its own wire API, and
 //! has its own default model. The agent's selected model id can be
 //! prefixed `endpoint:model` to route across endpoints; a bare
 //! `model` falls through to the configured `default_endpoint`.
 //!
 //! ### Example TOML
 //!
 //! ```toml
 //! default_endpoint = "helexa"
 //!
 //! [[endpoints]]
 //! name = "helexa"
 //! base_url = "http://hanzalova.internal:31313/v1"
 //! wire_api = "openai-chat"
 //! default_model = "helexa/large"
 //!
 //! [[endpoints]]
 //! name = "openrouter"
 //! base_url = "https://openrouter.ai/api/v1"
 //! wire_api = "openai-chat"
 //! api_key_env = "OPENROUTER_API_KEY"
 //! default_model = "anthropic/claude-opus-4"
 //!
 //! [[endpoints]]
 //! name = "lmstudio"
 //! base_url = "http://localhost:1234/v1"
 //! wire_api = "openai-chat"
 //! default_model = "auto"
 //! ```
 use anyhow::{Context, anyhow};
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
 use url::Url;
 const DEFAULT_BASE_URL: &str = "http://hanzalova.internal:31313/v1";
 const DEFAULT_MODEL: &str = "helexa/large";
 const DEFAULT_ENDPOINT_NAME: &str = "default";
 /// Top-level bridge configuration: the set of endpoints plus the name
 /// of the one used when a request does not pick an endpoint itself.
 ///
 /// Every field is `#[serde(default)]`, so an empty TOML document still
 /// deserializes; it is `Config::validate` that rejects a config with
 /// no endpoints.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Config {
    /// Name of the endpoint used when a request doesn't pick one
    /// explicitly. Must reference an entry in `endpoints`. Defaults to
    /// the first endpoint declared if unset (the defaulting is done by
    /// `Config::validate`, not by deserialization).
    #[serde(default)]
    pub default_endpoint: Option<String>,
    /// Per-endpoint configuration. At least one entry is required;
    /// this is enforced by `Config::validate`.
    #[serde(default)]
    pub endpoints: Vec<EndpointConfig>,
    /// Optional path to a system-prompt file. When unset, the built-in
    /// default prompt from `prompt.rs` is used.
    // NOTE(review): `prompt.rs` is not among the files visible here —
    // confirm it exists (or lands with the agent loop).
    #[serde(default)]
    pub system_prompt_path: Option<PathBuf>,
 }
 /// One `[[endpoints]]` entry: where an upstream API lives, which wire
 /// protocol it speaks, and how to authenticate against it.
 ///
 /// Only `name` and `base_url` are mandatory; every other field is
 /// `#[serde(default)]`.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EndpointConfig {
    /// Short identifier used in `endpoint:model` routing and in logs.
    /// `Config::validate` rejects empty names and names containing `:`.
    pub name: String,
    /// Base URL of the OpenAI-compatible API. Must include the `/v1`
    /// (or equivalent) suffix — paths like `chat/completions` and
    /// `models` are joined onto this.
    pub base_url: Url,
    /// Wire protocol the endpoint speaks. Phase 1 supports
    /// [`WireApi::OpenAiChat`] only; `openai-responses` and
    /// `anthropic-messages` land later behind their own providers.
    #[serde(default)]
    pub wire_api: WireApi,
    /// Model to use when the client hasn't picked one via
    /// `session/set_model`.
    #[serde(default)]
    pub default_model: Option<String>,
    /// Static API key to send as `Authorization: Bearer …`. Prefer
    /// `api_key_env` for anything sensitive — keys in plain TOML are a
    /// liability.
    ///
    /// When both `api_key` and `api_key_env` are set,
    /// `resolve_api_key` returns this literal and never consults the
    /// environment.
    #[serde(default)]
    pub api_key: Option<String>,
    /// Env var name to read for the API key. Resolved at startup so a
    /// missing env var yields a clear error rather than silent
    /// unauthenticated calls.
    #[serde(default)]
    pub api_key_env: Option<String>,
 }
 /// Wire protocol selector for an endpoint.
 ///
 /// Each variant is (de)serialized under the kebab-case name given by
 /// its `#[serde(rename = …)]` attribute. [`WireApi::OpenAiChat`] is the
 /// `Default`, so a config entry may omit `wire_api` entirely.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
 pub enum WireApi {
    /// `POST {base}/chat/completions` returning OpenAI-format SSE.
    /// Compatible with cortex, LM Studio, Ollama (compat mode),
    /// OpenRouter, OpenAI itself.
    #[default]
    #[serde(rename = "openai-chat")]
    OpenAiChat,
    /// `POST {base}/responses` — OpenAI's newer Responses API. Not
    /// implemented yet; the variant is reserved so endpoint configs
    /// can be authored ahead of provider support.
    #[serde(rename = "openai-responses")]
    OpenAiResponses,
    /// `POST {base}/messages` — Anthropic format. Reserved.
    #[serde(rename = "anthropic-messages")]
    AnthropicMessages,
 }
 impl EndpointConfig {
    /// Work out which bearer token, if any, this endpoint should use.
    ///
    /// A literal `api_key` wins outright. Failing that, the variable
    /// named by `api_key_env` is read from the process environment.
    /// With neither configured the result is `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Fails when `api_key_env` is set but `std::env::var` cannot read
    /// that variable; the error names both the endpoint and the
    /// variable.
    pub fn resolve_api_key(&self) -> anyhow::Result<Option<String>> {
        match (self.api_key.as_ref(), self.api_key_env.as_ref()) {
            (Some(literal), _) => Ok(Some(literal.clone())),
            (None, Some(var)) => {
                let value = std::env::var(var).with_context(|| {
                    format!(
                        "endpoint '{}' references missing env var {}",
                        self.name, var
                    )
                })?;
                Ok(Some(value))
            }
            (None, None) => Ok(None),
        }
    }
    /// URL of the chat-completions route: `{base_url}/chat/completions`.
    pub fn chat_completions_url(&self) -> Url {
        let route = ["chat", "completions"];
        join_segments(&self.base_url, &route)
    }
    /// URL of the model-listing route: `{base_url}/models`.
    pub fn models_url(&self) -> Url {
        let route = ["models"];
        join_segments(&self.base_url, &route)
    }
 }
 impl Config {
    /// Load from TOML at the standard config path, or build from env
    /// vars if no file exists. Env-fallback yields a single endpoint
    /// named `"default"`.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Config::from_file`] (unreadable
    /// file, TOML parse failure, validation failure) or from
    /// [`Config::from_env`] (invalid base URL).
    pub fn load() -> anyhow::Result<Self> {
        let path = config_path();
        if let Some(path) = &path
            && path.exists()
        {
            return Self::from_file(path);
        }
        Self::from_env()
    }
    /// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
    /// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
    /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`.
    ///
    /// A variable that is set but empty is treated exactly like an
    /// unset one for all four names, so `HELEXA_ACP_MODEL=` falls back
    /// to the built-in default instead of producing an empty model id.
    ///
    /// # Errors
    ///
    /// Fails when the effective base URL does not parse as a URL.
    pub fn from_env() -> anyhow::Result<Self> {
        // One rule for every variable: unset, non-UTF-8 and empty all
        // mean "not provided".
        let non_empty =
            |name: &str| std::env::var(name).ok().filter(|value| !value.is_empty());
        let base_url = non_empty("HELEXA_ACP_BASE_URL").unwrap_or_else(|| DEFAULT_BASE_URL.into());
        let base_url = Url::parse(&base_url)
            .with_context(|| format!("HELEXA_ACP_BASE_URL is not a valid URL ({base_url})"))?;
        let default_model = non_empty("HELEXA_ACP_MODEL").unwrap_or_else(|| DEFAULT_MODEL.into());
        let api_key = non_empty("HELEXA_ACP_API_KEY");
        let system_prompt_path = non_empty("HELEXA_ACP_SYSTEM_PROMPT_PATH").map(PathBuf::from);
        Ok(Self {
            default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
            endpoints: vec![EndpointConfig {
                name: DEFAULT_ENDPOINT_NAME.into(),
                base_url,
                wire_api: WireApi::OpenAiChat,
                default_model: Some(default_model),
                api_key,
                api_key_env: None,
            }],
            system_prompt_path,
        })
    }
    /// Read, parse and validate the TOML config at `path`.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be read, is not valid TOML for
    /// [`Config`], or does not pass validation (no endpoints, empty /
    /// duplicate / colon-containing endpoint name, unknown
    /// `default_endpoint`).
    pub fn from_file(path: &Path) -> anyhow::Result<Self> {
        let text = std::fs::read_to_string(path)
            .with_context(|| format!("read config {}", path.display()))?;
        let mut cfg: Self =
            toml::from_str(&text).with_context(|| format!("parse config {}", path.display()))?;
        cfg.validate()?;
        Ok(cfg)
    }
    /// Check structural invariants and fill in `default_endpoint` with
    /// the first endpoint's name when it was left unset.
    fn validate(&mut self) -> anyhow::Result<()> {
        if self.endpoints.is_empty() {
            return Err(anyhow!("config has no [[endpoints]] entries"));
        }
        for (i, ep) in self.endpoints.iter().enumerate() {
            if ep.name.is_empty() {
                return Err(anyhow!("endpoints[{i}] has empty name"));
            }
            if ep.name.contains(':') {
                return Err(anyhow!(
                    "endpoints[{i}].name '{}' contains ':' which would clash \
                     with the endpoint:model selector syntax",
                    ep.name
                ));
            }
            // `endpoint()` resolves by first match, so a repeated name
            // would make the later entry unreachable. The quadratic
            // scan is fine for a hand-written list of endpoints.
            if self.endpoints[..i].iter().any(|earlier| earlier.name == ep.name) {
                return Err(anyhow!(
                    "endpoints[{i}].name '{}' is already used by an earlier [[endpoints]] entry",
                    ep.name
                ));
            }
        }
        // Pick a default endpoint if none was named. The emptiness
        // check above guarantees index 0 exists.
        let first_name = &self.endpoints[0].name;
        let default_name = self
            .default_endpoint
            .get_or_insert_with(|| first_name.clone());
        if !self.endpoints.iter().any(|e| e.name == *default_name) {
            return Err(anyhow!(
                "default_endpoint '{default_name}' is not declared in [[endpoints]]"
            ));
        }
        Ok(())
    }
    /// Look up an endpoint by name. Returns `None` if not configured.
    pub fn endpoint(&self, name: &str) -> Option<&EndpointConfig> {
        self.endpoints.iter().find(|e| e.name == name)
    }
    /// The default endpoint (guaranteed to exist after `validate`).
    ///
    /// # Panics
    ///
    /// Panics when `default_endpoint` is `None` or names an endpoint
    /// that is not in `endpoints`. Neither can happen for a value
    /// produced by [`Config::from_file`] or [`Config::from_env`].
    pub fn default_endpoint(&self) -> &EndpointConfig {
        let name = self
            .default_endpoint
            .as_deref()
            .expect("default_endpoint set by validate");
        self.endpoint(name)
            .expect("default_endpoint resolves after validate")
    }
 }
 /// Split an ACP-side `model` string into an optional endpoint name and
 /// the model id to send upstream.
 ///
 /// * `helexa:helexa/large` → (`Some("helexa")`, `"helexa/large"`)
 /// * `helexa/large` → (`None`, `"helexa/large"`)
 ///
 /// Only the FIRST colon is significant. If either side of it is empty
 /// (`":gpt-5"`, `"helexa:"`) the input is not treated as a selector and
 /// is returned whole with no endpoint.
 ///
 /// This function does not know which endpoints are configured, so a
 /// model id that itself contains `:` is split as well; writing it as
 /// `endpoint:model-with:colon` keeps the id intact because everything
 /// after the first colon is preserved.
 // NOTE(review): `name:tag` model ids (as commonly used with Ollama)
 // would be split here — confirm the caller checks the prefix against
 // the configured endpoint names before routing on it.
 pub fn parse_model_selector(input: &str) -> (Option<&str>, &str) {
    if let Some(colon) = input.find(':') {
        let endpoint = &input[..colon];
        let model = &input[colon + 1..];
        if !(endpoint.is_empty() || model.is_empty()) {
            return (Some(endpoint), model);
        }
    }
    (None, input)
 }
 /// Resolve where the config file is expected to live.
 ///
 /// Precedence: `HELEXA_ACP_CONFIG_PATH` (used verbatim), then
 /// `$XDG_CONFIG_HOME/helexa-acp/config.toml`, then
 /// `$HOME/.config/helexa-acp/config.toml`. A variable that is set but
 /// empty is treated as unset at every step, so an empty override no
 /// longer shadows the standard locations and an empty `HOME` no longer
 /// yields a path relative to the working directory.
 ///
 /// Values are read with `var_os`, so paths that are not valid UTF-8
 /// are kept rather than discarded.
 ///
 /// Returns `None` when none of the three variables provides a value.
 /// The returned path is not checked for existence.
 fn config_path() -> Option<PathBuf> {
    let non_empty = |name: &str| std::env::var_os(name).filter(|value| !value.is_empty());
    if let Some(explicit) = non_empty("HELEXA_ACP_CONFIG_PATH") {
        return Some(PathBuf::from(explicit));
    }
    let base = non_empty("XDG_CONFIG_HOME")
        .map(PathBuf::from)
        .or_else(|| non_empty("HOME").map(|home| PathBuf::from(home).join(".config")))?;
    Some(base.join("helexa-acp").join("config.toml"))
 }
 /// Return a copy of `base` with `segments` appended to its path.
 ///
 /// A trailing empty path segment on `base` (i.e. a trailing `/`) is
 /// dropped first, so `…/v1` and `…/v1/` produce the same result. When
 /// the URL does not expose mutable path segments
 /// (`path_segments_mut` returns `Err`), the copy is returned unchanged.
 fn join_segments(base: &Url, segments: &[&str]) -> Url {
    let mut joined = base.clone();
    match joined.path_segments_mut() {
        Ok(mut path) => {
            path.pop_if_empty();
            for segment in segments {
                path.push(segment);
            }
        }
        Err(()) => {}
    }
    joined
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Minimal endpoint around `base_url`, for the URL-joining test.
    fn endpoint_with_base(base_url: &str) -> EndpointConfig {
        EndpointConfig {
            name: "x".into(),
            base_url: Url::parse(base_url).unwrap(),
            wire_api: WireApi::OpenAiChat,
            default_model: None,
            api_key: None,
            api_key_env: None,
        }
    }
    #[test]
    fn url_join_handles_trailing_slash() {
        // Both spellings of the base must produce the same routes; the
        // second one is the trailing-slash case this test is named for.
        for base in ["http://h.internal:31313/v1", "http://h.internal:31313/v1/"] {
            let ep = endpoint_with_base(base);
            assert_eq!(
                ep.chat_completions_url().as_str(),
                "http://h.internal:31313/v1/chat/completions",
                "base = {base}"
            );
            assert_eq!(
                ep.models_url().as_str(),
                "http://h.internal:31313/v1/models",
                "base = {base}"
            );
        }
    }
    #[test]
    fn parses_model_selector() {
        assert_eq!(
            parse_model_selector("helexa:helexa/large"),
            (Some("helexa"), "helexa/large")
        );
        assert_eq!(parse_model_selector("helexa/large"), (None, "helexa/large"));
        assert_eq!(parse_model_selector("gpt-5"), (None, "gpt-5"));
        // Edge case: a leading colon → no endpoint.
        assert_eq!(parse_model_selector(":gpt-5"), (None, ":gpt-5"));
        // Edge case: a trailing colon → no model, so not a selector.
        assert_eq!(parse_model_selector("helexa:"), (None, "helexa:"));
        // Only the first colon splits; the rest stays in the model id.
        assert_eq!(parse_model_selector("a:b:c"), (Some("a"), "b:c"));
    }
    #[test]
    fn env_fallback_builds_single_endpoint() {
        // Clear (rather than set) the variables so the built-in
        // defaults are what gets exercised, whatever the environment
        // the test runner was started from.
        //
        // SAFETY: mutating the environment is only sound while no
        // other thread reads or writes it. No other test visible in
        // this module touches the process environment; if one is
        // added, these tests must be serialised.
        unsafe {
            std::env::remove_var("HELEXA_ACP_BASE_URL");
            std::env::remove_var("HELEXA_ACP_MODEL");
            std::env::remove_var("HELEXA_ACP_API_KEY");
            std::env::remove_var("HELEXA_ACP_SYSTEM_PROMPT_PATH");
        }
        let cfg = Config::from_env().unwrap();
        assert_eq!(cfg.endpoints.len(), 1);
        assert_eq!(cfg.endpoints[0].name, "default");
        assert_eq!(cfg.endpoints[0].base_url.as_str(), DEFAULT_BASE_URL);
        assert_eq!(
            cfg.endpoints[0].default_model.as_deref(),
            Some(DEFAULT_MODEL)
        );
        assert!(cfg.endpoints[0].api_key.is_none());
        assert!(cfg.system_prompt_path.is_none());
        assert_eq!(cfg.default_endpoint().name, "default");
    }
    #[test]
    fn toml_parses_multi_endpoint() {
        let toml_text = r#"
            default_endpoint = "helexa"
            [[endpoints]]
            name = "helexa"
            base_url = "http://hanzalova.internal:31313/v1"
            default_model = "helexa/large"
            [[endpoints]]
            name = "openrouter"
            base_url = "https://openrouter.ai/api/v1"
            wire_api = "openai-chat"
            api_key_env = "OPENROUTER_API_KEY"
            default_model = "anthropic/claude-opus-4"
        "#;
        let mut cfg: Config = toml::from_str(toml_text).unwrap();
        cfg.validate().unwrap();
        assert_eq!(cfg.endpoints.len(), 2);
        assert_eq!(cfg.default_endpoint().name, "helexa");
        // `wire_api` was omitted for the first endpoint → the default.
        assert_eq!(cfg.endpoints[0].wire_api, WireApi::OpenAiChat);
        assert_eq!(
            cfg.endpoints[1].api_key_env.as_deref(),
            Some("OPENROUTER_API_KEY")
        );
    }
    #[test]
    fn validate_rejects_colon_in_endpoint_name() {
        let toml_text = r#"
            [[endpoints]]
            name = "bad:name"
            base_url = "http://x/v1"
        "#;
        let mut cfg: Config = toml::from_str(toml_text).unwrap();
        let err = cfg.validate().unwrap_err();
        assert!(format!("{err}").contains("clash"));
    }
 }
--- a/crates/helexa-acp/src/main.rs
+++ b/crates/helexa-acp/src/main.rs
@@ -0,0 +1,121 @@
 //! helexa-acp — Agent Client Protocol bridge for multi-endpoint LLM
 //! setups (helexa, LM Studio, Ollama, OpenRouter, OpenAI, Anthropic,
 //! …) with a clean per-endpoint wire-format selector.
 //!
 //! Speaks ACP over stdio to an editor client (Zed today). The
 //! conversation is forwarded to one of the configured endpoints via
 //! a wire-format-specific [`provider::Provider`] implementation.
 //! The agent loop itself is provider-agnostic — adding e.g. an
 //! Anthropic /v1/messages provider doesn't touch `agent.rs`.
 //!
 //! Config: `$XDG_CONFIG_HOME/helexa-acp/config.toml` for the multi-
 //! endpoint case; env vars (`HELEXA_ACP_BASE_URL`, etc.) for the
 //! single-endpoint case when no config file exists.
 use agent_client_protocol::schema::{AgentCapabilities, InitializeRequest, InitializeResponse};
 use agent_client_protocol::{Agent, Client, ConnectionTo, Dispatch, Result, Stdio};
 use std::sync::Arc;
 mod config;
 mod provider;
 use config::{Config, EndpointConfig, WireApi};
 use provider::{Provider, openai_chat::OpenAIChatProvider};
 /// Construct the [`Provider`] matching `endpoint.wire_api`.
 ///
 /// This is the single place where a wire protocol is mapped to a
 /// concrete provider type, so supporting a new protocol means adding
 /// an arm here and nothing at the call site.
 ///
 /// # Errors
 ///
 /// Returns whatever `OpenAIChatProvider::new` reports for
 /// `openai-chat` endpoints, and a descriptive error for the wire APIs
 /// that are reserved but have no provider yet.
 fn build_provider(endpoint: EndpointConfig) -> anyhow::Result<Arc<dyn Provider>> {
    match endpoint.wire_api {
        WireApi::OpenAiChat => {
            let provider = OpenAIChatProvider::new(endpoint)?;
            Ok(Arc::new(provider))
        }
        WireApi::OpenAiResponses => anyhow::bail!(
            "endpoint '{}' wire_api 'openai-responses' is reserved for a future provider; \
             use 'openai-chat' for now or wait for the OpenAIResponsesProvider impl",
            endpoint.name
        ),
        WireApi::AnthropicMessages => anyhow::bail!(
            "endpoint '{}' wire_api 'anthropic-messages' is reserved for a future provider",
            endpoint.name
        ),
    }
 }
 /// Process entry point.
 ///
 /// In order: install a stderr-only tracing subscriber, load the
 /// configuration, build one provider per configured endpoint, then
 /// serve ACP over stdio. Only `initialize` is answered for real; every
 /// other message is rejected with a "not implemented yet" error.
 ///
 /// # Errors
 ///
 /// Returns an ACP internal error when the config cannot be loaded or
 /// when no endpoint yields a usable provider; otherwise returns
 /// whatever the stdio connection resolves to.
 #[tokio::main]
 async fn main() -> Result<()> {
    // Logs go to stderr — stdout is reserved for the JSON-RPC stream.
    // The filter comes from the default env var when it parses, and
    // falls back to "info" otherwise.
    tracing_subscriber::fmt()
        .with_writer(std::io::stderr)
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
        )
        .init();
    // `{e:#}` prints the whole anyhow context chain on one line.
    let cfg = Config::load()
        .map_err(|e| agent_client_protocol::util::internal_error(format!("config: {e:#}")))?;
    tracing::info!(
        endpoints = cfg.endpoints.len(),
        default_endpoint = %cfg.default_endpoint().name,
        default_model = ?cfg.default_endpoint().default_model,
        "helexa-acp starting"
    );
    // Build a provider for each configured endpoint up-front. Cheap —
    // just sets up a reqwest::Client and resolves the API key — and
    // surfaces config mistakes (missing API key env var, unsupported
    // wire_api) before the editor even sends an initialize request.
    //
    // A failing endpoint is logged at WARN and skipped rather than
    // aborting startup; only an empty result is fatal (checked below).
    // NOTE(review): the default endpoint can be among the skipped ones,
    // and `providers` is not yet handed to any request handler —
    // presumably both are addressed when the agent loop lands.
    let mut providers: Vec<Arc<dyn Provider>> = Vec::with_capacity(cfg.endpoints.len());
    for endpoint in &cfg.endpoints {
        match build_provider(endpoint.clone()) {
            Ok(p) => {
                tracing::info!(
                    endpoint = %endpoint.name,
                    base_url = %endpoint.base_url,
                    wire_api = ?endpoint.wire_api,
                    "registered provider"
                );
                providers.push(p);
            }
            Err(e) => {
                tracing::warn!(
                    endpoint = %endpoint.name,
                    error = %format!("{e:#}"),
                    "skipping endpoint with invalid config"
                );
            }
        }
    }
    if providers.is_empty() {
        return Err(agent_client_protocol::util::internal_error(
            "no usable endpoints — check config",
        ));
    }
    Agent
        .builder()
        .name("helexa-acp")
        .on_receive_request(
            async move |initialize: InitializeRequest, responder, _connection| {
                // Phase 1 wiring — capabilities only. Real session
                // handling lands in the next iteration (agent.rs).
                // The client's requested protocol version is echoed
                // back unchanged.
                responder.respond(
                    InitializeResponse::new(initialize.protocol_version)
                        .agent_capabilities(AgentCapabilities::new()),
                )
            },
            agent_client_protocol::on_receive_request!(),
        )
        .on_receive_dispatch(
            // Catch-all: anything not handled above is logged and
            // answered with an error instead of being left pending.
            async move |message: Dispatch, cx: ConnectionTo<Client>| {
                tracing::warn!(method = ?message.method(), "unhandled ACP message");
                message.respond_with_error(
                    agent_client_protocol::util::internal_error("not implemented yet"),
                    cx,
                )
            },
            agent_client_protocol::on_receive_dispatch!(),
        )
        .connect_to(Stdio::new())
        .await
 }
--- a/crates/helexa-acp/src/provider/mod.rs
+++ b/crates/helexa-acp/src/provider/mod.rs
@@ -0,0 +1,162 @@
 //! Provider trait — the seam between the ACP-side agent loop and
 //! whatever wire protocol an endpoint actually speaks.
 //!
 //! Every concrete provider (OpenAI chat completions, OpenAI Responses,
 //! Anthropic /v1/messages, Ollama native, …) implements
 //! [`Provider`]. The agent constructs a [`CompletionRequest`] using
 //! provider-agnostic types and consumes a stream of
 //! [`CompletionEvent`]s — neither end knows which wire format is on
 //! the other side of the trait.
 //!
 //! Day-1 provider: [`openai_chat::OpenAIChatProvider`]. Day-N
 //! providers slot in without touching `agent.rs`.
 // Many fields and variants in the public surface here aren't read yet:
 // the agent loop that consumes `CompletionEvent`s and constructs
 // `CompletionRequest`s lands in the next commit. They're not
 // speculative — the unit tests in `provider::openai_chat::tests`
 // already verify the encoder/decoder produces them. Once `agent.rs`
 // arrives this allow comes off.
 #![allow(dead_code)]
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use tokio_util::sync::CancellationToken;
 pub mod openai_chat;
 /// Provider-agnostic LLM endpoint. Implementations translate between
 /// [`CompletionRequest`] / [`CompletionEvent`] and whatever wire
 /// format their endpoint speaks.
 ///
 /// Implementors must be `Send + Sync`; `main.rs` holds them as
 /// `Arc<dyn Provider>`. The async methods are provided through
 /// `#[async_trait]`.
 #[async_trait]
 pub trait Provider: Send + Sync {
    /// Endpoint name as configured by the user (e.g. `"helexa"`,
    /// `"openrouter"`). Used in logs and in the `endpoint:model`
    /// selector.
    fn name(&self) -> &str;
    /// List models available at this endpoint. Used to build the
    /// model-picker dropdown in editor clients. Should return quickly
    /// (cache if necessary).
    ///
    /// # Errors
    ///
    /// Implementation-defined; returned as `anyhow::Error`.
    async fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>>;
    /// Run a chat completion. Returns a stream of provider-agnostic
    /// events. The stream stops when the upstream finishes, when
    /// `cancel` is fired, or when the stream is dropped.
    ///
    /// # Errors
    ///
    /// There are two failure channels: the outer `Result` covers
    /// failing to obtain a stream at all, and each stream item is
    /// itself a `Result` so a failure can also be reported mid-stream.
    async fn complete(
        &self,
        request: CompletionRequest,
        cancel: CancellationToken,
    ) -> anyhow::Result<BoxStream<'static, anyhow::Result<CompletionEvent>>>;
 }
 /// One model exposed by a provider, as returned by
 /// [`Provider::list_models`].
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelInfo {
    /// Model identifier.
    // NOTE(review): presumably the endpoint-local id, without an
    // `endpoint:` prefix — confirm against the `list_models` impls.
    pub id: String,
    /// Human-friendly name, if the endpoint exposes one. Otherwise
    /// `id` is used as the display name.
    #[serde(default)]
    pub display_name: Option<String>,
 }
 /// Inputs to a completion. Provider-agnostic — concrete providers
 /// translate this into their wire format.
 #[derive(Debug, Clone)]
 pub struct CompletionRequest {
    /// Endpoint-local model id (without the `endpoint:` prefix).
    pub model: String,
    /// Conversation to send.
    // NOTE(review): presumably oldest-first — confirm in the encoder.
    pub messages: Vec<Message>,
    /// Tools the model is allowed to call. Empty list means no tool
    /// support advertised.
    pub tools: Vec<ToolSpec>,
    /// Optional sampling temperature.
    // NOTE(review): `None` presumably leaves the upstream default in
    // place — confirm in the provider encoder. The same applies to
    // `top_p` and `max_tokens` below.
    pub temperature: Option<f64>,
    /// Optional nucleus-sampling parameter.
    pub top_p: Option<f64>,
    /// Optional cap on the number of tokens to generate.
    pub max_tokens: Option<u64>,
 }
 /// One entry in [`CompletionRequest::messages`]: who produced it and
 /// what it contains.
 #[derive(Debug, Clone)]
 pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// Payload: plain text, an assistant turn carrying tool calls, or
    /// a tool result.
    pub content: MessageContent,
 }
 /// Author of a [`Message`], independent of any wire format's role
 /// strings.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Role {
    /// System prompt / instructions.
    System,
    /// End-user input.
    User,
    /// Model output.
    Assistant,
    /// Tool result message. Provider impls turn this into whatever
    /// shape the upstream wire format wants (OpenAI uses
    /// `role: "tool"` + `tool_call_id`; Anthropic uses content blocks).
    Tool,
 }
 /// Payload of a [`Message`].
 // NOTE(review): nothing in the type ties a variant to a `Role`
 // (e.g. `ToolResult` to `Role::Tool`) — presumably the agent loop
 // and the provider encoders keep them consistent; confirm there.
 #[derive(Debug, Clone)]
 pub enum MessageContent {
    /// Plain text content.
    Text(String),
    /// Assistant turn that called one or more tools.
    ToolCalls {
        /// Optional text the assistant said alongside the tool calls.
        text: Option<String>,
        /// The tool invocations made in this turn.
        calls: Vec<ToolCall>,
    },
    /// Tool result. `tool_call_id` matches the assistant's call id.
    ToolResult {
        /// Id of the [`ToolCall`] this result answers.
        tool_call_id: String,
        /// Result text returned to the model.
        content: String,
    },
 }
 /// A single tool invocation carried by
 /// [`MessageContent::ToolCalls`].
 #[derive(Debug, Clone)]
 pub struct ToolCall {
    /// Provider-assigned id that ties the call to its result.
    pub id: String,
    /// Name of the tool being called.
    pub name: String,
    /// JSON-encoded arguments. Kept as a string because providers
    /// stream argument bytes incrementally and only validate at the
    /// end; the agent decodes once the call is complete.
    pub arguments: String,
 }
 /// Declaration of one tool offered to the model through
 /// [`CompletionRequest::tools`].
 #[derive(Debug, Clone)]
 pub struct ToolSpec {
    /// Tool name.
    // NOTE(review): presumably the name the model echoes back in
    // `ToolCall::name` — confirm in the provider decoder.
    pub name: String,
    /// Description of the tool, passed along with the spec.
    pub description: String,
    /// JSON Schema of the arguments object.
    pub parameters: Value,
 }
 /// Events emitted by a provider during a streaming completion.
 ///
 /// These are the items of the stream returned by
 /// [`Provider::complete`].
 #[derive(Debug, Clone)]
 pub enum CompletionEvent {
    /// Incremental visible text from the assistant.
    TextDelta(String),
    /// Incremental "reasoning" / thought text, if the model emits one
    /// (e.g. Qwen3 with `<think>` tags surfaced as a separate stream,
    /// or OpenAI reasoning models).
    ReasoningDelta(String),
    /// A new tool call has started.
    ToolCallStart {
        /// Slot identifying this call; later
        /// [`Self::ToolCallArgsDelta`] events carry an `index` too.
        // NOTE(review): presumably the upstream delta `index`, and
        // presumably what pairs a delta with its start event —
        // confirm in the decoder.
        index: usize,
        /// Provider-assigned call id.
        id: String,
        /// Name of the tool being called.
        name: String,
    },
    /// More argument bytes for a tool call already announced via
    /// [`Self::ToolCallStart`].
    ToolCallArgsDelta { index: usize, args_delta: String },
    /// Stream finished. Carries the upstream `finish_reason` if it
    /// gave one (`"stop"`, `"length"`, `"tool_calls"`, …).
    Finish { reason: Option<String> },
    /// Final usage stats, if the provider supplied them.
    Usage(UsageStats),
 }
 /// Token accounting carried by [`CompletionEvent::Usage`].
 ///
 /// Derives `Default`, so every counter starts at zero.
 // NOTE(review): presumably copied verbatim from the upstream `usage`
 // object — nothing here enforces
 // `total_tokens == prompt_tokens + completion_tokens`.
 #[derive(Debug, Clone, Copy, Default)]
 pub struct UsageStats {
    /// Tokens in the prompt.
    pub prompt_tokens: u64,
    /// Tokens in the generated completion.
    pub completion_tokens: u64,
    /// Total token count.
    pub total_tokens: u64,
 }
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -0,0 +1,645 @@
 //! OpenAI `/v1/chat/completions` provider.
 //!
 //! Covers cortex, LM Studio, Ollama (compat mode), OpenRouter, and
 //! OpenAI itself. The wire format is well-documented and stable;
 //! tool calls follow the `tools` request param + `tool_calls`
 //! response delta convention shared by every reasonably-modern
 //! OpenAI-compatible server.
 use async_trait::async_trait;
 use eventsource_stream::Eventsource;
 use futures::{Stream, StreamExt, stream::BoxStream};
 use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
 use tokio_util::sync::CancellationToken;
 use super::{
    CompletionEvent, CompletionRequest, Message, MessageContent, ModelInfo, Provider, Role,
    ToolCall, ToolSpec, UsageStats,
 };
 use crate::config::EndpointConfig;
 // Several fields and types in this module are only used through the
 // async HTTP path in `complete()` and `list_models()`. Tests don't
 // stand up a mock HTTP server (we'd be over-engineering for the
 // payoff), so clippy's dead-code pass under `--tests` flags them.
 // Each `allow(dead_code)` below names exactly what's exercised only
 // at runtime, with a one-line rationale so the next reader can tell
 // it's intentional.
 /// [`Provider`] implementation for a single configured endpoint that
 /// speaks the OpenAI chat-completions wire format.
 pub struct OpenAIChatProvider {
    /// Endpoint configuration; supplies the provider name and the
    /// request URLs.
    endpoint: EndpointConfig,
    /// Read by `list_models` and `complete` (bearer auth header).
    #[allow(dead_code)]
    api_key: Option<String>,
    /// Read by `list_models` and `complete` (request builder).
    #[allow(dead_code)]
    http: reqwest::Client,
 }
 impl OpenAIChatProvider {
    /// Build a provider for `endpoint`.
    ///
    /// Resolves the API key from the endpoint configuration and
    /// constructs the HTTP client; no request is sent.
    ///
    /// # Errors
    /// Fails if the API key cannot be resolved or the HTTP client
    /// cannot be built.
    pub fn new(endpoint: EndpointConfig) -> anyhow::Result<Self> {
        // Deliberately generous: cortex may have to cold-load a model
        // before the first chunk is served, which can take tens of
        // seconds. Early termination is the cancellation token's job,
        // not this timeout's.
        // NOTE(review): reqwest applies `timeout` to the whole request
        // including the streamed body, so a generation running longer
        // than this would be cut off — confirm that is acceptable.
        const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);

        let api_key = endpoint.resolve_api_key()?;
        let builder = reqwest::Client::builder().timeout(REQUEST_TIMEOUT);
        let http = builder.build()?;
        Ok(Self {
            endpoint,
            api_key,
            http,
        })
    }
 }
 #[async_trait]
 impl Provider for OpenAIChatProvider {
    fn name(&self) -> &str {
        &self.endpoint.name
    }
    async fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
        let mut req = self.http.get(self.endpoint.models_url());
        if let Some(key) = &self.api_key {
            req = req.bearer_auth(key);
        }
        let resp = req
            .send()
            .await
            .map_err(|e| anyhow::anyhow!("{} list_models: {e}", self.endpoint.name))?;
        let status = resp.status();
        if !status.is_success() {
            let body = resp.text().await.unwrap_or_default();
            anyhow::bail!(
                "{} list_models returned {}: {}",
                self.endpoint.name,
                status,
                body
            );
        }
        let body: WireModelsResponse = resp.json().await?;
        Ok(body
            .data
            .into_iter()
            .map(|m| ModelInfo {
                id: m.id,
                display_name: None,
            })
            .collect())
    }
    async fn complete(
        &self,
        request: CompletionRequest,
        cancel: CancellationToken,
    ) -> anyhow::Result<BoxStream<'static, anyhow::Result<CompletionEvent>>> {
        let body = encode_request(&request);
        let mut req = self
            .http
            .post(self.endpoint.chat_completions_url())
            .json(&body);
        if let Some(key) = &self.api_key {
            req = req.bearer_auth(key);
        }
        let resp = req
            .send()
            .await
            .map_err(|e| anyhow::anyhow!("{} chat_completion send: {e}", self.endpoint.name))?;
        let status = resp.status();
        if !status.is_success() {
            let body = resp.text().await.unwrap_or_default();
            anyhow::bail!(
                "{} chat_completion returned {}: {}",
                self.endpoint.name,
                status,
                body
            );
        }
        let sse = resp.bytes_stream().eventsource();
        let stream = decode_stream(sse, cancel);
        Ok(Box::pin(stream))
    }
 }
 // Unit tests for request encoding and SSE decoding. Nothing here
 // performs network I/O: decoding is driven from canned in-memory
 // events.
 #[cfg(test)]
 mod tests {
    use super::*;
    use futures::stream;
    use url::Url;
    /// Minimal endpoint config pointing at a local URL, with no model
    /// default and no API key.
    fn ep() -> EndpointConfig {
        EndpointConfig {
            name: "test".into(),
            base_url: Url::parse("http://localhost:9999/v1").unwrap(),
            wire_api: crate::config::WireApi::OpenAiChat,
            default_model: None,
            api_key: None,
            api_key_env: None,
        }
    }
    /// Text-only request: set options are encoded, unset options and
    /// an empty tool list are omitted, and streaming with usage
    /// reporting is always requested.
    #[test]
    fn encodes_text_only_request() {
        let req = CompletionRequest {
            model: "helexa/large".into(),
            messages: vec![
                Message {
                    role: Role::System,
                    content: MessageContent::Text("you are helpful".into()),
                },
                Message {
                    role: Role::User,
                    content: MessageContent::Text("hi".into()),
                },
            ],
            tools: vec![],
            temperature: Some(0.7),
            top_p: None,
            max_tokens: Some(256),
        };
        let body = encode_request(&req);
        assert_eq!(body["model"], "helexa/large");
        assert_eq!(body["stream"], true);
        assert_eq!(body["temperature"], 0.7);
        assert_eq!(body["max_tokens"], 256);
        assert!(body.get("top_p").is_none(), "absent options are omitted");
        let messages = body["messages"].as_array().unwrap();
        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0]["role"], "system");
        assert_eq!(messages[1]["role"], "user");
        assert_eq!(messages[1]["content"], "hi");
        assert!(body.get("tools").is_none(), "empty tools omitted");
        assert_eq!(body["stream_options"]["include_usage"], true);
    }
    /// An assistant tool call followed by its tool result, plus the
    /// tool definition, all appear in the encoded request body.
    #[test]
    fn encodes_tool_call_round_trip() {
        let req = CompletionRequest {
            model: "x".into(),
            messages: vec![
                Message {
                    role: Role::Assistant,
                    content: MessageContent::ToolCalls {
                        text: Some("calling read_file".into()),
                        calls: vec![ToolCall {
                            id: "call_1".into(),
                            name: "read_file".into(),
                            arguments: "{\"path\":\"/tmp/a.txt\"}".into(),
                        }],
                    },
                },
                Message {
                    role: Role::Tool,
                    content: MessageContent::ToolResult {
                        tool_call_id: "call_1".into(),
                        content: "file contents".into(),
                    },
                },
            ],
            tools: vec![ToolSpec {
                name: "read_file".into(),
                description: "Read a file".into(),
                parameters: json!({"type": "object", "properties": {"path": {"type": "string"}}}),
            }],
            temperature: None,
            top_p: None,
            max_tokens: None,
        };
        let body = encode_request(&req);
        // Tool defs flow through:
        let tools = body["tools"].as_array().unwrap();
        assert_eq!(tools[0]["function"]["name"], "read_file");
        // Assistant tool_calls flow through:
        let asst = &body["messages"][0];
        assert_eq!(asst["role"], "assistant");
        assert_eq!(asst["tool_calls"][0]["id"], "call_1");
        assert_eq!(asst["tool_calls"][0]["function"]["name"], "read_file");
        // Tool result flows through:
        let tool = &body["messages"][1];
        assert_eq!(tool["role"], "tool");
        assert_eq!(tool["tool_call_id"], "call_1");
        assert_eq!(tool["content"], "file contents");
    }
    /// Build a fake eventsource stream from canned SSE `data:` lines.
    /// Each entry becomes one successful `message` event; the stream
    /// never yields a transport error.
    fn fake_sse(
        lines: Vec<&'static str>,
    ) -> impl Stream<
        Item = std::result::Result<
            eventsource_stream::Event,
            eventsource_stream::EventStreamError<reqwest::Error>,
        >,
    > {
        stream::iter(lines.into_iter().map(|data| {
            Ok(eventsource_stream::Event {
                event: "message".into(),
                data: data.into(),
                id: String::new(),
                retry: None,
            })
        }))
    }
    /// Two text deltas, a finish reason, then a usage-only chunk. The
    /// `[DONE]` sentinel ends the stream without producing an event,
    /// and `Usage` is delivered after `Finish`.
    #[tokio::test]
    async fn decodes_text_then_finish() {
        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"content":"hel"},"finish_reason":null}]}"#,
            r#"{"choices":[{"delta":{"content":"lo"},"finish_reason":null}]}"#,
            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
            r#"{"choices":[],"usage":{"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}}"#,
            "[DONE]",
        ]);
        let stream = decode_stream(sse, CancellationToken::new());
        let events: Vec<_> = stream.collect().await;
        let events: Vec<_> = events.into_iter().map(|r| r.unwrap()).collect();
        assert!(matches!(&events[0], CompletionEvent::TextDelta(s) if s == "hel"));
        assert!(matches!(&events[1], CompletionEvent::TextDelta(s) if s == "lo"));
        assert!(
            matches!(&events[2], CompletionEvent::Finish { reason } if reason.as_deref() == Some("stop"))
        );
        assert!(matches!(&events[3], CompletionEvent::Usage(u) if u.total_tokens == 7));
        assert_eq!(events.len(), 4);
    }
    /// A tool call whose id and name arrive in the first delta and
    /// whose arguments arrive split over two later deltas: one start
    /// event, then one args-delta per fragment, then the finish.
    #[tokio::test]
    async fn decodes_tool_call_progressively() {
        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"c1","function":{"name":"read_file"}}]}}]}"#,
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"pa"}}]}}]}"#,
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"th\":\"/tmp/a\"}"}}]}}]}"#,
            r#"{"choices":[{"delta":{},"finish_reason":"tool_calls"}]}"#,
            "[DONE]",
        ]);
        let events: Vec<_> = decode_stream(sse, CancellationToken::new())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .map(|r| r.unwrap())
            .collect();
        assert!(matches!(
            &events[0],
            CompletionEvent::ToolCallStart { index: 0, id, name }
            if id == "c1" && name == "read_file"
        ));
        assert!(matches!(
            &events[1],
            CompletionEvent::ToolCallArgsDelta { index: 0, args_delta }
            if args_delta == "{\"pa"
        ));
        assert!(matches!(
            &events[2],
            CompletionEvent::ToolCallArgsDelta { index: 0, args_delta }
            if args_delta == "th\":\"/tmp/a\"}"
        ));
        assert!(matches!(
            &events[3],
            CompletionEvent::Finish { reason } if reason.as_deref() == Some("tool_calls")
        ));
    }
    /// A token cancelled before the first poll yields no events at
    /// all, even though chunks are immediately available.
    #[tokio::test]
    async fn cancellation_ends_stream() {
        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"content":"hello"}}]}"#,
            // These chunks should NOT be consumed once we cancel.
            r#"{"choices":[{"delta":{"content":" world"}}]}"#,
        ]);
        let cancel = CancellationToken::new();
        cancel.cancel(); // pre-cancel so the first select! arm wins.
        let events: Vec<_> = decode_stream(sse, cancel).collect().await;
        assert!(events.is_empty(), "cancelled stream yields nothing");
    }
    /// A chunk that is not valid JSON is dropped without ending the
    /// stream or surfacing an error item.
    #[tokio::test]
    async fn skips_malformed_chunks() {
        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"content":"before"}}]}"#,
            r#"not valid json"#,
            r#"{"choices":[{"delta":{"content":"after"}}]}"#,
            "[DONE]",
        ]);
        let events: Vec<_> = decode_stream(sse, CancellationToken::new())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .map(|r| r.unwrap())
            .collect();
        // The bad chunk is skipped with a warn; the bracketing
        // chunks both come through.
        assert!(matches!(&events[0], CompletionEvent::TextDelta(s) if s == "before"));
        assert!(matches!(&events[1], CompletionEvent::TextDelta(s) if s == "after"));
        assert_eq!(events.len(), 2);
    }
    /// Constructing a provider succeeds against an endpoint nothing is
    /// listening on, and reports the configured name.
    #[test]
    fn provider_construction_is_cheap() {
        // Ensures construction doesn't accidentally make any HTTP calls
        // — important because helexa-acp builds a provider per
        // configured endpoint at startup, before the editor has
        // necessarily connected.
        let p = OpenAIChatProvider::new(ep()).expect("construction");
        assert_eq!(p.name(), "test");
    }
 }
 // ── Request encoding ────────────────────────────────────────────────
 fn encode_request(req: &CompletionRequest) -> Value {
    let messages: Vec<Value> = req.messages.iter().map(encode_message).collect();
    let mut body = json!({
        "model": req.model,
        "messages": messages,
        "stream": true,
    });
    if let Value::Object(map) = &mut body {
        if let Some(t) = req.temperature {
            map.insert("temperature".into(), json!(t));
        }
        if let Some(p) = req.top_p {
            map.insert("top_p".into(), json!(p));
        }
        if let Some(m) = req.max_tokens {
            map.insert("max_tokens".into(), json!(m));
        }
        if !req.tools.is_empty() {
            map.insert("tools".into(), encode_tools(&req.tools));
        }
        // Some servers (cortex via neuron, OpenAI) report usage at the
        // end of the stream only when explicitly requested.
        map.insert("stream_options".into(), json!({ "include_usage": true }));
    }
    body
 }
 /// Encode one provider-agnostic [`Message`] as a chat-completions
 /// message object.
 ///
 /// Expected pairings are text for system/user/assistant, tool calls
 /// for assistant, and tool results for tool. Any other pairing is
 /// logged at `warn` and sent as a plain text turn under its own role.
 fn encode_message(m: &Message) -> Value {
    match (&m.content, m.role) {
        // Plain text turns differ only in the role label.
        (MessageContent::Text(body), Role::System | Role::User | Role::Assistant) => {
            json!({"role": role_str(m.role), "content": body})
        }
        (MessageContent::ToolCalls { text, calls }, Role::Assistant) => {
            let mut tool_calls: Vec<Value> = Vec::with_capacity(calls.len());
            for call in calls {
                tool_calls.push(json!({
                    "id": call.id,
                    "type": "function",
                    "function": {
                        "name": call.name,
                        "arguments": call.arguments,
                    }
                }));
            }
            // Missing accompanying text is sent as an empty string.
            json!({
                "role": "assistant",
                "content": text.as_deref().unwrap_or_default(),
                "tool_calls": tool_calls,
            })
        }
        (
            MessageContent::ToolResult {
                tool_call_id,
                content,
            },
            Role::Tool,
        ) => json!({
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": content,
        }),
        // The agent builds role and content together, so a mismatch is
        // not expected. Should one slip through, fall back to a plain
        // text turn: the request still goes out instead of the
        // conversation crashing.
        (content, role) => {
            tracing::warn!(
                ?role,
                ?content,
                "encode_message: unexpected (role, content) shape"
            );
            json!({"role": role_str(role), "content": content_as_text(content)})
        }
    }
 }
 fn role_str(r: Role) -> &'static str {
    match r {
        Role::System => "system",
        Role::User => "user",
        Role::Assistant => "assistant",
        Role::Tool => "tool",
    }
 }
 /// Flatten any [`MessageContent`] to its textual part: the text
 /// itself, the optional text accompanying tool calls (empty when
 /// absent), or the tool result content.
 fn content_as_text(c: &MessageContent) -> String {
    let textual: Option<&str> = match c {
        MessageContent::Text(body) => Some(body.as_str()),
        MessageContent::ToolCalls { text, .. } => text.as_deref(),
        MessageContent::ToolResult { content, .. } => Some(content.as_str()),
    };
    textual.unwrap_or_default().to_owned()
 }
 /// Encode tool declarations as the request's `tools` array: one
 /// `{"type": "function", "function": {...}}` object per spec, in the
 /// order given.
 fn encode_tools(tools: &[ToolSpec]) -> Value {
    let mut encoded: Vec<Value> = Vec::with_capacity(tools.len());
    for spec in tools {
        encoded.push(json!({
            "type": "function",
            "function": {
                "name": spec.name,
                "description": spec.description,
                "parameters": spec.parameters,
            }
        }));
    }
    Value::Array(encoded)
 }
 // ── Response decoding ───────────────────────────────────────────────
 // Both types are deserialised through `list_models()`. Tests don't
 // exercise that path (no mock HTTP server), so clippy --tests reports
 // them as dead; in real use they're hit on every Zed model-picker
 // refresh.
 /// Body of the models-list response. Only the `data` array is read;
 /// it is required, so a body without it fails to decode.
 #[allow(dead_code)]
 #[derive(Debug, Deserialize)]
 struct WireModelsResponse {
    /// One entry per model offered by the endpoint.
    data: Vec<WireModelObject>,
 }
 /// One entry of the models list. Only the identifier is kept.
 #[allow(dead_code)]
 #[derive(Debug, Deserialize)]
 struct WireModelObject {
    /// Model identifier, surfaced as `ModelInfo::id`.
    id: String,
 }
 /// One streamed chunk of a chat completion. Both fields fall back to
 /// their defaults when absent, so a chunk carrying only `usage` and a
 /// chunk carrying only `choices` both parse.
 #[derive(Debug, Deserialize)]
 struct WireChunk {
    /// Per-choice deltas; empty when the key is missing.
    #[serde(default)]
    choices: Vec<WireChunkChoice>,
    /// Token usage, when the server includes it in this chunk.
    #[serde(default)]
    usage: Option<WireUsage>,
 }
 /// One choice within a streamed chunk.
 #[derive(Debug, Deserialize)]
 struct WireChunkChoice {
    /// Incremental content for this choice; an empty delta when the
    /// key is missing.
    #[serde(default)]
    delta: WireDelta,
    /// Set on the chunk that ends the choice (`"stop"`,
    /// `"tool_calls"`, …); absent or `null` otherwise.
    #[serde(default)]
    finish_reason: Option<String>,
 }
 #[derive(Debug, Default, Deserialize)]
 struct WireDelta {
    #[serde(default)]
    content: Option<String>,
    /// Some servers expose chain-of-thought text via this field
    /// (mirroring OpenAI's reasoning-model schema). When present we
    /// surface it as `ReasoningDelta`.
    #[serde(default)]
    reasoning_content: Option<String>,
    #[serde(default)]
    tool_calls: Vec<WireToolCallDelta>,
 }
 /// One tool-call fragment inside a delta. Fragments belonging to the
 /// same call share an `index`.
 #[derive(Debug, Deserialize)]
 struct WireToolCallDelta {
    /// Which tool call this fragment belongs to; decodes as 0 when
    /// the key is missing.
    #[serde(default)]
    index: usize,
    /// Call id, when this fragment carries one.
    #[serde(default)]
    id: Option<String>,
    /// Function name and/or argument bytes carried by this fragment.
    #[serde(default)]
    function: Option<WireFunctionDelta>,
 }
 /// The `function` part of a tool-call fragment. Either field may be
 /// absent on any given fragment.
 #[derive(Debug, Default, Deserialize)]
 struct WireFunctionDelta {
    /// Tool name, when this fragment carries it.
    #[serde(default)]
    name: Option<String>,
    /// A piece of the JSON-encoded arguments string; the pieces are
    /// forwarded as-is and only form valid JSON once concatenated.
    #[serde(default)]
    arguments: Option<String>,
 }
 /// Token usage as reported on the wire. All three counters are
 /// required: a `usage` object missing any of them fails to
 /// deserialize, and with it the chunk that contains it.
 #[derive(Debug, Deserialize, Serialize)]
 struct WireUsage {
    prompt_tokens: u64,
    completion_tokens: u64,
    total_tokens: u64,
 }
 /// Convert the eventsource-stream SSE events into provider-agnostic
 /// events.
 ///
 /// The returned stream ends when `cancel` fires, when the upstream
 /// stream is exhausted, when a `[DONE]` sentinel arrives, or right
 /// after yielding one `Err` item for an SSE transport error.
 ///
 /// A chunk whose `data` cannot be parsed as a [`WireChunk`] is logged
 /// at `warn` and skipped; decoding then continues with the next
 /// chunk. Parse failures are never surfaced as `Err` items.
 ///
 /// Within one chunk, events are emitted per choice in this order:
 /// text delta, reasoning delta, tool-call events, finish; the chunk's
 /// usage (if any) comes last. Empty text, reasoning and argument
 /// fragments are dropped.
 fn decode_stream<S>(
    sse: S,
    cancel: CancellationToken,
 ) -> impl Stream<Item = anyhow::Result<CompletionEvent>>
 where
    S: Stream<
            Item = Result<
                eventsource_stream::Event,
                eventsource_stream::EventStreamError<reqwest::Error>,
            >,
        > + Send
        + 'static,
 {
    async_stream::stream! {
        // Track which (index) tool calls we've already announced. The
        // OpenAI stream emits the id and name only on the first delta
        // for each tool call; later deltas just carry argument bytes.
        let mut announced: std::collections::HashSet<usize> = Default::default();
        let mut sse = Box::pin(sse);
        loop {
            tokio::select! {
                // `biased;` checks `cancel.cancelled()` first on every
                // poll — without it, a pre-cancelled token loses to a
                // ready SSE chunk, and a mid-stream cancellation could
                // still consume one more chunk before noticing.
                biased;
                _ = cancel.cancelled() => {
                    tracing::debug!("openai_chat: cancellation requested, ending stream");
                    break;
                }
                next = sse.next() => {
                    // Upstream closed without a `[DONE]` sentinel.
                    let Some(event) = next else { break };
                    let event = match event {
                        Ok(e) => e,
                        Err(e) => {
                            // Transport errors are fatal: report once, then stop.
                            yield Err(anyhow::anyhow!("SSE transport: {e}"));
                            break;
                        }
                    };
                    let data = event.data;
                    if data == "[DONE]" {
                        break;
                    }
                    let chunk: WireChunk = match serde_json::from_str(&data) {
                        Ok(c) => c,
                        Err(e) => {
                            // Unparseable chunk: log and move on to the next one.
                            tracing::warn!(
                                error = %e,
                                raw = %data,
                                "openai_chat: failed to parse SSE chunk; skipping"
                            );
                            continue;
                        }
                    };
                    for choice in chunk.choices {
                        if let Some(text) = choice.delta.content
                            && !text.is_empty()
                        {
                            yield Ok(CompletionEvent::TextDelta(text));
                        }
                        if let Some(reasoning) = choice.delta.reasoning_content
                            && !reasoning.is_empty()
                        {
                            yield Ok(CompletionEvent::ReasoningDelta(reasoning));
                        }
                        for tc in choice.delta.tool_calls {
                            let idx = tc.index;
                            // `insert` returns true only the first time
                            // an index is seen, so each call is announced
                            // once. A first fragment lacking id or name
                            // is announced with empty strings.
                            if announced.insert(idx) {
                                let id = tc.id.unwrap_or_default();
                                let name = tc
                                    .function
                                    .as_ref()
                                    .and_then(|f| f.name.clone())
                                    .unwrap_or_default();
                                yield Ok(CompletionEvent::ToolCallStart {
                                    index: idx,
                                    id,
                                    name,
                                });
                            }
                            if let Some(f) = tc.function
                                && let Some(args) = f.arguments
                                && !args.is_empty()
                            {
                                yield Ok(CompletionEvent::ToolCallArgsDelta {
                                    index: idx,
                                    args_delta: args,
                                });
                            }
                        }
                        // `Finish` is only emitted when upstream states a
                        // reason; a stream that simply ends produces none.
                        if let Some(reason) = choice.finish_reason {
                            yield Ok(CompletionEvent::Finish { reason: Some(reason) });
                        }
                    }
                    if let Some(u) = chunk.usage {
                        yield Ok(CompletionEvent::Usage(UsageStats {
                            prompt_tokens: u.prompt_tokens,
                            completion_tokens: u.completion_tokens,
                            total_tokens: u.total_tokens,
                        }));
                    }
                }
            }
        }
    }
 }