feat(helexa-acp): scaffold ACP bridge with provider trait + OpenAI chat
Adds a new workspace crate `helexa-acp` (binary, Apache-2.0) — the
start of "the missing ACP binary" for multi-endpoint LLM setups
mixing public APIs, private LAN deployments, and various wire
formats. Today it speaks OpenAI /v1/chat/completions; the
Provider trait is the seam that lets OpenAI Responses, Anthropic
/v1/messages, and other wire formats slot in later without touching
the agent loop.
The crate is intentionally self-contained — no dependencies on the
other workspace crates (cortex-core, cortex-gateway, neuron) — so a
future migration to a dedicated GitHub repo is a Cargo.toml-only
change. All deps come from crates.io.
This commit lands:
* `config.rs` — TOML config at $XDG_CONFIG_HOME/helexa-acp/config.toml
with multi-endpoint support (each `[[endpoints]]` declares its
name, base_url, wire_api, default_model, optional API key /
api_key_env). Falls back to env-only single-endpoint config when
no TOML exists (HELEXA_ACP_BASE_URL, HELEXA_ACP_MODEL, etc.). The
`endpoint:model` selector syntax is validated and tested.
* `provider/mod.rs` — `Provider` trait + provider-agnostic types
(`CompletionRequest`, `CompletionEvent`, `Message`, `ToolCall`,
`ToolSpec`, `Role`, `UsageStats`). Agent loop consumes these
without knowing the wire format on the other side.
* `provider/openai_chat.rs` — `OpenAIChatProvider` impl. Compatible
with cortex, LM Studio, Ollama (compat mode), OpenRouter, OpenAI
itself. Streams via reqwest + eventsource-stream + async-stream.
Surfaces text deltas, reasoning deltas (for models that emit
`reasoning_content`), tool-call lifecycle (start, args-delta,
completion), usage, finish reason. Cancellation-token aware.
* `main.rs` — tokio + stderr-only tracing-subscriber + Stdio
transport. Builds a provider per configured endpoint at startup,
surfacing config mistakes before the editor even initializes.
Currently responds to `initialize`; everything else stubs to
`not implemented yet` until the agent loop lands in the next
commit.
12 unit tests pass — encoder shape, decoder shape (text-only,
tool-call progressive, cancellation, malformed-chunk recovery),
config parsing (multi-endpoint TOML, env fallback, validation).
The `#![allow(dead_code)]` on `provider/mod.rs` is temporary — the
agent loop in the next commit reads every field. The rationale is
recorded in a comment directly above the attribute so the next reader knows.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
792
Cargo.lock
generated
792
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,7 @@ members = [
|
|||||||
"crates/cortex-gateway",
|
"crates/cortex-gateway",
|
||||||
"crates/cortex-cli",
|
"crates/cortex-cli",
|
||||||
"crates/neuron",
|
"crates/neuron",
|
||||||
|
"crates/helexa-acp",
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
|
|||||||
39
crates/helexa-acp/Cargo.toml
Normal file
39
crates/helexa-acp/Cargo.toml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
[package]
|
||||||
|
name = "helexa-acp"
|
||||||
|
version = "0.1.16"
|
||||||
|
edition = "2024"
|
||||||
|
license = "Apache-2.0"
|
||||||
|
repository = "https://git.lair.cafe/helexa/cortex"
|
||||||
|
description = """
|
||||||
|
Agent Client Protocol bridge for the helexa self-hosted LLM stack.
|
||||||
|
Speaks ACP to ACP-compatible editor clients (Zed, etc.) and forwards
|
||||||
|
the conversation to any OpenAI-compatible HTTP endpoint — defaulting
|
||||||
|
to cortex (helexa's reverse-proxy / fleet gateway).
|
||||||
|
"""
|
||||||
|
|
||||||
|
# This crate is intentionally self-contained — no dependencies on other
|
||||||
|
# workspace crates (cortex-core, cortex-gateway, neuron). The goal is
|
||||||
|
# a painless migration to a dedicated GitHub repo in the future if the
|
||||||
|
# project grows beyond helexa's needs. All deps are crates.io.
|
||||||
|
[dependencies]
|
||||||
|
agent-client-protocol = "0.12"
|
||||||
|
tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync", "io-util", "process", "signal"] }
|
||||||
|
reqwest = { version = "0.12", features = ["json", "stream", "rustls-tls"], default-features = false }
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
serde_json = "1"
|
||||||
|
toml = "0.8"
|
||||||
|
tracing = "0.1"
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
|
anyhow = "1"
|
||||||
|
thiserror = "2"
|
||||||
|
async-trait = "0.1"
|
||||||
|
futures = "0.3"
|
||||||
|
tokio-stream = "0.1"
|
||||||
|
tokio-util = { version = "0.7", features = ["rt"] }
|
||||||
|
eventsource-stream = "0.2"
|
||||||
|
async-stream = "0.3"
|
||||||
|
url = { version = "2", features = ["serde"] }
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "helexa-acp"
|
||||||
|
path = "src/main.rs"
|
||||||
378
crates/helexa-acp/src/config.rs
Normal file
378
crates/helexa-acp/src/config.rs
Normal file
@@ -0,0 +1,378 @@
|
|||||||
|
//! Configuration for the helexa-acp bridge.
|
||||||
|
//!
|
||||||
|
//! Loaded from `$XDG_CONFIG_HOME/helexa-acp/config.toml` (or
|
||||||
|
//! `~/.config/helexa-acp/config.toml` as a fallback). If no config file
|
||||||
|
//! exists, falls back to building a single anonymous endpoint from env
|
||||||
|
//! vars — that keeps "just point at one cortex" frictionless without
|
||||||
|
//! requiring a config file on disk.
|
||||||
|
//!
|
||||||
|
//! The design goal is "the missing ACP binary for users with multiple
|
||||||
|
//! API endpoints (possibly on a private LAN, possibly mixing wire
|
||||||
|
//! types)". Hence: every endpoint is named, has its own wire API, and
|
||||||
|
//! has its own default model. The agent's selected model id can be
|
||||||
|
//! prefixed `endpoint:model` to route across endpoints; a bare
|
||||||
|
//! `model` falls through to the configured `default_endpoint`.
|
||||||
|
//!
|
||||||
|
//! ### Example TOML
|
||||||
|
//!
|
||||||
|
//! ```toml
|
||||||
|
//! default_endpoint = "helexa"
|
||||||
|
//!
|
||||||
|
//! [[endpoints]]
|
||||||
|
//! name = "helexa"
|
||||||
|
//! base_url = "http://hanzalova.internal:31313/v1"
|
||||||
|
//! wire_api = "openai-chat"
|
||||||
|
//! default_model = "helexa/large"
|
||||||
|
//!
|
||||||
|
//! [[endpoints]]
|
||||||
|
//! name = "openrouter"
|
||||||
|
//! base_url = "https://openrouter.ai/api/v1"
|
||||||
|
//! wire_api = "openai-chat"
|
||||||
|
//! api_key_env = "OPENROUTER_API_KEY"
|
||||||
|
//! default_model = "anthropic/claude-opus-4"
|
||||||
|
//!
|
||||||
|
//! [[endpoints]]
|
||||||
|
//! name = "lmstudio"
|
||||||
|
//! base_url = "http://localhost:1234/v1"
|
||||||
|
//! wire_api = "openai-chat"
|
||||||
|
//! default_model = "auto"
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use anyhow::{Context, anyhow};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
const DEFAULT_BASE_URL: &str = "http://hanzalova.internal:31313/v1";
|
||||||
|
const DEFAULT_MODEL: &str = "helexa/large";
|
||||||
|
const DEFAULT_ENDPOINT_NAME: &str = "default";
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
/// Name of the endpoint used when a request doesn't pick one
|
||||||
|
/// explicitly. Must reference an entry in `endpoints`. Defaults to
|
||||||
|
/// the first endpoint declared if unset.
|
||||||
|
#[serde(default)]
|
||||||
|
pub default_endpoint: Option<String>,
|
||||||
|
/// Per-endpoint configuration. At least one entry is required.
|
||||||
|
#[serde(default)]
|
||||||
|
pub endpoints: Vec<EndpointConfig>,
|
||||||
|
/// Optional path to a system-prompt file. When unset, the built-in
|
||||||
|
/// default prompt from `prompt.rs` is used.
|
||||||
|
#[serde(default)]
|
||||||
|
pub system_prompt_path: Option<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct EndpointConfig {
|
||||||
|
/// Short identifier used in `endpoint:model` routing and in logs.
|
||||||
|
pub name: String,
|
||||||
|
/// Base URL of the OpenAI-compatible API. Must include the `/v1`
|
||||||
|
/// (or equivalent) suffix — paths like `chat/completions` and
|
||||||
|
/// `models` are joined onto this.
|
||||||
|
pub base_url: Url,
|
||||||
|
/// Wire protocol the endpoint speaks. Phase 1 supports
|
||||||
|
/// [`WireApi::OpenAiChat`] only; `openai-responses` and
|
||||||
|
/// `anthropic-messages` land later behind their own providers.
|
||||||
|
#[serde(default)]
|
||||||
|
pub wire_api: WireApi,
|
||||||
|
/// Model to use when the client hasn't picked one via
|
||||||
|
/// `session/set_model`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub default_model: Option<String>,
|
||||||
|
/// Static API key to send as `Authorization: Bearer …`. Prefer
|
||||||
|
/// `api_key_env` for anything sensitive — keys in plain TOML are a
|
||||||
|
/// liability.
|
||||||
|
#[serde(default)]
|
||||||
|
pub api_key: Option<String>,
|
||||||
|
/// Env var name to read for the API key. Resolved at startup so a
|
||||||
|
/// missing env var yields a clear error rather than silent
|
||||||
|
/// unauthenticated calls.
|
||||||
|
#[serde(default)]
|
||||||
|
pub api_key_env: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
|
||||||
|
pub enum WireApi {
|
||||||
|
/// `POST {base}/chat/completions` returning OpenAI-format SSE.
|
||||||
|
/// Compatible with cortex, LM Studio, Ollama (compat mode),
|
||||||
|
/// OpenRouter, OpenAI itself.
|
||||||
|
#[default]
|
||||||
|
#[serde(rename = "openai-chat")]
|
||||||
|
OpenAiChat,
|
||||||
|
/// `POST {base}/responses` — OpenAI's newer Responses API. Not
|
||||||
|
/// implemented yet; the variant is reserved so endpoint configs
|
||||||
|
/// can be authored ahead of provider support.
|
||||||
|
#[serde(rename = "openai-responses")]
|
||||||
|
OpenAiResponses,
|
||||||
|
/// `POST {base}/messages` — Anthropic format. Reserved.
|
||||||
|
#[serde(rename = "anthropic-messages")]
|
||||||
|
AnthropicMessages,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EndpointConfig {
|
||||||
|
/// Resolve the API key from `api_key` (literal) or `api_key_env`
|
||||||
|
/// (env-var lookup). Returns `Ok(None)` when neither is set;
|
||||||
|
/// `Err` when `api_key_env` references a missing variable.
|
||||||
|
pub fn resolve_api_key(&self) -> anyhow::Result<Option<String>> {
|
||||||
|
if let Some(literal) = &self.api_key {
|
||||||
|
return Ok(Some(literal.clone()));
|
||||||
|
}
|
||||||
|
if let Some(var) = &self.api_key_env {
|
||||||
|
return Ok(Some(std::env::var(var).with_context(|| {
|
||||||
|
format!(
|
||||||
|
"endpoint '{}' references missing env var {}",
|
||||||
|
self.name, var
|
||||||
|
)
|
||||||
|
})?));
|
||||||
|
}
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `{base_url}/chat/completions`.
|
||||||
|
pub fn chat_completions_url(&self) -> Url {
|
||||||
|
join_segments(&self.base_url, &["chat", "completions"])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `{base_url}/models`.
|
||||||
|
pub fn models_url(&self) -> Url {
|
||||||
|
join_segments(&self.base_url, &["models"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Config {
|
||||||
|
/// Load from TOML at the standard config path, or build from env
|
||||||
|
/// vars if no file exists. Env-fallback yields a single endpoint
|
||||||
|
/// named `"default"`.
|
||||||
|
pub fn load() -> anyhow::Result<Self> {
|
||||||
|
let path = config_path();
|
||||||
|
if let Some(path) = &path
|
||||||
|
&& path.exists()
|
||||||
|
{
|
||||||
|
return Self::from_file(path);
|
||||||
|
}
|
||||||
|
Self::from_env()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
|
||||||
|
/// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
|
||||||
|
/// `HELEXA_ACP_SYSTEM_PROMPT_PATH`.
|
||||||
|
pub fn from_env() -> anyhow::Result<Self> {
|
||||||
|
let base_url = std::env::var("HELEXA_ACP_BASE_URL")
|
||||||
|
.ok()
|
||||||
|
.unwrap_or_else(|| DEFAULT_BASE_URL.into());
|
||||||
|
let base_url = Url::parse(&base_url)
|
||||||
|
.with_context(|| format!("HELEXA_ACP_BASE_URL is not a valid URL ({base_url})"))?;
|
||||||
|
let default_model = std::env::var("HELEXA_ACP_MODEL")
|
||||||
|
.ok()
|
||||||
|
.unwrap_or_else(|| DEFAULT_MODEL.into());
|
||||||
|
let api_key = std::env::var("HELEXA_ACP_API_KEY")
|
||||||
|
.ok()
|
||||||
|
.filter(|s| !s.is_empty());
|
||||||
|
let system_prompt_path = std::env::var("HELEXA_ACP_SYSTEM_PROMPT_PATH")
|
||||||
|
.ok()
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.map(PathBuf::from);
|
||||||
|
Ok(Self {
|
||||||
|
default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
|
||||||
|
endpoints: vec![EndpointConfig {
|
||||||
|
name: DEFAULT_ENDPOINT_NAME.into(),
|
||||||
|
base_url,
|
||||||
|
wire_api: WireApi::OpenAiChat,
|
||||||
|
default_model: Some(default_model),
|
||||||
|
api_key,
|
||||||
|
api_key_env: None,
|
||||||
|
}],
|
||||||
|
system_prompt_path,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_file(path: &Path) -> anyhow::Result<Self> {
|
||||||
|
let text = std::fs::read_to_string(path)
|
||||||
|
.with_context(|| format!("read config {}", path.display()))?;
|
||||||
|
let mut cfg: Self =
|
||||||
|
toml::from_str(&text).with_context(|| format!("parse config {}", path.display()))?;
|
||||||
|
cfg.validate()?;
|
||||||
|
Ok(cfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate(&mut self) -> anyhow::Result<()> {
|
||||||
|
if self.endpoints.is_empty() {
|
||||||
|
return Err(anyhow!("config has no [[endpoints]] entries"));
|
||||||
|
}
|
||||||
|
for (i, ep) in self.endpoints.iter().enumerate() {
|
||||||
|
if ep.name.is_empty() {
|
||||||
|
return Err(anyhow!("endpoints[{i}] has empty name"));
|
||||||
|
}
|
||||||
|
if ep.name.contains(':') {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"endpoints[{i}].name '{}' contains ':' which would clash \
|
||||||
|
with the endpoint:model selector syntax",
|
||||||
|
ep.name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Pick a default endpoint if none was named.
|
||||||
|
if self.default_endpoint.is_none() {
|
||||||
|
self.default_endpoint = Some(self.endpoints[0].name.clone());
|
||||||
|
}
|
||||||
|
let default_name = self.default_endpoint.as_deref().unwrap();
|
||||||
|
if !self.endpoints.iter().any(|e| e.name == default_name) {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"default_endpoint '{default_name}' is not declared in [[endpoints]]"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up an endpoint by name. Returns `None` if not configured.
|
||||||
|
pub fn endpoint(&self, name: &str) -> Option<&EndpointConfig> {
|
||||||
|
self.endpoints.iter().find(|e| e.name == name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The default endpoint (guaranteed to exist after `validate`).
|
||||||
|
pub fn default_endpoint(&self) -> &EndpointConfig {
|
||||||
|
let name = self
|
||||||
|
.default_endpoint
|
||||||
|
.as_deref()
|
||||||
|
.expect("default_endpoint set by validate");
|
||||||
|
self.endpoint(name)
|
||||||
|
.expect("default_endpoint resolves after validate")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split an ACP-side `model` string into an optional endpoint name and
/// the endpoint-local model id.
///
/// * `helexa:helexa/large` → (`Some("helexa")`, `"helexa/large"`)
/// * `helexa/large` → (`None`, `"helexa/large"`)
///
/// Only the FIRST colon is treated as the separator, so a model id
/// that itself contains `:` can still be addressed by spelling out the
/// endpoint in front of it (`helexa:ns:model` → endpoint `helexa`,
/// model `ns:model`). When either side of the first colon is empty the
/// whole input is returned as the model id with no endpoint.
pub fn parse_model_selector(input: &str) -> (Option<&str>, &str) {
    let Some((endpoint, model)) = input.split_once(':') else {
        return (None, input);
    };
    if endpoint.is_empty() || model.is_empty() {
        (None, input)
    } else {
        (Some(endpoint), model)
    }
}
|
||||||
|
|
||||||
|
/// Resolve where the TOML config file is expected to live.
///
/// Resolution order:
/// 1. `HELEXA_ACP_CONFIG_PATH`, used verbatim;
/// 2. `$XDG_CONFIG_HOME/helexa-acp/config.toml`;
/// 3. `$HOME/.config/helexa-acp/config.toml`.
///
/// A variable that is set but empty counts as unset at every step, so
/// an empty override or `HOME` never produces an empty or relative
/// path. Returns `None` when none of the variables yields a value.
/// The returned path is not checked for existence.
fn config_path() -> Option<PathBuf> {
    let non_empty = |key: &str| std::env::var(key).ok().filter(|v| !v.is_empty());

    if let Some(override_path) = non_empty("HELEXA_ACP_CONFIG_PATH") {
        return Some(PathBuf::from(override_path));
    }
    let base = non_empty("XDG_CONFIG_HOME")
        .map(PathBuf::from)
        .or_else(|| non_empty("HOME").map(|home| PathBuf::from(home).join(".config")))?;
    Some(base.join("helexa-acp").join("config.toml"))
}
|
||||||
|
|
||||||
|
/// Return a copy of `base` with `segments` appended to its path.
///
/// A trailing empty path segment on `base` (i.e. a trailing `/`) is
/// dropped first so that no doubled slash is produced. If the URL does
/// not expose mutable path segments, the copy is returned unchanged.
fn join_segments(base: &Url, segments: &[&str]) -> Url {
    let mut joined = base.clone();
    match joined.path_segments_mut() {
        Ok(mut path) => {
            path.pop_if_empty().extend(segments.iter().copied());
        }
        // Nothing to append to; fall through with the untouched clone.
        Err(()) => {}
    }
    joined
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// URL joining must give the same result whether or not the
    /// configured base URL ends in a slash.
    #[test]
    fn url_join_handles_trailing_slash() {
        let endpoint = |base: &str| EndpointConfig {
            name: "x".into(),
            base_url: Url::parse(base).unwrap(),
            wire_api: WireApi::OpenAiChat,
            default_model: None,
            api_key: None,
            api_key_env: None,
        };
        for base in ["http://h.internal:31313/v1", "http://h.internal:31313/v1/"] {
            let ep = endpoint(base);
            assert_eq!(
                ep.chat_completions_url().as_str(),
                "http://h.internal:31313/v1/chat/completions"
            );
            assert_eq!(
                ep.models_url().as_str(),
                "http://h.internal:31313/v1/models"
            );
        }
    }

    #[test]
    fn parses_model_selector() {
        assert_eq!(
            parse_model_selector("helexa:helexa/large"),
            (Some("helexa"), "helexa/large")
        );
        assert_eq!(parse_model_selector("helexa/large"), (None, "helexa/large"));
        assert_eq!(parse_model_selector("gpt-5"), (None, "gpt-5"));
        // Edge case: a leading colon → no endpoint.
        assert_eq!(parse_model_selector(":gpt-5"), (None, ":gpt-5"));
        // Edge case: a trailing colon → no model, so no endpoint either.
        assert_eq!(parse_model_selector("helexa:"), (None, "helexa:"));
        // Only the first colon separates endpoint from model.
        assert_eq!(parse_model_selector("a:b:c"), (Some("a"), "b:c"));
    }

    #[test]
    fn env_fallback_builds_single_endpoint() {
        // Clear the variables rather than setting them to specific values,
        // then confirm the built-in defaults construct cleanly.
        //
        // SAFETY: mutating the process environment is only sound while no
        // other thread accesses it concurrently. No other test in this
        // module touches these variables; tests elsewhere in the crate
        // are assumed not to — TODO confirm.
        unsafe {
            std::env::remove_var("HELEXA_ACP_BASE_URL");
            std::env::remove_var("HELEXA_ACP_MODEL");
            std::env::remove_var("HELEXA_ACP_API_KEY");
        }
        let cfg = Config::from_env().unwrap();
        assert_eq!(cfg.endpoints.len(), 1);
        assert_eq!(cfg.endpoints[0].name, "default");
        assert_eq!(cfg.endpoints[0].base_url.as_str(), DEFAULT_BASE_URL);
        assert_eq!(
            cfg.endpoints[0].default_model.as_deref(),
            Some(DEFAULT_MODEL)
        );
    }

    #[test]
    fn toml_parses_multi_endpoint() {
        let toml_text = r#"
default_endpoint = "helexa"

[[endpoints]]
name = "helexa"
base_url = "http://hanzalova.internal:31313/v1"
default_model = "helexa/large"

[[endpoints]]
name = "openrouter"
base_url = "https://openrouter.ai/api/v1"
wire_api = "openai-chat"
api_key_env = "OPENROUTER_API_KEY"
default_model = "anthropic/claude-opus-4"
"#;
        let mut cfg: Config = toml::from_str(toml_text).unwrap();
        cfg.validate().unwrap();
        assert_eq!(cfg.endpoints.len(), 2);
        assert_eq!(cfg.default_endpoint().name, "helexa");
        // `wire_api` omitted on the first endpoint → the default variant.
        assert_eq!(cfg.endpoints[0].wire_api, WireApi::OpenAiChat);
        assert_eq!(
            cfg.endpoints[1].api_key_env.as_deref(),
            Some("OPENROUTER_API_KEY")
        );
    }

    #[test]
    fn validate_rejects_colon_in_endpoint_name() {
        let toml_text = r#"
[[endpoints]]
name = "bad:name"
base_url = "http://x/v1"
"#;
        let mut cfg: Config = toml::from_str(toml_text).unwrap();
        let err = cfg.validate().unwrap_err();
        assert!(format!("{err}").contains("clash"));
    }
}
|
||||||
121
crates/helexa-acp/src/main.rs
Normal file
121
crates/helexa-acp/src/main.rs
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
//! helexa-acp — Agent Client Protocol bridge for multi-endpoint LLM
|
||||||
|
//! setups (helexa, LM Studio, Ollama, OpenRouter, OpenAI, Anthropic,
|
||||||
|
//! …) with a clean per-endpoint wire-format selector.
|
||||||
|
//!
|
||||||
|
//! Speaks ACP over stdio to an editor client (Zed today). The
|
||||||
|
//! conversation is forwarded to one of the configured endpoints via
|
||||||
|
//! a wire-format-specific [`provider::Provider`] implementation.
|
||||||
|
//! The agent loop itself is provider-agnostic — adding e.g. an
|
||||||
|
//! Anthropic /v1/messages provider doesn't touch `agent.rs`.
|
||||||
|
//!
|
||||||
|
//! Config: `$XDG_CONFIG_HOME/helexa-acp/config.toml` for the multi-
|
||||||
|
//! endpoint case; env vars (`HELEXA_ACP_BASE_URL`, etc.) for the
|
||||||
|
//! single-endpoint case when no config file exists.
|
||||||
|
|
||||||
|
use agent_client_protocol::schema::{AgentCapabilities, InitializeRequest, InitializeResponse};
|
||||||
|
use agent_client_protocol::{Agent, Client, ConnectionTo, Dispatch, Result, Stdio};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
mod config;
|
||||||
|
mod provider;
|
||||||
|
|
||||||
|
use config::{Config, EndpointConfig, WireApi};
|
||||||
|
use provider::{Provider, openai_chat::OpenAIChatProvider};
|
||||||
|
|
||||||
|
/// Build a provider for `endpoint` according to its declared
|
||||||
|
/// `wire_api`. Future wire types (OpenAI Responses, Anthropic
|
||||||
|
/// /v1/messages, Ollama native) slot in here without changing the
|
||||||
|
/// caller.
|
||||||
|
fn build_provider(endpoint: EndpointConfig) -> anyhow::Result<Arc<dyn Provider>> {
|
||||||
|
match endpoint.wire_api {
|
||||||
|
WireApi::OpenAiChat => Ok(Arc::new(OpenAIChatProvider::new(endpoint)?)),
|
||||||
|
WireApi::OpenAiResponses => Err(anyhow::anyhow!(
|
||||||
|
"endpoint '{}' wire_api 'openai-responses' is reserved for a future provider; \
|
||||||
|
use 'openai-chat' for now or wait for the OpenAIResponsesProvider impl",
|
||||||
|
endpoint.name
|
||||||
|
)),
|
||||||
|
WireApi::AnthropicMessages => Err(anyhow::anyhow!(
|
||||||
|
"endpoint '{}' wire_api 'anthropic-messages' is reserved for a future provider",
|
||||||
|
endpoint.name
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
// Logs go to stderr — stdout is reserved for the JSON-RPC stream.
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_writer(std::io::stderr)
|
||||||
|
.with_env_filter(
|
||||||
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
|
||||||
|
)
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let cfg = Config::load()
|
||||||
|
.map_err(|e| agent_client_protocol::util::internal_error(format!("config: {e:#}")))?;
|
||||||
|
tracing::info!(
|
||||||
|
endpoints = cfg.endpoints.len(),
|
||||||
|
default_endpoint = %cfg.default_endpoint().name,
|
||||||
|
default_model = ?cfg.default_endpoint().default_model,
|
||||||
|
"helexa-acp starting"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Build a provider for each configured endpoint up-front. Cheap —
|
||||||
|
// just sets up a reqwest::Client and resolves the API key — and
|
||||||
|
// surfaces config mistakes (missing API key env var, unsupported
|
||||||
|
// wire_api) before the editor even sends an initialize request.
|
||||||
|
let mut providers: Vec<Arc<dyn Provider>> = Vec::with_capacity(cfg.endpoints.len());
|
||||||
|
for endpoint in &cfg.endpoints {
|
||||||
|
match build_provider(endpoint.clone()) {
|
||||||
|
Ok(p) => {
|
||||||
|
tracing::info!(
|
||||||
|
endpoint = %endpoint.name,
|
||||||
|
base_url = %endpoint.base_url,
|
||||||
|
wire_api = ?endpoint.wire_api,
|
||||||
|
"registered provider"
|
||||||
|
);
|
||||||
|
providers.push(p);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
endpoint = %endpoint.name,
|
||||||
|
error = %format!("{e:#}"),
|
||||||
|
"skipping endpoint with invalid config"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if providers.is_empty() {
|
||||||
|
return Err(agent_client_protocol::util::internal_error(
|
||||||
|
"no usable endpoints — check config",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Agent
|
||||||
|
.builder()
|
||||||
|
.name("helexa-acp")
|
||||||
|
.on_receive_request(
|
||||||
|
async move |initialize: InitializeRequest, responder, _connection| {
|
||||||
|
// Phase 1 wiring — capabilities only. Real session
|
||||||
|
// handling lands in the next iteration (agent.rs).
|
||||||
|
responder.respond(
|
||||||
|
InitializeResponse::new(initialize.protocol_version)
|
||||||
|
.agent_capabilities(AgentCapabilities::new()),
|
||||||
|
)
|
||||||
|
},
|
||||||
|
agent_client_protocol::on_receive_request!(),
|
||||||
|
)
|
||||||
|
.on_receive_dispatch(
|
||||||
|
async move |message: Dispatch, cx: ConnectionTo<Client>| {
|
||||||
|
tracing::warn!(method = ?message.method(), "unhandled ACP message");
|
||||||
|
message.respond_with_error(
|
||||||
|
agent_client_protocol::util::internal_error("not implemented yet"),
|
||||||
|
cx,
|
||||||
|
)
|
||||||
|
},
|
||||||
|
agent_client_protocol::on_receive_dispatch!(),
|
||||||
|
)
|
||||||
|
.connect_to(Stdio::new())
|
||||||
|
.await
|
||||||
|
}
|
||||||
162
crates/helexa-acp/src/provider/mod.rs
Normal file
162
crates/helexa-acp/src/provider/mod.rs
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
//! Provider trait — the seam between the ACP-side agent loop and
|
||||||
|
//! whatever wire protocol an endpoint actually speaks.
|
||||||
|
//!
|
||||||
|
//! Every concrete provider (OpenAI chat completions, OpenAI Responses,
|
||||||
|
//! Anthropic /v1/messages, Ollama native, …) implements
|
||||||
|
//! [`Provider`]. The agent constructs a [`CompletionRequest`] using
|
||||||
|
//! provider-agnostic types and consumes a stream of
|
||||||
|
//! [`CompletionEvent`]s — neither end knows which wire format is on
|
||||||
|
//! the other side of the trait.
|
||||||
|
//!
|
||||||
|
//! Day-1 provider: [`openai_chat::OpenAIChatProvider`]. Day-N
|
||||||
|
//! providers slot in without touching `agent.rs`.
|
||||||
|
|
||||||
|
// Many fields and variants in the public surface here aren't read yet:
|
||||||
|
// the agent loop that consumes `CompletionEvent`s and constructs
|
||||||
|
// `CompletionRequest`s lands in the next commit. They're not
|
||||||
|
// speculative — the unit tests in `provider::openai_chat::tests`
|
||||||
|
// already verify the encoder/decoder produces them. Once `agent.rs`
|
||||||
|
// arrives this allow comes off.
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use futures::stream::BoxStream;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::Value;
|
||||||
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
|
pub mod openai_chat;
|
||||||
|
|
||||||
|
/// Provider-agnostic LLM endpoint. Implementations translate between
|
||||||
|
/// [`CompletionRequest`] / [`CompletionEvent`] and whatever wire
|
||||||
|
/// format their endpoint speaks.
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Provider: Send + Sync {
|
||||||
|
/// Endpoint name as configured by the user (e.g. `"helexa"`,
|
||||||
|
/// `"openrouter"`). Used in logs and in the `endpoint:model`
|
||||||
|
/// selector.
|
||||||
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
|
/// List models available at this endpoint. Used to build the
|
||||||
|
/// model-picker dropdown in editor clients. Should return quickly
|
||||||
|
/// (cache if necessary).
|
||||||
|
async fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>>;
|
||||||
|
|
||||||
|
/// Run a chat completion. Returns a stream of provider-agnostic
|
||||||
|
/// events. The stream stops when the upstream finishes, when
|
||||||
|
/// `cancel` is fired, or when the stream is dropped.
|
||||||
|
async fn complete(
|
||||||
|
&self,
|
||||||
|
request: CompletionRequest,
|
||||||
|
cancel: CancellationToken,
|
||||||
|
) -> anyhow::Result<BoxStream<'static, anyhow::Result<CompletionEvent>>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One model exposed by a provider.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ModelInfo {
|
||||||
|
pub id: String,
|
||||||
|
/// Human-friendly name, if the endpoint exposes one. Otherwise
|
||||||
|
/// `id` is used as the display name.
|
||||||
|
#[serde(default)]
|
||||||
|
pub display_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inputs to a completion. Provider-agnostic — concrete providers
|
||||||
|
/// translate this into their wire format.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CompletionRequest {
|
||||||
|
/// Endpoint-local model id (without the `endpoint:` prefix).
|
||||||
|
pub model: String,
|
||||||
|
pub messages: Vec<Message>,
|
||||||
|
/// Tools the model is allowed to call. Empty list means no tool
|
||||||
|
/// support advertised.
|
||||||
|
pub tools: Vec<ToolSpec>,
|
||||||
|
pub temperature: Option<f64>,
|
||||||
|
pub top_p: Option<f64>,
|
||||||
|
pub max_tokens: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Message {
|
||||||
|
pub role: Role,
|
||||||
|
pub content: MessageContent,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Author of a [`Message`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Role {
    System,
    User,
    Assistant,
    /// A message carrying a tool result. Each provider maps this onto
    /// whatever its wire format expects (OpenAI uses `role: "tool"`
    /// plus `tool_call_id`; Anthropic uses content blocks).
    Tool,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum MessageContent {
|
||||||
|
Text(String),
|
||||||
|
/// Assistant turn that called one or more tools.
|
||||||
|
ToolCalls {
|
||||||
|
/// Optional text the assistant said alongside the tool calls.
|
||||||
|
text: Option<String>,
|
||||||
|
calls: Vec<ToolCall>,
|
||||||
|
},
|
||||||
|
/// Tool result. `tool_call_id` matches the assistant's call id.
|
||||||
|
ToolResult {
|
||||||
|
tool_call_id: String,
|
||||||
|
content: String,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single tool invocation requested by the assistant.
#[derive(Debug, Clone)]
pub struct ToolCall {
    /// Id assigned by the provider; links the call to its result.
    pub id: String,
    /// Name of the tool being called.
    pub name: String,
    /// Arguments as JSON text. They stay a string because providers
    /// stream the bytes incrementally and validate only at the end;
    /// the agent decodes them once the call is complete.
    pub arguments: String,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ToolSpec {
|
||||||
|
pub name: String,
|
||||||
|
pub description: String,
|
||||||
|
/// JSON Schema of the arguments object.
|
||||||
|
pub parameters: Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Events emitted by a provider during a streaming completion.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum CompletionEvent {
|
||||||
|
/// Incremental visible text from the assistant.
|
||||||
|
TextDelta(String),
|
||||||
|
/// Incremental "reasoning" / thought text, if the model emits one
|
||||||
|
/// (e.g. Qwen3 with `<think>` tags surfaced as a separate stream,
|
||||||
|
/// or OpenAI reasoning models).
|
||||||
|
ReasoningDelta(String),
|
||||||
|
/// A new tool call has started.
|
||||||
|
ToolCallStart {
|
||||||
|
index: usize,
|
||||||
|
id: String,
|
||||||
|
name: String,
|
||||||
|
},
|
||||||
|
/// More argument bytes for a tool call already announced via
|
||||||
|
/// [`Self::ToolCallStart`].
|
||||||
|
ToolCallArgsDelta { index: usize, args_delta: String },
|
||||||
|
/// Stream finished. Carries the upstream `finish_reason` if it
|
||||||
|
/// gave one (`"stop"`, `"length"`, `"tool_calls"`, …).
|
||||||
|
Finish { reason: Option<String> },
|
||||||
|
/// Final usage stats, if the provider supplied them.
|
||||||
|
Usage(UsageStats),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Token counts reported for a completion. All counters default to 0.
#[derive(Debug, Clone, Copy, Default)]
pub struct UsageStats {
    /// Tokens counted for the prompt.
    pub prompt_tokens: u64,
    /// Tokens counted for the completion.
    pub completion_tokens: u64,
    /// Total token count as reported by the provider.
    pub total_tokens: u64,
}
|
||||||
645
crates/helexa-acp/src/provider/openai_chat.rs
Normal file
645
crates/helexa-acp/src/provider/openai_chat.rs
Normal file
@@ -0,0 +1,645 @@
|
|||||||
|
//! OpenAI `/v1/chat/completions` provider.
|
||||||
|
//!
|
||||||
|
//! Covers cortex, LM Studio, Ollama (compat mode), OpenRouter, and
|
||||||
|
//! OpenAI itself. The wire format is well-documented and stable;
|
||||||
|
//! tool calls follow the `tools` request param + `tool_calls`
|
||||||
|
//! response delta convention shared by every reasonably-modern
|
||||||
|
//! OpenAI-compatible server.
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use eventsource_stream::Eventsource;
|
||||||
|
use futures::{Stream, StreamExt, stream::BoxStream};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::{Value, json};
|
||||||
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
CompletionEvent, CompletionRequest, Message, MessageContent, ModelInfo, Provider, Role,
|
||||||
|
ToolCall, ToolSpec, UsageStats,
|
||||||
|
};
|
||||||
|
use crate::config::EndpointConfig;
|
||||||
|
|
||||||
|
// Several fields and types in this module are only used through the
|
||||||
|
// async HTTP path in `complete()` and `list_models()`. Tests don't
|
||||||
|
// stand up a mock HTTP server (we'd be over-engineering for the
|
||||||
|
// payoff), so clippy's dead-code pass under `--tests` flags them.
|
||||||
|
// Each `allow(dead_code)` below names exactly what's exercised only
|
||||||
|
// at runtime, with a one-line rationale so the next reader can tell
|
||||||
|
// it's intentional.
|
||||||
|
pub struct OpenAIChatProvider {
|
||||||
|
endpoint: EndpointConfig,
|
||||||
|
/// Read by `list_models` and `complete` (bearer auth header).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
api_key: Option<String>,
|
||||||
|
/// Read by `list_models` and `complete` (request builder).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
http: reqwest::Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenAIChatProvider {
|
||||||
|
pub fn new(endpoint: EndpointConfig) -> anyhow::Result<Self> {
|
||||||
|
let api_key = endpoint.resolve_api_key()?;
|
||||||
|
let http = reqwest::Client::builder()
|
||||||
|
// Generous timeout: cortex may need to cold-load a model
|
||||||
|
// before serving the first chunk, which can be tens of
|
||||||
|
// seconds. We rely on cancellation for early termination,
|
||||||
|
// not on timeout.
|
||||||
|
.timeout(std::time::Duration::from_secs(600))
|
||||||
|
.build()?;
|
||||||
|
Ok(Self {
|
||||||
|
endpoint,
|
||||||
|
api_key,
|
||||||
|
http,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Provider for OpenAIChatProvider {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
&self.endpoint.name
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
|
||||||
|
let mut req = self.http.get(self.endpoint.models_url());
|
||||||
|
if let Some(key) = &self.api_key {
|
||||||
|
req = req.bearer_auth(key);
|
||||||
|
}
|
||||||
|
let resp = req
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| anyhow::anyhow!("{} list_models: {e}", self.endpoint.name))?;
|
||||||
|
let status = resp.status();
|
||||||
|
if !status.is_success() {
|
||||||
|
let body = resp.text().await.unwrap_or_default();
|
||||||
|
anyhow::bail!(
|
||||||
|
"{} list_models returned {}: {}",
|
||||||
|
self.endpoint.name,
|
||||||
|
status,
|
||||||
|
body
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let body: WireModelsResponse = resp.json().await?;
|
||||||
|
Ok(body
|
||||||
|
.data
|
||||||
|
.into_iter()
|
||||||
|
.map(|m| ModelInfo {
|
||||||
|
id: m.id,
|
||||||
|
display_name: None,
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn complete(
|
||||||
|
&self,
|
||||||
|
request: CompletionRequest,
|
||||||
|
cancel: CancellationToken,
|
||||||
|
) -> anyhow::Result<BoxStream<'static, anyhow::Result<CompletionEvent>>> {
|
||||||
|
let body = encode_request(&request);
|
||||||
|
let mut req = self
|
||||||
|
.http
|
||||||
|
.post(self.endpoint.chat_completions_url())
|
||||||
|
.json(&body);
|
||||||
|
if let Some(key) = &self.api_key {
|
||||||
|
req = req.bearer_auth(key);
|
||||||
|
}
|
||||||
|
let resp = req
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| anyhow::anyhow!("{} chat_completion send: {e}", self.endpoint.name))?;
|
||||||
|
let status = resp.status();
|
||||||
|
if !status.is_success() {
|
||||||
|
let body = resp.text().await.unwrap_or_default();
|
||||||
|
anyhow::bail!(
|
||||||
|
"{} chat_completion returned {}: {}",
|
||||||
|
self.endpoint.name,
|
||||||
|
status,
|
||||||
|
body
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let sse = resp.bytes_stream().eventsource();
|
||||||
|
let stream = decode_stream(sse, cancel);
|
||||||
|
Ok(Box::pin(stream))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use futures::stream;
    use url::Url;

    /// Item type of the SSE stream that `decode_stream` consumes.
    type SseItem = std::result::Result<
        eventsource_stream::Event,
        eventsource_stream::EventStreamError<reqwest::Error>,
    >;

    /// Endpoint fixture: no key, no default model.
    fn ep() -> EndpointConfig {
        EndpointConfig {
            name: "test".into(),
            base_url: Url::parse("http://localhost:9999/v1").unwrap(),
            wire_api: crate::config::WireApi::OpenAiChat,
            default_model: None,
            api_key: None,
            api_key_env: None,
        }
    }

    /// Shorthand for a plain-text message.
    fn text_msg(role: Role, body: &str) -> Message {
        Message {
            role,
            content: MessageContent::Text(body.into()),
        }
    }

    /// Build a fake eventsource stream from canned SSE `data:` payloads.
    fn fake_sse(lines: Vec<&'static str>) -> impl Stream<Item = SseItem> {
        stream::iter(lines.into_iter().map(|data| -> SseItem {
            Ok(eventsource_stream::Event {
                event: "message".into(),
                data: data.into(),
                id: String::new(),
                retry: None,
            })
        }))
    }

    /// Decode canned payloads with a fresh (never-cancelled) token,
    /// panicking on any `Err` item.
    async fn decode_ok(lines: Vec<&'static str>) -> Vec<CompletionEvent> {
        decode_stream(fake_sse(lines), CancellationToken::new())
            .map(|item| item.unwrap())
            .collect()
            .await
    }

    #[test]
    fn encodes_text_only_request() {
        let req = CompletionRequest {
            model: "helexa/large".into(),
            messages: vec![
                text_msg(Role::System, "you are helpful"),
                text_msg(Role::User, "hi"),
            ],
            tools: vec![],
            temperature: Some(0.7),
            top_p: None,
            max_tokens: Some(256),
        };

        let body = encode_request(&req);

        // Scalars and always-present options.
        assert_eq!(body["model"], "helexa/large");
        assert_eq!(body["stream"], true);
        assert_eq!(body["temperature"], 0.7);
        assert_eq!(body["max_tokens"], 256);
        assert_eq!(body["stream_options"]["include_usage"], true);

        // Unset / empty options must not appear at all.
        assert!(body.get("top_p").is_none(), "absent options are omitted");
        assert!(body.get("tools").is_none(), "empty tools omitted");

        // Messages keep their order and roles.
        let messages = body["messages"].as_array().unwrap();
        assert_eq!(messages.len(), 2);
        let roles: Vec<&str> = messages
            .iter()
            .map(|m| m["role"].as_str().unwrap())
            .collect();
        assert_eq!(roles, ["system", "user"]);
        assert_eq!(messages[1]["content"], "hi");
    }

    #[test]
    fn encodes_tool_call_round_trip() {
        let assistant_turn = Message {
            role: Role::Assistant,
            content: MessageContent::ToolCalls {
                text: Some("calling read_file".into()),
                calls: vec![ToolCall {
                    id: "call_1".into(),
                    name: "read_file".into(),
                    arguments: "{\"path\":\"/tmp/a.txt\"}".into(),
                }],
            },
        };
        let tool_turn = Message {
            role: Role::Tool,
            content: MessageContent::ToolResult {
                tool_call_id: "call_1".into(),
                content: "file contents".into(),
            },
        };
        let read_file_spec = ToolSpec {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters: json!({"type": "object", "properties": {"path": {"type": "string"}}}),
        };
        let req = CompletionRequest {
            model: "x".into(),
            messages: vec![assistant_turn, tool_turn],
            tools: vec![read_file_spec],
            temperature: None,
            top_p: None,
            max_tokens: None,
        };

        let body = encode_request(&req);

        // Tool definitions are forwarded.
        let tools = body["tools"].as_array().unwrap();
        assert_eq!(tools[0]["function"]["name"], "read_file");

        // The assistant's tool_calls are forwarded.
        let asst = &body["messages"][0];
        assert_eq!(asst["role"], "assistant");
        let first_call = &asst["tool_calls"][0];
        assert_eq!(first_call["id"], "call_1");
        assert_eq!(first_call["function"]["name"], "read_file");

        // The tool result is forwarded.
        let tool = &body["messages"][1];
        assert_eq!(tool["role"], "tool");
        assert_eq!(tool["tool_call_id"], "call_1");
        assert_eq!(tool["content"], "file contents");
    }

    #[tokio::test]
    async fn decodes_text_then_finish() {
        let events = decode_ok(vec![
            r#"{"choices":[{"delta":{"content":"hel"},"finish_reason":null}]}"#,
            r#"{"choices":[{"delta":{"content":"lo"},"finish_reason":null}]}"#,
            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
            r#"{"choices":[],"usage":{"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}}"#,
            "[DONE]",
        ])
        .await;

        match events.as_slice() {
            [
                CompletionEvent::TextDelta(first),
                CompletionEvent::TextDelta(second),
                CompletionEvent::Finish { reason },
                CompletionEvent::Usage(usage),
            ] => {
                assert_eq!(first, "hel");
                assert_eq!(second, "lo");
                assert_eq!(reason.as_deref(), Some("stop"));
                assert_eq!(usage.total_tokens, 7);
            }
            other => panic!("unexpected event sequence: {other:?}"),
        }
    }

    #[tokio::test]
    async fn decodes_tool_call_progressively() {
        let events = decode_ok(vec![
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"c1","function":{"name":"read_file"}}]}}]}"#,
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"pa"}}]}}]}"#,
            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"th\":\"/tmp/a\"}"}}]}}]}"#,
            r#"{"choices":[{"delta":{},"finish_reason":"tool_calls"}]}"#,
            "[DONE]",
        ])
        .await;

        // Only the first four events are pinned, as in the positional
        // checks this replaces.
        match events.as_slice() {
            [
                CompletionEvent::ToolCallStart { index: 0, id, name },
                CompletionEvent::ToolCallArgsDelta {
                    index: 0,
                    args_delta: first_args,
                },
                CompletionEvent::ToolCallArgsDelta {
                    index: 0,
                    args_delta: second_args,
                },
                CompletionEvent::Finish { reason },
                ..,
            ] => {
                assert_eq!(id, "c1");
                assert_eq!(name, "read_file");
                assert_eq!(first_args, "{\"pa");
                assert_eq!(second_args, "th\":\"/tmp/a\"}");
                assert_eq!(reason.as_deref(), Some("tool_calls"));
            }
            other => panic!("unexpected event sequence: {other:?}"),
        }
    }

    #[tokio::test]
    async fn cancellation_ends_stream() {
        // Pre-cancel so the cancellation arm of the select! wins
        // before any chunk is consumed.
        let cancel = CancellationToken::new();
        cancel.cancel();

        let sse = fake_sse(vec![
            r#"{"choices":[{"delta":{"content":"hello"}}]}"#,
            // Must NOT be consumed once cancelled.
            r#"{"choices":[{"delta":{"content":" world"}}]}"#,
        ]);

        let events: Vec<_> = decode_stream(sse, cancel).collect().await;
        assert!(events.is_empty(), "cancelled stream yields nothing");
    }

    #[tokio::test]
    async fn skips_malformed_chunks() {
        let events = decode_ok(vec![
            r#"{"choices":[{"delta":{"content":"before"}}]}"#,
            r#"not valid json"#,
            r#"{"choices":[{"delta":{"content":"after"}}]}"#,
            "[DONE]",
        ])
        .await;

        // The bad chunk is dropped (with a warn); its neighbours on
        // either side both arrive.
        match events.as_slice() {
            [CompletionEvent::TextDelta(before), CompletionEvent::TextDelta(after)] => {
                assert_eq!(before, "before");
                assert_eq!(after, "after");
            }
            other => panic!("unexpected event sequence: {other:?}"),
        }
    }

    #[test]
    fn provider_construction_is_cheap() {
        // Construction must not make HTTP calls: helexa-acp builds one
        // provider per configured endpoint at startup, possibly before
        // the editor has connected.
        let provider = OpenAIChatProvider::new(ep()).expect("construction");
        assert_eq!(provider.name(), "test");
    }
}
|
||||||
|
|
||||||
|
// ── Request encoding ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn encode_request(req: &CompletionRequest) -> Value {
|
||||||
|
let messages: Vec<Value> = req.messages.iter().map(encode_message).collect();
|
||||||
|
let mut body = json!({
|
||||||
|
"model": req.model,
|
||||||
|
"messages": messages,
|
||||||
|
"stream": true,
|
||||||
|
});
|
||||||
|
if let Value::Object(map) = &mut body {
|
||||||
|
if let Some(t) = req.temperature {
|
||||||
|
map.insert("temperature".into(), json!(t));
|
||||||
|
}
|
||||||
|
if let Some(p) = req.top_p {
|
||||||
|
map.insert("top_p".into(), json!(p));
|
||||||
|
}
|
||||||
|
if let Some(m) = req.max_tokens {
|
||||||
|
map.insert("max_tokens".into(), json!(m));
|
||||||
|
}
|
||||||
|
if !req.tools.is_empty() {
|
||||||
|
map.insert("tools".into(), encode_tools(&req.tools));
|
||||||
|
}
|
||||||
|
// Some servers (cortex via neuron, OpenAI) report usage at the
|
||||||
|
// end of the stream only when explicitly requested.
|
||||||
|
map.insert("stream_options".into(), json!({ "include_usage": true }));
|
||||||
|
}
|
||||||
|
body
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_message(m: &Message) -> Value {
|
||||||
|
match (m.role, &m.content) {
|
||||||
|
(Role::System, MessageContent::Text(s)) => json!({"role": "system", "content": s}),
|
||||||
|
(Role::User, MessageContent::Text(s)) => json!({"role": "user", "content": s}),
|
||||||
|
(Role::Assistant, MessageContent::Text(s)) => json!({"role": "assistant", "content": s}),
|
||||||
|
(Role::Assistant, MessageContent::ToolCalls { text, calls }) => {
|
||||||
|
let calls_json: Vec<Value> = calls
|
||||||
|
.iter()
|
||||||
|
.map(|c| {
|
||||||
|
json!({
|
||||||
|
"id": c.id,
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": c.name,
|
||||||
|
"arguments": c.arguments,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
json!({
|
||||||
|
"role": "assistant",
|
||||||
|
"content": text.clone().unwrap_or_default(),
|
||||||
|
"tool_calls": calls_json,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
(
|
||||||
|
Role::Tool,
|
||||||
|
MessageContent::ToolResult {
|
||||||
|
tool_call_id,
|
||||||
|
content,
|
||||||
|
},
|
||||||
|
) => json!({
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call_id,
|
||||||
|
"content": content,
|
||||||
|
}),
|
||||||
|
// Mismatched (role, content) combinations shouldn't happen
|
||||||
|
// — the agent constructs them in pairs. If they do, degrade
|
||||||
|
// gracefully to a plain text turn so the request still goes
|
||||||
|
// out rather than crashing the conversation.
|
||||||
|
(role, content) => {
|
||||||
|
tracing::warn!(
|
||||||
|
?role,
|
||||||
|
?content,
|
||||||
|
"encode_message: unexpected (role, content) shape"
|
||||||
|
);
|
||||||
|
json!({"role": role_str(role), "content": content_as_text(content)})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn role_str(r: Role) -> &'static str {
|
||||||
|
match r {
|
||||||
|
Role::System => "system",
|
||||||
|
Role::User => "user",
|
||||||
|
Role::Assistant => "assistant",
|
||||||
|
Role::Tool => "tool",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn content_as_text(c: &MessageContent) -> String {
|
||||||
|
match c {
|
||||||
|
MessageContent::Text(s) => s.clone(),
|
||||||
|
MessageContent::ToolCalls { text, .. } => text.clone().unwrap_or_default(),
|
||||||
|
MessageContent::ToolResult { content, .. } => content.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_tools(tools: &[ToolSpec]) -> Value {
|
||||||
|
let arr: Vec<Value> = tools
|
||||||
|
.iter()
|
||||||
|
.map(|t| {
|
||||||
|
json!({
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": t.name,
|
||||||
|
"description": t.description,
|
||||||
|
"parameters": t.parameters,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Value::Array(arr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Response decoding ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Both types are deserialised through `list_models()`. Tests don't
|
||||||
|
// exercise that path (no mock HTTP server), so clippy --tests reports
|
||||||
|
// them as dead; in real use they're hit on every Zed model-picker
|
||||||
|
// refresh.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct WireModelsResponse {
|
||||||
|
data: Vec<WireModelObject>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct WireModelObject {
|
||||||
|
id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct WireChunk {
|
||||||
|
#[serde(default)]
|
||||||
|
choices: Vec<WireChunkChoice>,
|
||||||
|
#[serde(default)]
|
||||||
|
usage: Option<WireUsage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct WireChunkChoice {
|
||||||
|
#[serde(default)]
|
||||||
|
delta: WireDelta,
|
||||||
|
#[serde(default)]
|
||||||
|
finish_reason: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default, Deserialize)]
|
||||||
|
struct WireDelta {
|
||||||
|
#[serde(default)]
|
||||||
|
content: Option<String>,
|
||||||
|
/// Some servers expose chain-of-thought text via this field
|
||||||
|
/// (mirroring OpenAI's reasoning-model schema). When present we
|
||||||
|
/// surface it as `ReasoningDelta`.
|
||||||
|
#[serde(default)]
|
||||||
|
reasoning_content: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
tool_calls: Vec<WireToolCallDelta>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct WireToolCallDelta {
|
||||||
|
#[serde(default)]
|
||||||
|
index: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
id: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
function: Option<WireFunctionDelta>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default, Deserialize)]
|
||||||
|
struct WireFunctionDelta {
|
||||||
|
#[serde(default)]
|
||||||
|
name: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
arguments: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Serialize)]
|
||||||
|
struct WireUsage {
|
||||||
|
prompt_tokens: u64,
|
||||||
|
completion_tokens: u64,
|
||||||
|
total_tokens: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert the eventsource-stream byte SSE into provider-agnostic
|
||||||
|
/// events. Bails the stream on the first parse failure with a logged
|
||||||
|
/// warning — partial state is preferable to silently corrupting a
|
||||||
|
/// conversation by skipping bad events.
|
||||||
|
fn decode_stream<S>(
|
||||||
|
sse: S,
|
||||||
|
cancel: CancellationToken,
|
||||||
|
) -> impl Stream<Item = anyhow::Result<CompletionEvent>>
|
||||||
|
where
|
||||||
|
S: Stream<
|
||||||
|
Item = Result<
|
||||||
|
eventsource_stream::Event,
|
||||||
|
eventsource_stream::EventStreamError<reqwest::Error>,
|
||||||
|
>,
|
||||||
|
> + Send
|
||||||
|
+ 'static,
|
||||||
|
{
|
||||||
|
async_stream::stream! {
|
||||||
|
// Track which (index) tool calls we've already announced. The
|
||||||
|
// OpenAI stream emits the id and name only on the first delta
|
||||||
|
// for each tool call; later deltas just carry argument bytes.
|
||||||
|
let mut announced: std::collections::HashSet<usize> = Default::default();
|
||||||
|
|
||||||
|
let mut sse = Box::pin(sse);
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
// `biased;` checks `cancel.cancelled()` first on every
|
||||||
|
// poll — without it, a pre-cancelled token loses to a
|
||||||
|
// ready SSE chunk, and a mid-stream cancellation could
|
||||||
|
// still consume one more chunk before noticing.
|
||||||
|
biased;
|
||||||
|
_ = cancel.cancelled() => {
|
||||||
|
tracing::debug!("openai_chat: cancellation requested, ending stream");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
next = sse.next() => {
|
||||||
|
let Some(event) = next else { break };
|
||||||
|
let event = match event {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(e) => {
|
||||||
|
yield Err(anyhow::anyhow!("SSE transport: {e}"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let data = event.data;
|
||||||
|
if data == "[DONE]" {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let chunk: WireChunk = match serde_json::from_str(&data) {
|
||||||
|
Ok(c) => c,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
error = %e,
|
||||||
|
raw = %data,
|
||||||
|
"openai_chat: failed to parse SSE chunk; skipping"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for choice in chunk.choices {
|
||||||
|
if let Some(text) = choice.delta.content
|
||||||
|
&& !text.is_empty()
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::TextDelta(text));
|
||||||
|
}
|
||||||
|
if let Some(reasoning) = choice.delta.reasoning_content
|
||||||
|
&& !reasoning.is_empty()
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::ReasoningDelta(reasoning));
|
||||||
|
}
|
||||||
|
for tc in choice.delta.tool_calls {
|
||||||
|
let idx = tc.index;
|
||||||
|
if announced.insert(idx) {
|
||||||
|
let id = tc.id.unwrap_or_default();
|
||||||
|
let name = tc
|
||||||
|
.function
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|f| f.name.clone())
|
||||||
|
.unwrap_or_default();
|
||||||
|
yield Ok(CompletionEvent::ToolCallStart {
|
||||||
|
index: idx,
|
||||||
|
id,
|
||||||
|
name,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some(f) = tc.function
|
||||||
|
&& let Some(args) = f.arguments
|
||||||
|
&& !args.is_empty()
|
||||||
|
{
|
||||||
|
yield Ok(CompletionEvent::ToolCallArgsDelta {
|
||||||
|
index: idx,
|
||||||
|
args_delta: args,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(reason) = choice.finish_reason {
|
||||||
|
yield Ok(CompletionEvent::Finish { reason: Some(reason) });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(u) = chunk.usage {
|
||||||
|
yield Ok(CompletionEvent::Usage(UsageStats {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
completion_tokens: u.completion_tokens,
|
||||||
|
total_tokens: u.total_tokens,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user