Adds a new workspace crate `helexa-acp` (binary, Apache-2.0) — the
start of "the missing ACP binary" for multi-endpoint LLM setups
mixing public APIs, private LAN deployments, and various wire
formats. Today it speaks OpenAI /v1/chat/completions; the
Provider trait is the seam that lets OpenAI Responses, Anthropic
/v1/messages, and other wire formats slot in later without touching
the agent loop.
The crate is intentionally self-contained — no dependencies on the
other workspace crates (cortex-core, cortex-gateway, neuron) — so a
future migration to a dedicated GitHub repo is a Cargo.toml-only
change. All deps come from crates.io.
This commit lands:
* `config.rs` — TOML config at $XDG_CONFIG_HOME/helexa-acp/config.toml
with multi-endpoint support (each `[[endpoints]]` declares its
name, base_url, wire_api, default_model, optional API key /
api_key_env). Falls back to env-only single-endpoint config when
no TOML exists (HELEXA_ACP_BASE_URL, HELEXA_ACP_MODEL, etc.). The
`endpoint:model` selector syntax is validated and tested.
* `provider/mod.rs` — `Provider` trait + provider-agnostic types
(`CompletionRequest`, `CompletionEvent`, `Message`, `ToolCall`,
`ToolSpec`, `Role`, `UsageStats`). Agent loop consumes these
without knowing the wire format on the other side.
* `provider/openai_chat.rs` — `OpenAIChatProvider` impl. Compatible
with cortex, LM Studio, Ollama (compat mode), OpenRouter, OpenAI
itself. Streams via reqwest + eventsource-stream + async-stream.
Surfaces text deltas, reasoning deltas (for models that emit
`reasoning_content`), tool-call lifecycle (start, args-delta,
completion), usage, finish reason. Cancellation-token aware.
* `main.rs` — tokio + stderr-only tracing-subscriber + Stdio
transport. Builds a provider per configured endpoint at startup,
surfacing config mistakes before the editor even initializes.
Currently responds to `initialize`; everything else returns a
`not implemented yet` stub until the agent loop lands in the next
commit.
12 unit tests pass — encoder shape, decoder shape (text-only,
tool-call progressive, cancellation, malformed-chunk recovery),
config parsing (multi-endpoint TOML, env fallback, validation).
The `#![allow(dead_code)]` on `provider/mod.rs` is temporary — the
agent loop in the next commit reads every field. The allowance is noted
in the module-level doc comment so the next reader knows why it is there.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
64 lines
1.4 KiB
TOML
64 lines
1.4 KiB
TOML
# Cargo workspace manifest: lists the member crates and declares the package
# metadata and dependency versions shared across the workspace.
[workspace]
resolver = "2"
members = [
    "crates/cortex-core",
    "crates/cortex-gateway",
    "crates/cortex-cli",
    "crates/neuron",
    "crates/helexa-acp",
]

# Default package metadata for the workspace.
# NOTE(review): `helexa-acp` is described as Apache-2.0 while the default
# license here is GPL-3.0-or-later — confirm that crate sets its own
# `license` instead of inheriting this one.
[workspace.package]
version = "0.1.16"
edition = "2024"
license = "GPL-3.0-or-later"
repository = "https://git.lair.cafe/helexa/cortex"

[workspace.dependencies]
# async runtime
tokio = { version = "1", features = ["full"] }

# web framework
axum = { version = "0.8", features = ["macros"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["cors", "trace", "timeout"] }

# serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
toml = "0.8"

# http client (for proxying to neuron backends)
reqwest = { version = "0.12", features = ["json", "stream"] }

# observability
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
metrics = "0.24"
metrics-exporter-prometheus = "0.16"

# time
chrono = { version = "0.4", features = ["serde"] }

# config
figment = { version = "0.10", features = ["toml", "env"] }

# error handling
anyhow = "1"
thiserror = "2"

# async traits
async-trait = "0.1"

# CLI
clap = { version = "4", features = ["derive"] }

# futures / streams (for SSE proxying)
futures = "0.3"
tokio-stream = "0.1"
eventsource-stream = "0.2"

# workspace crates
cortex-core = { path = "crates/cortex-core" }
cortex-gateway = { path = "crates/cortex-gateway" }