From 6cc14e925c49de4015c3bc40c1b8957edd9bc2e2 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Thu, 28 May 2026 12:34:23 +0300 Subject: [PATCH] feat(helexa-acp): per-endpoint max_tokens config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent was sending max_tokens: None, letting cortex/neuron pick its own default — which trips Zed's "Output Limit Reached" on long turns. Add a per-endpoint max_tokens option in EndpointConfig (TOML key and HELEXA_ACP_MAX_TOKENS env var for the single-endpoint fallback) that the agent threads into every CompletionRequest by endpoint name. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/helexa-acp/src/agent.rs | 12 ++++++++++- crates/helexa-acp/src/config.rs | 21 ++++++++++++++++++- crates/helexa-acp/src/provider/openai_chat.rs | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/crates/helexa-acp/src/agent.rs b/crates/helexa-acp/src/agent.rs index d736a32..67e4680 100644 --- a/crates/helexa-acp/src/agent.rs +++ b/crates/helexa-acp/src/agent.rs @@ -60,6 +60,10 @@ struct AgentInner { /// for Stage 2 because session/set_model lands in Stage 4 — a /// session with no model can't prompt anything. default_model: Option, + /// Per-endpoint `max_tokens` override. Looked up by endpoint + /// name after resolution. `None` (or an absent entry) means the + /// upstream picks its own default. + max_tokens: std::collections::HashMap, sessions: SessionStore, system_prompt_path: Option, /// Monotonic counter for minting session ids. The wire format is @@ -87,11 +91,17 @@ impl Agent { default.name ); } + let max_tokens = cfg + .endpoints + .iter() + .filter_map(|ep| ep.max_tokens.map(|m| (ep.name.clone(), m))) + .collect(); Ok(Self { inner: Arc::new(AgentInner { providers, default_endpoint_name: default.name.clone(), default_model: default.default_model.clone(), + max_tokens, sessions: session::new_store(), system_prompt_path: cfg.system_prompt_path.clone(), next_session_id: AtomicU64::new(1), @@ -401,7 +411,7 @@ async fn drive_prompt( tools: vec![], temperature: None, top_p: None, - max_tokens: None, + max_tokens: inner.max_tokens.get(provider.name()).copied(), }; let mut stream = match provider.complete(completion_req, cancel.clone()).await { diff --git a/crates/helexa-acp/src/config.rs b/crates/helexa-acp/src/config.rs index 4af61c7..cd764c2 100644 --- a/crates/helexa-acp/src/config.rs +++ b/crates/helexa-acp/src/config.rs @@ -90,6 +90,14 @@ pub struct EndpointConfig { /// unauthenticated calls. #[serde(default)] pub api_key_env: Option, + /// Cap on the model's output tokens per turn. `None` lets the + /// upstream pick its own default (cortex/neuron's default is + /// often small enough to trip Zed's "Output Limit Reached" on + /// long responses). Set to e.g. `32768` to let the model + /// produce longer turns. Goes into the OpenAI `max_tokens` + /// request field. + #[serde(default)] + pub max_tokens: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] @@ -159,7 +167,7 @@ impl Config { /// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`, /// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`, - /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`. + /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`, `HELEXA_ACP_MAX_TOKENS`. pub fn from_env() -> anyhow::Result { let base_url = std::env::var("HELEXA_ACP_BASE_URL") .ok() @@ -176,6 +184,15 @@ impl Config { .ok() .filter(|s| !s.is_empty()) .map(PathBuf::from); + let max_tokens = std::env::var("HELEXA_ACP_MAX_TOKENS") + .ok() + .filter(|s| !s.is_empty()) + .map(|s| { + s.parse::().with_context(|| { + format!("HELEXA_ACP_MAX_TOKENS is not a positive integer ({s})") + }) + }) + .transpose()?; Ok(Self { default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()), endpoints: vec![EndpointConfig { @@ -185,6 +202,7 @@ impl Config { default_model: Some(default_model), api_key, api_key_env: None, + max_tokens, }], system_prompt_path, }) @@ -297,6 +315,7 @@ mod tests { default_model: None, api_key: None, api_key_env: None, + max_tokens: None, }; assert_eq!( ep.chat_completions_url().as_str(), diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs index dc50ec3..a664623 100644 --- a/crates/helexa-acp/src/provider/openai_chat.rs +++ b/crates/helexa-acp/src/provider/openai_chat.rs @@ -148,6 +148,7 @@ mod tests { default_model: None, api_key: None, api_key_env: None, + max_tokens: None, } }