From 6cc14e925c49de4015c3bc40c1b8957edd9bc2e2 Mon Sep 17 00:00:00 2001
From: rob thijssen <grenade@rob.tn>
Date: Thu, 28 May 2026 12:34:23 +0300
Subject: [PATCH] feat(helexa-acp): per-endpoint max_tokens config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The agent was sending max_tokens: None, letting cortex/neuron pick
its own default — which trips Zed's "Output Limit Reached" on long
turns. Add a per-endpoint max_tokens option to EndpointConfig: a
`max_tokens` TOML key, plus the HELEXA_ACP_MAX_TOKENS env var for the
single-endpoint fallback. The agent looks the value up by endpoint
name and sets it on every CompletionRequest; endpoints without a
value still send max_tokens: None.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/helexa-acp/src/agent.rs                | 12 ++++++++++-
 crates/helexa-acp/src/config.rs               | 21 ++++++++++++++++++-
 crates/helexa-acp/src/provider/openai_chat.rs |  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/crates/helexa-acp/src/agent.rs b/crates/helexa-acp/src/agent.rs
index d736a32..67e4680 100644
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -60,6 +60,10 @@ struct AgentInner {
     /// for Stage 2 because session/set_model lands in Stage 4 — a
     /// session with no model can't prompt anything.
     default_model: Option<String>,
+    /// Per-endpoint `max_tokens` override, keyed by endpoint name.
+    /// Endpoints with no configured cap have no entry, so the lookup
+    /// yields `None` and the upstream picks its own default.
+    max_tokens: std::collections::HashMap<String, u64>,
     sessions: SessionStore,
     system_prompt_path: Option<PathBuf>,
     /// Monotonic counter for minting session ids. The wire format is
@@ -87,11 +91,17 @@ impl Agent {
                 default.name
             );
         }
+        let max_tokens = cfg
+            .endpoints
+            .iter()
+            .filter_map(|ep| ep.max_tokens.map(|m| (ep.name.clone(), m)))
+            .collect();
         Ok(Self {
             inner: Arc::new(AgentInner {
                 providers,
                 default_endpoint_name: default.name.clone(),
                 default_model: default.default_model.clone(),
+                max_tokens,
                 sessions: session::new_store(),
                 system_prompt_path: cfg.system_prompt_path.clone(),
                 next_session_id: AtomicU64::new(1),
@@ -401,7 +411,7 @@ async fn drive_prompt(
             tools: vec![],
             temperature: None,
             top_p: None,
-            max_tokens: None,
+            max_tokens: inner.max_tokens.get(provider.name()).copied(),
         };
 
         let mut stream = match provider.complete(completion_req, cancel.clone()).await {
diff --git a/crates/helexa-acp/src/config.rs b/crates/helexa-acp/src/config.rs
index 4af61c7..cd764c2 100644
--- a/crates/helexa-acp/src/config.rs
+++ b/crates/helexa-acp/src/config.rs
@@ -90,6 +90,14 @@ pub struct EndpointConfig {
     /// unauthenticated calls.
     #[serde(default)]
     pub api_key_env: Option<String>,
+    /// Cap on the model's output tokens per turn. `None` lets the
+    /// upstream pick its own default (cortex/neuron's default is
+    /// often small enough to trip Zed's "Output Limit Reached" on
+    /// long responses). Set to e.g. `32768` to let the model
+    /// produce longer turns. Goes into the OpenAI `max_tokens`
+    /// request field.
+    #[serde(default)]
+    pub max_tokens: Option<u64>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
@@ -159,7 +167,7 @@ impl Config {
 
     /// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
     /// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
-    /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`.
+    /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`, `HELEXA_ACP_MAX_TOKENS`.
     pub fn from_env() -> anyhow::Result<Self> {
         let base_url = std::env::var("HELEXA_ACP_BASE_URL")
             .ok()
@@ -176,6 +184,15 @@ impl Config {
             .ok()
             .filter(|s| !s.is_empty())
             .map(PathBuf::from);
+        let max_tokens = std::env::var("HELEXA_ACP_MAX_TOKENS")
+            .ok()
+            .filter(|s| !s.is_empty())
+            .map(|s| {
+                s.parse::<u64>().with_context(|| {
+                    format!("HELEXA_ACP_MAX_TOKENS is not a positive integer ({s})")
+                })
+            })
+            .transpose()?;
         Ok(Self {
             default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
             endpoints: vec![EndpointConfig {
@@ -185,6 +202,7 @@ impl Config {
                 default_model: Some(default_model),
                 api_key,
                 api_key_env: None,
+                max_tokens,
             }],
             system_prompt_path,
         })
@@ -297,6 +315,7 @@ mod tests {
             default_model: None,
             api_key: None,
             api_key_env: None,
+            max_tokens: None,
         };
         assert_eq!(
             ep.chat_completions_url().as_str(),
diff --git a/crates/helexa-acp/src/provider/openai_chat.rs b/crates/helexa-acp/src/provider/openai_chat.rs
index dc50ec3..a664623 100644
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -148,6 +148,7 @@ mod tests {
             default_model: None,
             api_key: None,
             api_key_env: None,
+            max_tokens: None,
         }
     }