feat(helexa-acp): per-endpoint max_tokens config

The agent was sending max_tokens: None, letting cortex/neuron pick its own default — which trips Zed's "Output Limit Reached" on long turns.

Add a per-endpoint max_tokens option in EndpointConfig (a TOML key, plus the HELEXA_ACP_MAX_TOKENS env var for the single-endpoint fallback) that the agent threads into every CompletionRequest, looked up by endpoint name.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 12:34:23 +03:00
parent 1c16732668
commit 6cc14e925c
3 changed files with 32 additions and 2 deletions
--- a/crates/helexa-acp/src/agent.rs
+++ b/crates/helexa-acp/src/agent.rs
@@ -60,6 +60,10 @@ struct AgentInner {
    /// for Stage 2 because session/set_model lands in Stage 4 — a
    /// session with no model can't prompt anything.
    default_model: Option<String>,
    /// Per-endpoint `max_tokens` override, keyed by endpoint name and
    /// looked up after resolution. An endpoint that configures no
    /// value has no entry here, so the upstream picks its own default.
    max_tokens: std::collections::HashMap<String, u64>,
    sessions: SessionStore,
    system_prompt_path: Option<PathBuf>,
    /// Monotonic counter for minting session ids. The wire format is
@@ -87,11 +91,17 @@ impl Agent {
                default.name
            );
        }
        let max_tokens = cfg
            .endpoints
            .iter()
            .filter_map(|ep| ep.max_tokens.map(|m| (ep.name.clone(), m)))
            .collect();
        Ok(Self {
            inner: Arc::new(AgentInner {
                providers,
                default_endpoint_name: default.name.clone(),
                default_model: default.default_model.clone(),
                max_tokens,
                sessions: session::new_store(),
                system_prompt_path: cfg.system_prompt_path.clone(),
                next_session_id: AtomicU64::new(1),
@@ -401,7 +411,7 @@ async fn drive_prompt(
            tools: vec![],
            temperature: None,
            top_p: None,
-            max_tokens: None,
+            max_tokens: inner.max_tokens.get(provider.name()).copied(),
        };
        let mut stream = match provider.complete(completion_req, cancel.clone()).await {
--- a/crates/helexa-acp/src/config.rs
+++ b/crates/helexa-acp/src/config.rs
@@ -90,6 +90,14 @@ pub struct EndpointConfig {
    /// unauthenticated calls.
    #[serde(default)]
    pub api_key_env: Option<String>,
    /// Cap on the model's output tokens per turn. `None` lets the
    /// upstream pick its own default (cortex/neuron's default is
    /// often small enough to trip Zed's "Output Limit Reached" on
    /// long responses). Set to e.g. `32768` to let the model
    /// produce longer turns. Goes into the OpenAI `max_tokens`
    /// request field.
    #[serde(default)]
    pub max_tokens: Option<u64>,
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
@@ -159,7 +167,7 @@ impl Config {
    /// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
    /// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
-    /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`.
+    /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`, `HELEXA_ACP_MAX_TOKENS`.
    pub fn from_env() -> anyhow::Result<Self> {
        let base_url = std::env::var("HELEXA_ACP_BASE_URL")
            .ok()
@@ -176,6 +184,15 @@ impl Config {
            .ok()
            .filter(|s| !s.is_empty())
            .map(PathBuf::from);
        let max_tokens = std::env::var("HELEXA_ACP_MAX_TOKENS")
            .ok()
            .filter(|s| !s.is_empty())
            .map(|s| {
                s.parse::<u64>().with_context(|| {
                    format!("HELEXA_ACP_MAX_TOKENS is not a positive integer ({s})")
                })
            })
            .transpose()?;
        Ok(Self {
            default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
            endpoints: vec![EndpointConfig {
@@ -185,6 +202,7 @@ impl Config {
                default_model: Some(default_model),
                api_key,
                api_key_env: None,
                max_tokens,
            }],
            system_prompt_path,
        })
@@ -297,6 +315,7 @@ mod tests {
            default_model: None,
            api_key: None,
            api_key_env: None,
            max_tokens: None,
        };
        assert_eq!(
            ep.chat_completions_url().as_str(),
--- a/crates/helexa-acp/src/provider/openai_chat.rs
+++ b/crates/helexa-acp/src/provider/openai_chat.rs
@@ -148,6 +148,7 @@ mod tests {
            default_model: None,
            api_key: None,
            api_key_env: None,
            max_tokens: None,
        }
    }