feat(helexa-acp): per-endpoint max_tokens config
Some checks failed
CI / Format (push) Successful in 34s
build-prerelease / Resolve version stamps (push) Successful in 35s
CI / Clippy (push) Failing after 1m3s
CI / Test (push) Failing after 1m4s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled

The agent was sending max_tokens: None, letting cortex/neuron pick
its own default — which trips Zed's "Output Limit Reached" on long
turns. Add a per-endpoint max_tokens option in EndpointConfig
(TOML key and HELEXA_ACP_MAX_TOKENS env var for the single-endpoint
fallback) that the agent threads into every CompletionRequest by
endpoint name.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 12:34:23 +03:00
parent 1c16732668
commit 6cc14e925c
3 changed files with 32 additions and 2 deletions

View File

@@ -60,6 +60,10 @@ struct AgentInner {
/// for Stage 2 because session/set_model lands in Stage 4 — a /// for Stage 2 because session/set_model lands in Stage 4 — a
/// session with no model can't prompt anything. /// session with no model can't prompt anything.
default_model: Option<String>, default_model: Option<String>,
/// Per-endpoint `max_tokens` override, keyed by endpoint name and
/// looked up after resolution. An endpoint with no entry sends
/// `max_tokens: None`, so the upstream picks its own default.
max_tokens: std::collections::HashMap<String, u64>,
sessions: SessionStore, sessions: SessionStore,
system_prompt_path: Option<PathBuf>, system_prompt_path: Option<PathBuf>,
/// Monotonic counter for minting session ids. The wire format is /// Monotonic counter for minting session ids. The wire format is
@@ -87,11 +91,17 @@ impl Agent {
default.name default.name
); );
} }
let max_tokens = cfg
.endpoints
.iter()
.filter_map(|ep| ep.max_tokens.map(|m| (ep.name.clone(), m)))
.collect();
Ok(Self { Ok(Self {
inner: Arc::new(AgentInner { inner: Arc::new(AgentInner {
providers, providers,
default_endpoint_name: default.name.clone(), default_endpoint_name: default.name.clone(),
default_model: default.default_model.clone(), default_model: default.default_model.clone(),
max_tokens,
sessions: session::new_store(), sessions: session::new_store(),
system_prompt_path: cfg.system_prompt_path.clone(), system_prompt_path: cfg.system_prompt_path.clone(),
next_session_id: AtomicU64::new(1), next_session_id: AtomicU64::new(1),
@@ -401,7 +411,7 @@ async fn drive_prompt(
tools: vec![], tools: vec![],
temperature: None, temperature: None,
top_p: None, top_p: None,
max_tokens: None, max_tokens: inner.max_tokens.get(provider.name()).copied(),
}; };
let mut stream = match provider.complete(completion_req, cancel.clone()).await { let mut stream = match provider.complete(completion_req, cancel.clone()).await {

View File

@@ -90,6 +90,14 @@ pub struct EndpointConfig {
/// unauthenticated calls. /// unauthenticated calls.
#[serde(default)] #[serde(default)]
pub api_key_env: Option<String>, pub api_key_env: Option<String>,
/// Cap on the model's output tokens per turn. `None` lets the
/// upstream pick its own default (cortex/neuron's default is
/// often small enough to trip Zed's "Output Limit Reached" on
/// long responses). Set to e.g. `32768` to let the model
/// produce longer turns. Goes into the OpenAI `max_tokens`
/// request field.
#[serde(default)]
pub max_tokens: Option<u64>,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
@@ -159,7 +167,7 @@ impl Config {
/// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`, /// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
/// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`, /// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
/// `HELEXA_ACP_SYSTEM_PROMPT_PATH`. /// `HELEXA_ACP_SYSTEM_PROMPT_PATH`, `HELEXA_ACP_MAX_TOKENS`.
pub fn from_env() -> anyhow::Result<Self> { pub fn from_env() -> anyhow::Result<Self> {
let base_url = std::env::var("HELEXA_ACP_BASE_URL") let base_url = std::env::var("HELEXA_ACP_BASE_URL")
.ok() .ok()
@@ -176,6 +184,15 @@ impl Config {
.ok() .ok()
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.map(PathBuf::from); .map(PathBuf::from);
let max_tokens = std::env::var("HELEXA_ACP_MAX_TOKENS")
.ok()
.filter(|s| !s.is_empty())
.map(|s| {
s.parse::<u64>().with_context(|| {
format!("HELEXA_ACP_MAX_TOKENS is not a positive integer ({s})")
})
})
.transpose()?;
Ok(Self { Ok(Self {
default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()), default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
endpoints: vec![EndpointConfig { endpoints: vec![EndpointConfig {
@@ -185,6 +202,7 @@ impl Config {
default_model: Some(default_model), default_model: Some(default_model),
api_key, api_key,
api_key_env: None, api_key_env: None,
max_tokens,
}], }],
system_prompt_path, system_prompt_path,
}) })
@@ -297,6 +315,7 @@ mod tests {
default_model: None, default_model: None,
api_key: None, api_key: None,
api_key_env: None, api_key_env: None,
max_tokens: None,
}; };
assert_eq!( assert_eq!(
ep.chat_completions_url().as_str(), ep.chat_completions_url().as_str(),

View File

@@ -148,6 +148,7 @@ mod tests {
default_model: None, default_model: None,
api_key: None, api_key: None,
api_key_env: None, api_key_env: None,
max_tokens: None,
} }
} }