feat(helexa-acp): per-endpoint max_tokens config
Some checks failed
CI / Format (push) Successful in 34s
build-prerelease / Resolve version stamps (push) Successful in 35s
CI / Clippy (push) Failing after 1m3s
CI / Test (push) Failing after 1m4s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled
Some checks failed
CI / Format (push) Successful in 34s
build-prerelease / Resolve version stamps (push) Successful in 35s
CI / Clippy (push) Failing after 1m3s
CI / Test (push) Failing after 1m4s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Has been cancelled
build-prerelease / Build neuron-ampere (push) Has been cancelled
build-prerelease / Build neuron-ada (push) Has been cancelled
build-prerelease / Package cortex RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ada RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-ampere RPM (push) Has been cancelled
build-prerelease / Package helexa-neuron-blackwell RPM (push) Has been cancelled
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
build-prerelease / Build neuron-blackwell (push) Has been cancelled
The agent was sending max_tokens: None, letting cortex/neuron pick its own default — which trips Zed's "Output Limit Reached" on long turns. Add a per-endpoint max_tokens option in EndpointConfig (TOML key and HELEXA_ACP_MAX_TOKENS env var for the single-endpoint fallback) that the agent threads into every CompletionRequest by endpoint name. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -60,6 +60,10 @@ struct AgentInner {
|
||||
/// for Stage 2 because session/set_model lands in Stage 4 — a
|
||||
/// session with no model can't prompt anything.
|
||||
default_model: Option<String>,
|
||||
/// Per-endpoint `max_tokens` override. Looked up by endpoint
|
||||
/// name after resolution. An absent entry (the endpoint's config left `max_tokens` unset) means the
|
||||
/// upstream picks its own default.
|
||||
max_tokens: std::collections::HashMap<String, u64>,
|
||||
sessions: SessionStore,
|
||||
system_prompt_path: Option<PathBuf>,
|
||||
/// Monotonic counter for minting session ids. The wire format is
|
||||
@@ -87,11 +91,17 @@ impl Agent {
|
||||
default.name
|
||||
);
|
||||
}
|
||||
let max_tokens = cfg
|
||||
.endpoints
|
||||
.iter()
|
||||
.filter_map(|ep| ep.max_tokens.map(|m| (ep.name.clone(), m)))
|
||||
.collect();
|
||||
Ok(Self {
|
||||
inner: Arc::new(AgentInner {
|
||||
providers,
|
||||
default_endpoint_name: default.name.clone(),
|
||||
default_model: default.default_model.clone(),
|
||||
max_tokens,
|
||||
sessions: session::new_store(),
|
||||
system_prompt_path: cfg.system_prompt_path.clone(),
|
||||
next_session_id: AtomicU64::new(1),
|
||||
@@ -401,7 +411,7 @@ async fn drive_prompt(
|
||||
tools: vec![],
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
max_tokens: None,
|
||||
max_tokens: inner.max_tokens.get(provider.name()).copied(),
|
||||
};
|
||||
|
||||
let mut stream = match provider.complete(completion_req, cancel.clone()).await {
|
||||
|
||||
@@ -90,6 +90,14 @@ pub struct EndpointConfig {
|
||||
/// unauthenticated calls.
|
||||
#[serde(default)]
|
||||
pub api_key_env: Option<String>,
|
||||
/// Cap on the model's output tokens per turn. `None` lets the
|
||||
/// upstream pick its own default (cortex/neuron's default is
|
||||
/// often small enough to trip Zed's "Output Limit Reached" on
|
||||
/// long responses). Set to e.g. `32768` to let the model
|
||||
/// produce longer turns. Goes into the OpenAI `max_tokens`
|
||||
/// request field.
|
||||
#[serde(default)]
|
||||
pub max_tokens: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
|
||||
@@ -159,7 +167,7 @@ impl Config {
|
||||
|
||||
/// Single-endpoint config constructed from `HELEXA_ACP_BASE_URL`,
|
||||
/// `HELEXA_ACP_MODEL`, `HELEXA_ACP_API_KEY`,
|
||||
/// `HELEXA_ACP_SYSTEM_PROMPT_PATH`.
|
||||
/// `HELEXA_ACP_SYSTEM_PROMPT_PATH`, `HELEXA_ACP_MAX_TOKENS`.
|
||||
pub fn from_env() -> anyhow::Result<Self> {
|
||||
let base_url = std::env::var("HELEXA_ACP_BASE_URL")
|
||||
.ok()
|
||||
@@ -176,6 +184,15 @@ impl Config {
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(PathBuf::from);
|
||||
let max_tokens = std::env::var("HELEXA_ACP_MAX_TOKENS")
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| {
|
||||
s.parse::<u64>().with_context(|| {
|
||||
format!("HELEXA_ACP_MAX_TOKENS is not a positive integer ({s})")
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Self {
|
||||
default_endpoint: Some(DEFAULT_ENDPOINT_NAME.into()),
|
||||
endpoints: vec![EndpointConfig {
|
||||
@@ -185,6 +202,7 @@ impl Config {
|
||||
default_model: Some(default_model),
|
||||
api_key,
|
||||
api_key_env: None,
|
||||
max_tokens,
|
||||
}],
|
||||
system_prompt_path,
|
||||
})
|
||||
@@ -297,6 +315,7 @@ mod tests {
|
||||
default_model: None,
|
||||
api_key: None,
|
||||
api_key_env: None,
|
||||
max_tokens: None,
|
||||
};
|
||||
assert_eq!(
|
||||
ep.chat_completions_url().as_str(),
|
||||
|
||||
@@ -148,6 +148,7 @@ mod tests {
|
||||
default_model: None,
|
||||
api_key: None,
|
||||
api_key_env: None,
|
||||
max_tokens: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user