Files
helexa/crates/cortex-gateway/tests/error_envelope.rs
rob thijssen bc74e0e95f
Some checks failed
CI / Format (push) Successful in 38s
CI / CUDA type-check (push) Successful in 1m39s
CI / Clippy (push) Successful in 2m26s
CI / Test (push) Successful in 4m49s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Package helexa-bench RPM (push) Blocked by required conditions
build-prerelease / Resolve version stamps + change detection (push) Successful in 32s
build-prerelease / Build neuron-blackwell (push) Successful in 1m40s
build-prerelease / Build neuron-ada (push) Successful in 2m19s
build-prerelease / Build neuron-ampere (push) Successful in 2m22s
build-prerelease / Lint (fmt + clippy) (push) Successful in 2m49s
build-prerelease / Build cortex binary (push) Successful in 3m0s
build-prerelease / Test (push) Successful in 4m25s
build-prerelease / Package cortex RPM (push) Successful in 1m32s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 1m50s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 1m49s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 1m54s
build-prerelease / Build helexa-bench binary (push) Successful in 2m12s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
feat(#47 phase 1a): EntitlementProvider trait + local/static provider
Stage 1's build seam (#50): the interface auth, metering, and budget
enforcement all hang off, with a local/static provider so the A0
amplification fix can land before any upstream clearing house exists.
The future helexa-upstream client (#57) is just another impl.

- cortex-core::entitlements: Principal {account_id, key_id}, CapWindow
  (Balance | Rolling{seconds}), Reservation handle, BudgetSnapshot,
  AuthError/BudgetError, and the async EntitlementProvider trait
  (resolve / reserve / settle / release / snapshot). BudgetError carries
  the window semantics so callers pick the #63 code (rate_limit_exceeded
  + Retry-After vs insufficient_quota) without the provider touching HTTP.
- cortex-core::config: [entitlements] section on GatewayConfig
  (require_auth + [[entitlements.keys]] with account_id, optional key_id,
  hard_cap, window). Additive + serde(default) — anonymous/uncapped when
  omitted, so existing setups are unaffected.
- cortex-gateway::entitlements_local: LocalEntitlementProvider. Budget
  math serialized under one Mutex so spent+reserved can never exceed a
  hard cap under concurrency (the #52 guarantee); rolling windows reset
  lazily; uncapped keys (no hard_cap) always reserve but still meter.
- CortexState gains Arc<dyn EntitlementProvider> + require_auth, built in
  from_config. Not yet consumed by the request path — auth middleware is
  1b (#49), enforcement is 1d (#52).
- cortex.example.toml documents the section; test GatewayConfig literals
  updated for the new field.

6 provider unit tests (resolve, unknown-key, round-trip, balance/rolling
over-cap codes, uncapped infra key). Local fmt/clippy/test all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 19:00:05 +03:00

141 lines
4.6 KiB
Rust

mod common;
use serde_json::json;
/// A chat-completion request naming a model that is not loaded on the mock
/// neuron must come back as HTTP 404 wrapped in the error envelope:
/// `type = invalid_request_error`, `code = model_not_found`, `param = null`.
#[tokio::test]
async fn error_response_model_not_found() {
    let neuron = common::spawn_mock_neuron().await;
    let gateway_url = common::spawn_gateway(&neuron).await;

    // The model named here is not one the mock neuron has loaded.
    let payload = json!({
        "model": "nonexistent-model",
        "messages": [{"role": "user", "content": "hi"}]
    });

    let http = reqwest::Client::new();
    let response = http
        .post(format!("{gateway_url}/v1/chat/completions"))
        .header("Content-Type", "application/json")
        .json(&payload)
        .send()
        .await
        .expect("request should succeed");

    assert_eq!(response.status(), axum::http::StatusCode::NOT_FOUND);

    let envelope: serde_json::Value = response.json().await.expect("valid json");
    let error = envelope.get("error").expect("response has error object");

    // Coarse category of the failure.
    let kind = error.get("type").unwrap();
    assert_eq!(kind, "invalid_request_error");

    // Fine-grained, machine-readable code.
    let code = error.get("code").unwrap().as_str().unwrap();
    assert_eq!(code, "model_not_found");

    // `param` has to be present and explicitly null.
    let param = error.get("param").unwrap();
    assert!(param.is_null());
}
/// A chat-completion request that omits the required `model` field must be
/// rejected with HTTP 400 and the error envelope
/// `type = invalid_request_error`, `code = missing_model_field`, `param = null`.
#[tokio::test]
async fn error_response_missing_model_field() {
    let neuron = common::spawn_mock_neuron().await;
    let gateway_url = common::spawn_gateway(&neuron).await;

    // Only `messages` is supplied; `model` is deliberately left out.
    let payload = json!({
        "messages": [{"role": "user", "content": "hi"}]
    });

    let http = reqwest::Client::new();
    let response = http
        .post(format!("{gateway_url}/v1/chat/completions"))
        .header("Content-Type", "application/json")
        .json(&payload)
        .send()
        .await
        .expect("request should succeed");

    assert_eq!(response.status(), axum::http::StatusCode::BAD_REQUEST);

    let envelope: serde_json::Value = response.json().await.expect("valid json");
    let error = envelope.get("error").expect("response has error object");

    let kind = error.get("type").unwrap();
    assert_eq!(kind, "invalid_request_error");

    let code = error.get("code").unwrap().as_str().unwrap();
    assert_eq!(code, "missing_model_field");

    let param = error.get("param").unwrap();
    assert!(param.is_null());
}
/// When the gateway's only configured neuron points at an endpoint that is
/// expected to be unreachable, a chat-completion request must yield HTTP 503
/// with `Retry-After: 5` and the error envelope
/// `type = api_error`, `code = service_unavailable`, `param = null`.
#[tokio::test]
async fn error_response_no_healthy_nodes() {
    use cortex_core::config::{
        EvictionSettings, EvictionStrategy, GatewayConfig, GatewaySettings, NeuronEndpoint,
    };
    use std::sync::Arc;

    // Assemble the config piece by piece. The single neuron targets a port
    // that should never answer, so no node is ever considered healthy.
    let gateway = GatewaySettings {
        listen: "127.0.0.1:0".into(),
        metrics_listen: "127.0.0.1:0".into(),
    };
    let eviction = EvictionSettings {
        strategy: EvictionStrategy::Lru,
        defrag_after_cycles: 0,
    };
    let neurons = vec![NeuronEndpoint {
        name: "dead-node".into(),
        endpoint: "http://127.0.0.1:1".into(),
    }];
    let config = GatewayConfig {
        gateway,
        eviction,
        neurons,
        models_config: "/dev/null".into(),
        entitlements: Default::default(),
    };

    let state = Arc::new(cortex_gateway::state::CortexState::from_config(&config));
    let router = cortex_gateway::build_app(state);

    // Bind an ephemeral port and serve the app in the background.
    let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();
    tokio::spawn(async move {
        axum::serve(listener, router).await.unwrap();
    });

    // Give the poller a brief window to mark the node unhealthy.
    let settle = std::time::Duration::from_millis(200);
    tokio::time::sleep(settle).await;

    let payload = json!({
        "model": "any-model",
        "messages": [{"role": "user", "content": "hi"}]
    });
    let http = reqwest::Client::new();
    let response = http
        .post(format!("http://{addr}/v1/chat/completions"))
        .header("Content-Type", "application/json")
        .json(&payload)
        .send()
        .await
        .expect("request should succeed");

    assert_eq!(
        response.status(),
        axum::http::StatusCode::SERVICE_UNAVAILABLE
    );

    // A transient 503 advertises Retry-After so OpenAI-compatible clients
    // back off and retry instead of surfacing an opaque error (#63).
    // The value is copied out because reading the body consumes the response.
    let retry_after = response
        .headers()
        .get(reqwest::header::RETRY_AFTER)
        .expect("transient 503 must carry Retry-After");
    let retry_after = retry_after.to_str().unwrap().to_string();
    assert_eq!(retry_after, "5");

    let envelope: serde_json::Value = response.json().await.expect("valid json");
    let error = envelope.get("error").expect("response has error object");

    let kind = error.get("type").unwrap();
    assert_eq!(kind, "api_error");

    let code = error.get("code").unwrap().as_str().unwrap();
    assert_eq!(code, "service_unavailable");

    let param = error.get("param").unwrap();
    assert!(param.is_null());
}