Some checks failed
CI / Format (push) Successful in 38s
CI / CUDA type-check (push) Successful in 1m39s
CI / Clippy (push) Successful in 2m26s
CI / Test (push) Successful in 4m49s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Package helexa-bench RPM (push) Blocked by required conditions
build-prerelease / Resolve version stamps + change detection (push) Successful in 32s
build-prerelease / Build neuron-blackwell (push) Successful in 1m40s
build-prerelease / Build neuron-ada (push) Successful in 2m19s
build-prerelease / Build neuron-ampere (push) Successful in 2m22s
build-prerelease / Lint (fmt + clippy) (push) Successful in 2m49s
build-prerelease / Build cortex binary (push) Successful in 3m0s
build-prerelease / Test (push) Successful in 4m25s
build-prerelease / Package cortex RPM (push) Successful in 1m32s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 1m50s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 1m49s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 1m54s
build-prerelease / Build helexa-bench binary (push) Successful in 2m12s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Has been cancelled
Stage 1's build seam (#50): the interface auth, metering, and budget enforcement all hang off, with a local/static provider so the A0 amplification fix can land before any upstream clearing house exists. The future helexa-upstream client (#57) is just another impl.

- cortex-core::entitlements: Principal {account_id, key_id}, CapWindow (Balance | Rolling{seconds}), Reservation handle, BudgetSnapshot, AuthError/BudgetError, and the async EntitlementProvider trait (resolve / reserve / settle / release / snapshot). BudgetError carries the window semantics so callers pick the #63 code (rate_limit_exceeded + Retry-After vs insufficient_quota) without the provider touching HTTP.
- cortex-core::config: [entitlements] section on GatewayConfig (require_auth + [[entitlements.keys]] with account_id, optional key_id, hard_cap, window). Additive + serde(default) — anonymous/uncapped when omitted, so existing setups are unaffected.
- cortex-gateway::entitlements_local: LocalEntitlementProvider. Budget math serialized under one Mutex so spent+reserved can never exceed a hard cap under concurrency (the #52 guarantee); rolling windows reset lazily; uncapped keys (no hard_cap) always reserve but still meter.
- CortexState gains Arc<dyn EntitlementProvider> + require_auth, built in from_config. Not yet consumed by the request path — auth middleware is 1b (#49), enforcement is 1d (#52).
- cortex.example.toml documents the section; test GatewayConfig literals updated for the new field.

6 provider unit tests (resolve, unknown-key, round-trip, balance/rolling over-cap codes, uncapped infra key). Local fmt/clippy/test all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
141 lines
4.6 KiB
Rust
141 lines
4.6 KiB
Rust
mod common;
|
|
|
|
use serde_json::json;
|
|
|
|
#[tokio::test]
|
|
async fn error_response_model_not_found() {
|
|
let neuron_url = common::spawn_mock_neuron().await;
|
|
let gateway_url = common::spawn_gateway(&neuron_url).await;
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
// Request a model that isn't loaded on the mock neuron.
|
|
let resp = client
|
|
.post(format!("{gateway_url}/v1/chat/completions"))
|
|
.header("Content-Type", "application/json")
|
|
.json(&json!({
|
|
"model": "nonexistent-model",
|
|
"messages": [{"role": "user", "content": "hi"}]
|
|
}))
|
|
.send()
|
|
.await
|
|
.expect("request should succeed");
|
|
|
|
assert_eq!(resp.status(), axum::http::StatusCode::NOT_FOUND);
|
|
|
|
let body: serde_json::Value = resp.json().await.expect("valid json");
|
|
let err = body.get("error").expect("response has error object");
|
|
|
|
// Broad type categorization
|
|
assert_eq!(err.get("type").unwrap(), "invalid_request_error");
|
|
// Specific machine-readable code
|
|
assert_eq!(
|
|
err.get("code").unwrap().as_str().unwrap(),
|
|
"model_not_found"
|
|
);
|
|
// param is always null
|
|
assert!(err.get("param").unwrap().is_null());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn error_response_missing_model_field() {
|
|
let neuron_url = common::spawn_mock_neuron().await;
|
|
let gateway_url = common::spawn_gateway(&neuron_url).await;
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
// Request without the required `model` field.
|
|
let resp = client
|
|
.post(format!("{gateway_url}/v1/chat/completions"))
|
|
.header("Content-Type", "application/json")
|
|
.json(&json!({
|
|
"messages": [{"role": "user", "content": "hi"}]
|
|
}))
|
|
.send()
|
|
.await
|
|
.expect("request should succeed");
|
|
|
|
assert_eq!(resp.status(), axum::http::StatusCode::BAD_REQUEST);
|
|
|
|
let body: serde_json::Value = resp.json().await.expect("valid json");
|
|
let err = body.get("error").expect("response has error object");
|
|
|
|
assert_eq!(err.get("type").unwrap(), "invalid_request_error");
|
|
assert_eq!(
|
|
err.get("code").unwrap().as_str().unwrap(),
|
|
"missing_model_field"
|
|
);
|
|
assert!(err.get("param").unwrap().is_null());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn error_response_no_healthy_nodes() {
|
|
use cortex_core::config::{EvictionSettings, GatewayConfig, GatewaySettings, NeuronEndpoint};
|
|
use std::sync::Arc;
|
|
|
|
// Create a gateway config with a neuron pointing at an unreachable port so no node is ever healthy.
|
|
let config = GatewayConfig {
|
|
gateway: GatewaySettings {
|
|
listen: "127.0.0.1:0".into(),
|
|
metrics_listen: "127.0.0.1:0".into(),
|
|
},
|
|
eviction: EvictionSettings {
|
|
strategy: cortex_core::config::EvictionStrategy::Lru,
|
|
defrag_after_cycles: 0,
|
|
},
|
|
neurons: vec![NeuronEndpoint {
|
|
name: "dead-node".into(),
|
|
endpoint: "http://127.0.0.1:1".into(),
|
|
}],
|
|
models_config: "/dev/null".into(),
|
|
entitlements: Default::default(),
|
|
};
|
|
|
|
let fleet = Arc::new(cortex_gateway::state::CortexState::from_config(&config));
|
|
|
|
let app = cortex_gateway::build_app(fleet);
|
|
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
|
let addr = listener.local_addr().unwrap();
|
|
tokio::spawn(async move {
|
|
axum::serve(listener, app).await.unwrap();
|
|
});
|
|
|
|
// Allow the poller a moment to mark the node unhealthy.
|
|
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
|
|
|
|
let client = reqwest::Client::new();
|
|
let resp = client
|
|
.post(format!("http://{addr}/v1/chat/completions"))
|
|
.header("Content-Type", "application/json")
|
|
.json(&json!({
|
|
"model": "any-model",
|
|
"messages": [{"role": "user", "content": "hi"}]
|
|
}))
|
|
.send()
|
|
.await
|
|
.expect("request should succeed");
|
|
|
|
assert_eq!(resp.status(), axum::http::StatusCode::SERVICE_UNAVAILABLE);
|
|
|
|
// Transient 503 — the gateway advertises Retry-After so OpenAI-compatible
|
|
// clients back off and retry rather than surfacing an opaque error (#63).
|
|
let retry_after = resp
|
|
.headers()
|
|
.get(reqwest::header::RETRY_AFTER)
|
|
.expect("transient 503 must carry Retry-After")
|
|
.to_str()
|
|
.unwrap()
|
|
.to_string();
|
|
assert_eq!(retry_after, "5");
|
|
|
|
let body: serde_json::Value = resp.json().await.expect("valid json");
|
|
let err = body.get("error").expect("response has error object");
|
|
|
|
assert_eq!(err.get("type").unwrap(), "api_error");
|
|
assert_eq!(
|
|
err.get("code").unwrap().as_str().unwrap(),
|
|
"service_unavailable"
|
|
);
|
|
assert!(err.get("param").unwrap().is_null());
|
|
}
|