Files
helexa/crates/helexa-bench/tests/sweep_integration.rs
rob thijssen f50f5531cf
Some checks are pending
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Blocked by required conditions
build-prerelease / Resolve version stamps + change detection (push) Successful in 31s
build-prerelease / Lint (fmt + clippy) (push) Successful in 2m21s
build-prerelease / Build cortex binary (push) Successful in 2m27s
build-prerelease / Build helexa-bench binary (push) Successful in 2m44s
build-prerelease / Test (push) Successful in 4m32s
build-prerelease / Build neuron-ampere (push) Successful in 2m7s
build-prerelease / Build neuron-ada (push) Successful in 2m28s
build-prerelease / Build neuron-blackwell (push) Successful in 2m59s
build-prerelease / Package cortex RPM (push) Successful in 1m20s
build-prerelease / Package helexa-bench RPM (push) Successful in 1m19s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 1m39s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 1m39s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 1m42s
feat(bench): read-only JSON API on bob + bench/ React visualisation app
Part A — helexa-bench read API:
- [api] config (enabled, listen :13132); WAL on the store so API reads
  never block the sweep writer.
- store read methods: summary, series (chronological per-build medians),
  runs (filtered), dimensions, run_count.
- api.rs: axum /api/health|dimensions|summary|series|runs, permissive
  CORS (UI is a separate origin). The `run` daemon binds the API
  alongside the sweep; new `serve` subcommand serves API-only.
- listener plumbing (bench gains a port): data/helexa-bench-firewalld.xml,
  spec install, deploy-bench /api/health probe + firewalld step, sudoers
  firewall-cmd grants, [api] in example + bob.toml.
- 5 API tests + serve smoke.

Part B — bench/ Vite + React-SWC-TS app (router, react-bootstrap,
recharts): Overview (summary table), Trends (decode tok/s & TTFT across
build SHAs), Runs (filterable explorer). Typed API client with
VITE_API_BASE + dev proxy to bob. npm build/typecheck clean. Hosted
separately from the API (per design); .gitignore excludes node_modules/dist.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 11:26:55 +03:00

134 lines
4.6 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! End-to-end sweep against a mock neuron: a sweep records samples, a
//! second sweep skips the satisfied cell, and bumping the reported build
//! SHA resumes fresh sampling.
use axum::Router;
use axum::extract::State;
use axum::http::header;
use axum::response::{IntoResponse, Json};
use axum::routing::{get, post};
use helexa_bench::config::{BenchConfig, BenchSettings, ScenarioConfig, TargetConfig, TargetKind};
use helexa_bench::sweep::Sweeper;
use serde_json::json;
use std::sync::{Arc, Mutex};
/// State shared by the mock neuron's request handlers.
///
/// Cloning is cheap: every clone points at the same underlying SHA string, so
/// a write through one handle is visible to all handlers.
#[derive(Clone)]
struct MockState {
    /// Build SHA reported by the `/version` handler; mutable so a test can
    /// change it while the mock server is running.
    sha: Arc<Mutex<String>>,
}
/// Mock `GET /version` handler.
///
/// Returns a fixed build description whose `git_sha` field is whatever the
/// shared [`MockState`] currently holds, so the reported build can be changed
/// between requests.
async fn version(State(state): State<MockState>) -> Json<serde_json::Value> {
    // Copy the SHA out in its own scope so the mutex guard is released before
    // the response body is assembled.
    let git_sha = {
        let guard = state.sha.lock().unwrap();
        guard.clone()
    };

    let payload = json!({
        "package_version": "0.1.16",
        "git_sha": git_sha,
        "git_dirty": false,
        "features": ["cuda", "cudnn"],
        "candle_version": "0.10.2",
    });
    Json(payload)
}
/// Mock `GET /discovery` handler.
///
/// Always answers with the same static host description: one device and the
/// `candle` harness.
async fn discovery() -> Json<serde_json::Value> {
    let host = json!({
        "hostname": "mock-beast",
        "os": "Linux",
        "kernel": "6.19.0",
        "cuda_version": "13.0",
        "driver_version": "580.159",
        "devices": [{"index": 0, "name": "RTX 5090", "vram_total_mb": 32614, "compute_capability": "12.0"}],
        "harnesses": ["candle"],
    });
    Json(host)
}
/// Mock `GET /models` handler.
///
/// Lists two models: one with status `loaded` and one with status
/// `recovering`. The second exists so the test can confirm the bench only
/// measures warm models.
async fn models() -> Json<serde_json::Value> {
    let warm = json!({"id": "Qwen/Qwen3.6-27B", "harness": "candle", "status": "loaded", "devices": [0], "capabilities": ["text"]});
    // A non-warm model the bench must ignore.
    let cold = json!({"id": "Qwen/cold", "harness": "candle", "status": "recovering", "devices": [0]});

    Json(serde_json::Value::Array(vec![warm, cold]))
}
/// Mock `POST /v1/chat/completions` handler.
///
/// Replies with a canned `text/event-stream` body: two content deltas
/// (`Hello`, ` world`), a final chunk carrying `finish_reason: "stop"` plus a
/// `usage` object, and the terminating `[DONE]` event.
async fn chat() -> impl IntoResponse {
    // The whole stream is one static string; each event ends with a blank line.
    const SSE_BODY: &str = concat!(
        "data: {\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"finish_reason\":null}]}\n\n",
        "data: {\"choices\":[{\"index\":0,\"delta\":{\"content\":\" world\"},\"finish_reason\":null}]}\n\n",
        "data: {\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":130,\"completion_tokens\":2,\"total_tokens\":132}}\n\n",
        "data: [DONE]\n\n",
    );

    let headers = [(header::CONTENT_TYPE, "text/event-stream")];
    (headers, SSE_BODY)
}
/// Starts the mock neuron on an OS-assigned localhost port.
///
/// `sha` is the build SHA the mock initially reports from `/version`.
///
/// Returns the server's base URL (`http://<addr>`) together with a handle to
/// the reported SHA, which the caller can overwrite while the server runs.
///
/// The server runs on a detached task; nothing here shuts it down.
///
/// # Panics
///
/// Panics if the listener cannot be bound or its local address read.
async fn spawn_mock(sha: &str) -> (String, Arc<Mutex<String>>) {
    // Port 0 lets the OS choose a free port.
    let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();

    let sha_handle = Arc::new(Mutex::new(sha.to_string()));
    let router = Router::new()
        .route("/version", get(version))
        .route("/discovery", get(discovery))
        .route("/models", get(models))
        .route("/v1/chat/completions", post(chat))
        .with_state(MockState {
            sha: Arc::clone(&sha_handle),
        });

    tokio::spawn(async move {
        axum::serve(listener, router).await.unwrap();
    });

    let base_url = format!("http://{addr}");
    (base_url, sha_handle)
}
/// Builds the bench configuration used by the integration test.
///
/// `endpoint` is the base URL of the single neuron target (named `mock`), and
/// `db_path` is where the bench store is kept. The remaining settings are
/// fixed: two samples per version, one prompt size, and default API settings.
fn config_for(endpoint: String, db_path: String) -> BenchConfig {
    let bench = BenchSettings {
        sweep_interval_secs: 1,
        samples_per_version: 2,
        iteration_pause_secs: 0,
        request_timeout_secs: 30,
        db_path,
    };

    let scenarios = ScenarioConfig {
        // single scenario keeps assertions simple
        prompt_sizes: vec![128],
        max_tokens: 16,
    };

    let mock_target = TargetConfig {
        name: "mock".into(),
        kind: TargetKind::Neuron,
        endpoint,
        label: None,
    };

    BenchConfig {
        bench,
        scenarios,
        api: Default::default(),
        targets: vec![mock_target],
    }
}
/// End-to-end check of the sweep lifecycle against the mock neuron:
///
/// 1. the first sweep records `samples_per_version` samples,
/// 2. a second sweep at the same build SHA skips the satisfied cell,
/// 3. after the mock reports a new SHA, sampling starts again.
#[tokio::test]
async fn sweep_records_skips_and_resumes_on_new_sha() {
    let (endpoint, sha_handle) = spawn_mock("aaaaaaa").await;

    // Unique db path per run (bound port is unique).
    let port = endpoint.rsplit(':').next().unwrap();
    let db_path = std::env::temp_dir().join(format!("helexa-bench-it-{port}.sqlite"));

    // The bench store runs SQLite in WAL mode, which keeps `-wal` and `-shm`
    // files next to the database. Ephemeral ports are reused across runs, so a
    // sidecar left by an earlier, killed run could otherwise be replayed into
    // this run's "fresh" database. Remove the database and both sidecars.
    let sidecar = |suffix: &str| {
        let mut name = db_path.clone().into_os_string();
        name.push(suffix);
        std::path::PathBuf::from(name)
    };
    let db_files = [db_path.clone(), sidecar("-wal"), sidecar("-shm")];
    let remove_db_files = || {
        for file in &db_files {
            // Best effort: a missing file is the normal case.
            let _ = std::fs::remove_file(file);
        }
    };
    remove_db_files();

    let db_str = db_path.to_string_lossy().to_string();
    let sweeper = Sweeper::new(config_for(endpoint, db_str)).unwrap();

    // First sweep: one warm model × one scenario × 2 samples.
    let s1 = sweeper.run_once().await.unwrap();
    assert_eq!(s1.measured, 2, "should record samples_per_version samples");
    assert_eq!(s1.skipped, 0);
    assert_eq!(s1.failed, 0);

    // Second sweep at same SHA: cell satisfied, nothing measured.
    let s2 = sweeper.run_once().await.unwrap();
    assert_eq!(s2.measured, 0, "satisfied cell must be skipped");
    assert_eq!(s2.skipped, 1);

    // Bump the reported build SHA: a new cell → fresh sampling resumes.
    *sha_handle.lock().unwrap() = "bbbbbbb".to_string();
    let s3 = sweeper.run_once().await.unwrap();
    assert_eq!(s3.measured, 2, "new SHA must resume sampling");
    assert_eq!(s3.skipped, 0);

    // Drop the sweeper before deleting, so the files are not removed while it
    // is still alive (it presumably owns the store handle).
    drop(sweeper);
    remove_db_files();
}