All checks were successful
CI / CUDA type-check (push) Successful in 1m36s
CI / Format (push) Successful in 31s
CI / Clippy (push) Successful in 2m47s
CI / Test (push) Successful in 4m33s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
Adds automated, longitudinal performance tracking across neuron builds,
replacing manual script/bench.py runs and hand edits to benchmarks.md.
neuron build metadata + GET /version:
- cortex-core: shared BuildInfo type (build_info.rs).
- neuron build.rs captures git SHA (preferring injected HELEXA_BUILD_SHA,
else git, else "unknown"), dirty flag, build timestamp, rustc version,
profile, target, enabled cargo features, and best-effort candle-core
version from Cargo.lock.
- New GET /version endpoint (version.rs) + clap --version long form.
- SHA injected in CI (build-neuron step) and helexa-neuron.spec
(%{?helexa_commit}) so tarball RPMs report the real SHA. /version is
now the canonical "which build is live" probe.
helexa-bench crate:
- Continuous daemon: hits each neuron directly on :13131, exercises each
warm (status==loaded) model, records every run into a SQLite
system-of-record stamped with the neuron's full BuildInfo.
- Version-aware: skips any (target, build SHA, model, scenario) cell
already at samples_per_version, so a steady fleet costs only cheap
/version + /models polls until a new SHA ships.
- Extensible Scenario trait; phase-1 chat-latency family ported verbatim
from bench.py (synthetic 128/4096-tok prompts, /no_think, streamed
TTFT + decode-window tok/s). `report` regenerates the benchmarks table.
- kind="openai" comparison targets scaffolded, not yet wired.
Packaging: data/helexa-bench.service (+ sysusers), prebuilt-binary RPM
spec (outbound-only, no firewalld), and build/package/publish wiring in
build-prerelease.yml with change detection.
Tests: cortex-core BuildInfo round-trip, neuron GET /version integration,
helexa-bench unit (prompt/SSE/config/store) + end-to-end sweep
(record -> skip -> resume on new SHA). Docs updated (benchmarks.md,
CLAUDE.md addendum).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
74 lines
1.9 KiB
TOML
74 lines
1.9 KiB
TOML
# Workspace root: lists the member crates and pins Cargo's feature
# resolver version for all of them.
[workspace]
resolver = "2"
members = [
    "crates/cortex-core",
    "crates/cortex-gateway",
    "crates/cortex-cli",
    "crates/neuron",
    "crates/helexa-acp",
    "crates/helexa-bench",
]

# Package metadata shared across the workspace; a member crate inherits a
# field by declaring `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "0.1.16"
edition = "2024"
license = "GPL-3.0-or-later"
repository = "https://git.lair.cafe/helexa/helexa"

# Dependency versions declared once for the whole workspace; a member crate
# opts in with `<dep>.workspace = true`.
[workspace.dependencies]
# async runtime
tokio = { version = "1", features = ["full"] }

# web framework
axum = { version = "0.8", features = ["macros"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["cors", "trace", "timeout"] }

# serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
toml = "0.8"

# http client (for proxying to neuron backends)
reqwest = { version = "0.12", features = ["json", "stream"] }

# observability
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
metrics = "0.24"
metrics-exporter-prometheus = "0.16"

# time
chrono = { version = "0.4", features = ["serde"] }

# config
figment = { version = "0.10", features = ["toml", "env"] }

# error handling
anyhow = "1"
thiserror = "2"

# async traits
async-trait = "0.1"

# CLI
clap = { version = "4", features = ["derive"] }

# futures / streams (for SSE proxying)
futures = "0.3"
tokio-stream = "0.1"
eventsource-stream = "0.2"

# workspace crates
cortex-core = { path = "crates/cortex-core" }
cortex-gateway = { path = "crates/cortex-gateway" }

# Patched cudarc (affects neuron's 0.19.x only; candle's 0.17.x is
# untouched since the fork is 0.19.7 and doesn't satisfy a 0.17 req). Adds
# Comm::abort / get_async_error / raw comm() — needed for #17 Stage 2 TP
# hang-recovery (abort a wedged collective from another thread, then
# rebuild the comm). Pinned to a fork revision pending upstream review
# (grenade/cudarc @ nccl-comm-abort).
[patch.crates-io]
cudarc = { git = "https://github.com/grenade/cudarc", rev = "63327a256059f8252641ae46c6bb9eefe707f382" }