feat: add neuron daemon with GPU discovery and health endpoints

Replace cortex-agent stub with neuron (cortex-neuron binary). cortex-core additions: - discovery.rs: DeviceInfo, DiscoveryResponse, DeviceHealth, HealthResponse - harness.rs: Harness async trait, HarnessConfig, ModelSpec, ModelInfo neuron crate (crates/neuron/): - discovery.rs: nvidia-smi CSV parsing (pure functions) + system discovery via uname/nvidia-smi/nvcc - health.rs: cached GPU health polling every 5s - api.rs: GET /discovery and GET /health axum handlers - main.rs: CLI entrypoint with --port flag (default 9090) - harness stubs for mistralrs (Phase 8) and llamacpp (Phase 11) 12 new tests (9 unit + 3 integration), 35 total. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 14:23:42 +03:00
parent 67b9b044d3
commit 6dc717ebcd
22 changed files with 1239 additions and 112 deletions
--- a/crates/neuron/src/main.rs
+++ b/crates/neuron/src/main.rs
@@ -0,0 +1,60 @@
+use anyhow::Result;
+use clap::Parser;
+use cortex_neuron::{api, discovery, health};
+use std::sync::Arc;
+use std::time::Instant;
+use tracing_subscriber::EnvFilter;
+
+#[derive(Parser)]
+#[command(name = "cortex-neuron")]
+#[command(about = "Per-node daemon for cortex inference clusters")]
+#[command(version)]
+struct Args {
+    /// Port to listen on.
+    #[arg(short, long, default_value = "9090")]
+    port: u16,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| EnvFilter::new("info,cortex_neuron=debug")),
+        )
+        .init();
+
+    let args = Args::parse();
+    let start_time = Instant::now();
+
+    tracing::info!("running hardware discovery");
+    let discovery_result = discovery::discover_system().await?;
+    tracing::info!(
+        hostname = %discovery_result.hostname,
+        devices = discovery_result.devices.len(),
+        "discovery complete"
+    );
+
+    let health_cache = Arc::new(health::HealthCache::new());
+    health_cache
+        .set_has_gpus(!discovery_result.devices.is_empty())
+        .await;
+
+    let poller_cache = Arc::clone(&health_cache);
+    tokio::spawn(async move {
+        poller_cache.poll_loop(start_time).await;
+    });
+
+    let state = Arc::new(api::NeuronState {
+        discovery: discovery_result,
+        health_cache,
+    });
+
+    let app = api::neuron_routes().with_state(state);
+    let addr: std::net::SocketAddr = format!("0.0.0.0:{}", args.port).parse()?;
+    tracing::info!("cortex-neuron listening on {addr}");
+    let listener = tokio::net::TcpListener::bind(addr).await?;
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}