diff --git a/CLAUDE.md b/CLAUDE.md index db353ea..ec5398f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -125,7 +125,8 @@ automatically. Clippy warnings must be resolved, not suppressed with - One or more GPU nodes running mistral.rs on port 8080 - Optionally a metrics-only node (no GPU) for Prometheus/Grafana - Each node runs `mistralrs serve` on port 8080 -- Gateway listens on port 8000 (API) and 9100 (metrics) +- Gateway listens on port 31313 (API) and 31314 (metrics) +- neuron listens on port 13131 on each GPU host - TLS terminated at gateway or via nginx; internal traffic is plaintext over WireGuard ## Conventions @@ -380,7 +381,7 @@ processes (one process per loaded model, each on its own port). ## neuron API -neuron exposes an HTTP API on port 9090 that cortex polls and calls. +neuron exposes an HTTP API on port 13131 that cortex polls and calls. ``` GET /discovery @@ -424,8 +425,8 @@ endpoint. cortex.toml shrinks to: ```toml [gateway] -listen = "0.0.0.0:8000" -metrics_listen = "0.0.0.0:9100" +listen = "0.0.0.0:31313" +metrics_listen = "0.0.0.0:31314" [eviction] strategy = "lru" @@ -433,15 +434,15 @@ defrag_after_cycles = 50 [[neurons]] name = "beast" -endpoint = "http://beast.hanzalova.internal:9090" +endpoint = "http://beast.hanzalova.internal:13131" [[neurons]] name = "benjy" -endpoint = "http://benjy.kosherinata.internal:9090" +endpoint = "http://benjy.hanzalova.internal:13131" [[neurons]] name = "quadbrat" -endpoint = "http://quadbrat.hanzalova.internal:9090" +endpoint = "http://quadbrat.hanzalova.internal:13131" ``` On startup and periodically, cortex calls `GET /discovery` and @@ -521,7 +522,7 @@ cortex/ │ │ └── metrics.rs # prometheus exporter (unchanged) │ ├── neuron/ # node plane (replaces cortex-agent) │ │ └── src/ -│ │ ├── main.rs # binary entrypoint, axum server on :9090 +│ │ ├── main.rs # binary entrypoint, axum server on :13131 │ │ ├── discovery.rs # nvidia-smi, device enumeration │ │ ├── health.rs # runtime GPU polling │ │ ├── api.rs # HTTP handlers for /discovery, /models, etc. diff --git a/README.md b/README.md index 34256ff..ef44918 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,8 @@ WantedBy=multi-user.target ```toml # cortex.toml [gateway] -listen = "0.0.0.0:8000" -metrics_listen = "0.0.0.0:9100" +listen = "0.0.0.0:31313" +metrics_listen = "0.0.0.0:31314" [eviction] strategy = "lru" # lru | priority @@ -143,7 +143,7 @@ cortex serve --config cortex.toml cortex status # list all models across nodes -curl http://localhost:8000/v1/models +curl http://localhost:31313/v1/models ``` ## License diff --git a/cortex.example.toml b/cortex.example.toml index 60a1e68..7eb7058 100644 --- a/cortex.example.toml +++ b/cortex.example.toml @@ -3,11 +3,11 @@ # Copy to cortex.toml and adjust for your environment. # # Environment variable overrides use CORTEX_ prefix with __ separators: -# CORTEX_GATEWAY__LISTEN=0.0.0.0:9000 +# CORTEX_GATEWAY__LISTEN=0.0.0.0:31313 [gateway] -listen = "0.0.0.0:8000" -metrics_listen = "0.0.0.0:9100" +listen = "0.0.0.0:31313" +metrics_listen = "0.0.0.0:31314" [eviction] strategy = "lru" diff --git a/crates/cortex-cli/src/main.rs b/crates/cortex-cli/src/main.rs index d1ff992..ec120a4 100644 --- a/crates/cortex-cli/src/main.rs +++ b/crates/cortex-cli/src/main.rs @@ -23,7 +23,7 @@ enum Commands { /// Print the fleet status (models, nodes, health). Status { /// Gateway API endpoint to query. - #[arg(short, long, default_value = "http://localhost:8000")] + #[arg(short, long, default_value = "http://localhost:31313")] endpoint: String, }, } diff --git a/crates/cortex-core/src/config.rs b/crates/cortex-core/src/config.rs index 0056755..6d03f96 100644 --- a/crates/cortex-core/src/config.rs +++ b/crates/cortex-core/src/config.rs @@ -22,9 +22,9 @@ fn default_models_path() -> String { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GatewaySettings { - /// Address to listen on for API requests (e.g. "0.0.0.0:8000") + /// Address to listen on for API requests (e.g. "0.0.0.0:31313") pub listen: String, - /// Address to listen on for Prometheus metrics (e.g. "0.0.0.0:9100") + /// Address to listen on for Prometheus metrics (e.g. "0.0.0.0:31314") pub metrics_listen: String, } @@ -50,7 +50,7 @@ pub enum EvictionStrategy { pub struct NeuronEndpoint { /// Human-readable node name (e.g. "beast") pub name: String, - /// Base URL of the neuron daemon (e.g. "http://beast.internal:9090") + /// Base URL of the neuron daemon (e.g. "http://beast.internal:13131") pub endpoint: String, } @@ -70,8 +70,8 @@ impl Default for GatewayConfig { fn default() -> Self { Self { gateway: GatewaySettings { - listen: "0.0.0.0:8000".into(), - metrics_listen: "0.0.0.0:9100".into(), + listen: "0.0.0.0:31313".into(), + metrics_listen: "0.0.0.0:31314".into(), }, eviction: EvictionSettings { strategy: EvictionStrategy::Lru, diff --git a/crates/cortex-core/src/node.rs b/crates/cortex-core/src/node.rs index 21fd9a8..860926a 100644 --- a/crates/cortex-core/src/node.rs +++ b/crates/cortex-core/src/node.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; #[derive(Debug, Clone)] pub struct NodeState { pub name: String, - /// Base URL of the neuron daemon (e.g. "http://beast.internal:9090"). + /// Base URL of the neuron daemon (e.g. "http://beast.internal:13131"). pub endpoint: String, pub healthy: bool, pub models: HashMap, diff --git a/crates/neuron/src/config.rs b/crates/neuron/src/config.rs index ff282ef..36177d9 100644 --- a/crates/neuron/src/config.rs +++ b/crates/neuron/src/config.rs @@ -17,7 +17,7 @@ pub struct NeuronConfig { } fn default_port() -> u16 { - 9090 + 13131 } impl NeuronConfig { @@ -33,7 +33,7 @@ impl NeuronConfig { impl Default for NeuronConfig { fn default() -> Self { Self { - port: 9090, + port: 13131, harnesses: vec![], } } diff --git a/neuron.example.toml b/neuron.example.toml index d9e4a69..7bf1220 100644 --- a/neuron.example.toml +++ b/neuron.example.toml @@ -3,9 +3,9 @@ # Copy to /etc/neuron/neuron.toml and adjust for your environment. # # Environment variable overrides use NEURON_ prefix with __ separators: -# NEURON_PORT=9090 +# NEURON_PORT=13131 -port = 9090 +port = 13131 # -- Harnesses --------------------------------------------------------------- # Each [[harnesses]] entry declares an inference engine managed by neuron.