Files
cortex/crates/cortex-core/src/config.rs
rob thijssen e42e8ee81f
All checks were successful
CI / Format, lint, build, test (push) Successful in 2m46s
CI / Build SRPM (push) Has been skipped
CI / Publish to COPR (push) Has been skipped
refactor: cortex talks to neurons instead of mistral.rs directly
Replace NodeConfig (static vram_mb, pinned) with NeuronEndpoint.
Hardware discovery and model pinning now come from neuron API and
models.toml catalogue respectively.

- config.rs: nodes -> neurons, add models_config path
- catalogue.rs: ModelProfile with pinned_on, ModelCatalogue
- poller.rs: poll neuron GET /models (ModelInfo format)
- router.rs: resolve inference endpoint via neuron GET /models/{id}/endpoint
- evictor.rs: call neuron POST /models/unload
- node.rs: remove vram_mb, pinned fields (come from discovery/catalogue)
- All 22 gateway tests updated to mock neuron API
- Remove MistralModelsResponse, ModelLifecycleRequest (no longer needed)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 14:42:52 +03:00

85 lines
2.5 KiB
Rust

use figment::{
Figment,
providers::{Env, Format, Toml},
};
use serde::{Deserialize, Serialize};
use std::path::Path;
/// Top-level gateway configuration: listener addresses, eviction policy,
/// the list of neuron endpoints, and the location of the model catalogue.
///
/// `gateway`, `eviction` and `neurons` carry no `#[serde(default)]`, so
/// deserialization expects them to be supplied; only `models_config` has a
/// fallback value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GatewayConfig {
/// Listen addresses for the API and the metrics endpoint.
pub gateway: GatewaySettings,
/// Eviction strategy and defrag threshold.
pub eviction: EvictionSettings,
/// Neuron endpoints (replaces old NodeConfig with static vram_mb/pinned).
pub neurons: Vec<NeuronEndpoint>,
/// Path to the model catalogue file (default: "models.toml").
/// The fallback is produced by `default_models_path` when the key is absent.
#[serde(default = "default_models_path")]
pub models_config: String,
}
/// Fallback value for `GatewayConfig::models_config`.
///
/// Referenced by name from the field's `#[serde(default = ...)]` attribute
/// and reused by `GatewayConfig::default()`, so both paths agree on the
/// catalogue file name.
fn default_models_path() -> String {
    String::from("models.toml")
}
/// Socket addresses the gateway listens on.
///
/// Both values are stored as plain strings; nothing in this type parses or
/// validates them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GatewaySettings {
/// Address to listen on for API requests (e.g. "0.0.0.0:8000")
pub listen: String,
/// Address to listen on for Prometheus metrics (e.g. "0.0.0.0:9100")
pub metrics_listen: String,
}
/// Settings that control model eviction.
///
/// `strategy` has no serde default and is therefore expected in the input;
/// `defrag_after_cycles` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvictionSettings {
/// Eviction strategy: "lru" or "priority"
pub strategy: EvictionStrategy,
/// Number of load/unload cycles before flagging for defrag. 0 = never.
///
/// When the key is omitted, `#[serde(default)]` yields `0` (i.e. never).
// NOTE(review): `GatewayConfig::default()` sets this to 50, whereas an
// omitted key deserializes to 0 — confirm the difference is intentional.
#[serde(default)]
pub defrag_after_cycles: u32,
}
/// Which eviction strategy to use.
///
/// `rename_all = "lowercase"` makes the serialized names `lru` and
/// `priority`. What each strategy actually does is implemented outside
/// this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EvictionStrategy {
/// Written as `lru` in config (presumably least-recently-used — confirm
/// against the evictor).
Lru,
/// Written as `priority` in config; the priority source is not defined
/// in this file.
Priority,
}
/// A neuron endpoint in the fleet. Hardware details come from
/// neuron's /discovery endpoint, not from config.
///
/// Both fields are required plain strings; this type performs no URL or
/// uniqueness validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NeuronEndpoint {
/// Human-readable node name (e.g. "beast")
pub name: String,
/// Base URL of the neuron daemon (e.g. "http://beast.internal:9090")
pub endpoint: String,
}
impl GatewayConfig {
/// Load configuration from a TOML file, with environment variable overrides.
/// Env vars are prefixed with `CORTEX_` and use `__` as a separator.
pub fn load(path: impl AsRef<Path>) -> Result<Self, Box<figment::Error>> {
Figment::new()
.merge(Toml::file(path))
.merge(Env::prefixed("CORTEX_").split("__"))
.extract()
.map_err(Box::new)
}
}
impl Default for GatewayConfig {
    /// Baseline configuration: API on `0.0.0.0:8000`, metrics on
    /// `0.0.0.0:9100`, LRU eviction with a defrag threshold of 50 cycles,
    /// no neurons, and the default model catalogue path.
    fn default() -> Self {
        let gateway = GatewaySettings {
            listen: String::from("0.0.0.0:8000"),
            metrics_listen: String::from("0.0.0.0:9100"),
        };
        let eviction = EvictionSettings {
            strategy: EvictionStrategy::Lru,
            defrag_after_cycles: 50,
        };

        Self {
            gateway,
            eviction,
            // The fleet starts empty; endpoints are expected to come from config.
            neurons: Vec::new(),
            models_config: default_models_path(),
        }
    }
}