Previous defaults collided with well-trodden infra services and with the
Linux ephemeral port range:

- cortex API 8000 — common dev-server default (Django, minio UI)
- cortex metrics 9100 — Prometheus node_exporter default
- neuron API 9090 — Cockpit default on Fedora, Prometheus's own default

Move to helexa-themed palindromic ports, all below Linux's 32768-60999
ephemeral range and not registered to any well-known service:

- cortex API 31313
- cortex metrics 31314
- neuron API 13131

Updated places:

- cortex.example.toml, neuron.example.toml defaults
- default impls in cortex-core and neuron config
- cortex-cli --endpoint default for the status subcommand
- doc comments citing example URLs
- README.md and CLAUDE.md snippets

Consumers already on the old ports need a one-line edit in their
/etc/cortex/cortex.toml or /etc/neuron/neuron.toml to match; firewall rules
and Prometheus scrape configs will also need updating.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
54 lines
1.6 KiB
Rust
54 lines
1.6 KiB
Rust
use chrono::{DateTime, Utc};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
|
|
/// Runtime state of a single neuron in the fleet.
|
|
#[derive(Debug, Clone)]
|
|
pub struct NodeState {
|
|
pub name: String,
|
|
/// Base URL of the neuron daemon (e.g. "http://beast.internal:13131").
|
|
pub endpoint: String,
|
|
pub healthy: bool,
|
|
pub models: HashMap<String, ModelEntry>,
|
|
/// Number of load/unload cycles since last process restart.
|
|
pub lifecycle_cycles: u32,
|
|
pub last_poll: Option<DateTime<Utc>>,
|
|
}
|
|
|
|
/// A model registered on a node, with its runtime status.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ModelEntry {
|
|
pub id: String,
|
|
pub status: ModelStatus,
|
|
/// When this model was last used (for LRU eviction).
|
|
pub last_accessed: Option<DateTime<Utc>>,
|
|
/// Estimated VRAM usage in MB when loaded.
|
|
pub vram_estimate_mb: Option<u64>,
|
|
}
|
|
|
|
/// Model lifecycle status.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(rename_all = "lowercase")]
|
|
pub enum ModelStatus {
|
|
Loaded,
|
|
Unloaded,
|
|
Reloading,
|
|
}
|
|
|
|
/// Unified model entry as exposed by the gateway's `/v1/models` endpoint.
|
|
/// Includes which node(s) host this model and their status.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct CortexModelEntry {
|
|
pub id: String,
|
|
pub object: String,
|
|
/// Which nodes have this model (and their status).
|
|
pub locations: Vec<ModelLocation>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ModelLocation {
|
|
pub node: String,
|
|
pub status: ModelStatus,
|
|
pub vram_estimate_mb: Option<u64>,
|
|
}
|