Replace NodeConfig (static vram_mb, pinned) with NeuronEndpoint.
Hardware discovery and model pinning now come from the neuron API and
the models.toml catalogue, respectively.
- config.rs: nodes -> neurons, add models_config path
- catalogue.rs: ModelProfile with pinned_on, ModelCatalogue
- poller.rs: poll neuron GET /models (ModelInfo format)
- router.rs: resolve inference endpoint via neuron GET /models/{id}/endpoint
- evictor.rs: call neuron POST /models/unload
- node.rs: remove vram_mb, pinned fields (now come from discovery/catalogue)
- All 22 gateway tests updated to mock neuron API
- Remove MistralModelsResponse, ModelLifecycleRequest (no longer needed)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
30 lines
692 B
TOML
30 lines
692 B
TOML
[package]
|
|
name = "cortex-gateway"
|
|
version.workspace = true
|
|
edition.workspace = true
|
|
license.workspace = true
|
|
|
|
[dependencies]
|
|
cortex-core.workspace = true
|
|
tokio.workspace = true
|
|
axum.workspace = true
|
|
tower.workspace = true
|
|
tower-http.workspace = true
|
|
serde.workspace = true
|
|
serde_json.workspace = true
|
|
reqwest.workspace = true
|
|
tracing.workspace = true
|
|
metrics.workspace = true
|
|
metrics-exporter-prometheus.workspace = true
|
|
chrono.workspace = true
|
|
anyhow.workspace = true
|
|
thiserror.workspace = true
|
|
futures.workspace = true
|
|
tokio-stream.workspace = true
|
|
eventsource-stream.workspace = true
|
|
bytes = "1"
|
|
urlencoding = "2"
|
|
|
|
[dev-dependencies]
|
|
tokio = { workspace = true, features = ["test-util"] }
|