feat(cortex-gateway): C3 — propagate vision capabilities through /v1/models
ModelEntry and CortexModelEntry gain a `capabilities: Vec<String>` field (serde-default for back-compat).

The poller copies it verbatim from each neuron's ModelInfo.capabilities; list_models computes the union across every node where a model is loaded, so a checkpoint loaded text-only on one neuron and text+vision on another reports both to the fleet. Catalogue-only entries default to empty until the catalogue gains a capabilities declaration, and mid-prewarm entries default to empty because they have no loaded location to read capabilities from yet. Aliases inherit their target's capability union.

New gateway test mocks two nodes with differing capability arrays and asserts the unioned /v1/models response.

Closes part of #16 (Stage C3).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -414,6 +414,9 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
loaded: false,
|
||||
feasible_on,
|
||||
locations: Vec::new(),
|
||||
// Catalogue profiles don't declare capabilities yet;
|
||||
// the union is filled in Pass 2 from loaded locations.
|
||||
capabilities: Vec::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -438,6 +441,14 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
if was_loaded {
|
||||
e.loaded = true;
|
||||
}
|
||||
// Union the per-node capabilities so a model loaded
|
||||
// on several neurons reports every modality any of
|
||||
// them advertises.
|
||||
for cap in &entry.capabilities {
|
||||
if !e.capabilities.contains(cap) {
|
||||
e.capabilities.push(cap.clone());
|
||||
}
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| CortexModelEntry {
|
||||
id: model_id.clone(),
|
||||
@@ -449,6 +460,7 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
// feasibility; leave empty.
|
||||
feasible_on: Vec::new(),
|
||||
locations: vec![location],
|
||||
capabilities: entry.capabilities.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -498,6 +510,9 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
loaded: false,
|
||||
feasible_on: Vec::new(),
|
||||
locations: vec![location],
|
||||
// A model that's only mid-prewarm has no loaded
|
||||
// location to read capabilities from yet.
|
||||
capabilities: Vec::new(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -527,6 +542,7 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
loaded: target_entry.loaded,
|
||||
feasible_on: target_entry.feasible_on,
|
||||
locations: target_entry.locations,
|
||||
capabilities: target_entry.capabilities,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -107,12 +107,14 @@ async fn poll_neuron(fleet: &CortexState, name: &str, endpoint: &str) {
|
||||
.and_modify(|e| {
|
||||
e.status = status;
|
||||
e.vram_estimate_mb = upstream.vram_used_mb;
|
||||
e.capabilities = upstream.capabilities.clone();
|
||||
})
|
||||
.or_insert_with(|| ModelEntry {
|
||||
id: upstream.id.clone(),
|
||||
status,
|
||||
last_accessed: None,
|
||||
vram_estimate_mb: upstream.vram_used_mb,
|
||||
capabilities: upstream.capabilities.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -244,6 +244,7 @@ async fn cold_load(
|
||||
status: ModelStatus::Loaded,
|
||||
last_accessed: Some(chrono::Utc::now()),
|
||||
vram_estimate_mb: profile.vram_mb,
|
||||
capabilities: Vec::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user