From d0292ed37740bc0bca8d8b5996c36c27bc25de7a Mon Sep 17 00:00:00 2001
From: rob thijssen <grenade@rob.tn>
Date: Mon, 1 Jun 2026 14:53:58 +0300
Subject: [PATCH] feat(cortex): catalogue source field + scheme-qualified
 /models/load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 3 of plan-source-aware-loader-preflight. Adds an optional
`source` field to `ModelProfile` and threads it through the
router's cold-load path so a profile pointing at the helexa
registry forwards `helexa:<id>` to neuron's `/models/load`
instead of leaving neuron to substitute its `default_source`
(typically `huggingface`).

Without this, an operator who declares
`source = "helexa"` in models.toml would still see neuron fetch
from HuggingFace — the catalogue → ModelSpec translation in
`profile_to_spec` was dropping the scheme on the floor.

What lands:

- `cortex-core::catalogue::ModelProfile.source: Option<String>`.
  None is the default and preserves pre-Phase-3 behaviour.
- `cortex-gateway::router::qualified_model_id(profile)` —
  small pure helper, extracted from `profile_to_spec` so it can
  be unit-tested. Empty-string `source` is treated as None so
  operators who blank out a previously-set value don't send
  neuron a malformed `:<id>` with an empty scheme.
- `models.example.toml` documents the new field with a
  commented-out helexa-scheme example pointing back at
  neuron.example.toml's matching sources block.

Tests:

- 2 new unit tests in `cortex-core::catalogue`: source-absent
  round-trip and source-present round-trip through TOML.
- 3 new unit tests in `cortex-gateway::router`: pass-through
  when None, prefix when Some, pass-through on empty-string
  source.
- ModelProfile literal in catalogue's existing test updated to
  carry `source: None`.

CI gate: cargo fmt --check, cargo clippy --workspace
--all-targets -- -D warnings, cargo test --workspace
(24 test groups ok, zero failures).

Completes Phase 3. With Phases 1+2+3 landed:
- neuron parses `scheme:org/name`, routes per-source hf-hub
  Api with disambiguated cache.
- preflight returns structured errors before any device
  allocation.
- cortex catalogue declares per-model source jurisdiction
  and forwards it to neuron.

The registry itself (registry.helexa.ai service, MinIO,
nginx, mirror fabric) is the next moving piece — landing
under a separate project per the design discussion.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/cortex-core/src/catalogue.rs | 35 ++++++++++++++++++
 crates/cortex-gateway/src/router.rs | 56 +++++++++++++++++++++++++++--
 models.example.toml                 | 22 +++++++++++-
 3 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/crates/cortex-core/src/catalogue.rs b/crates/cortex-core/src/catalogue.rs
index ac0e028..e606cce 100644
--- a/crates/cortex-core/src/catalogue.rs
+++ b/crates/cortex-core/src/catalogue.rs
@@ -24,6 +24,17 @@ pub struct ModelProfile {
     /// Neurons where this model should never be evicted.
     #[serde(default)]
     pub pinned_on: Vec<String>,
+    /// Source scheme this profile's weights come from. When set to a
+    /// non-empty value, the router sends `scheme:id` instead of the
+    /// bare `id` in the load request to neuron, so the daemon fetches
+    /// from the declared registry rather than its default.
+    ///
+    /// `None` (or an empty string) lets neuron substitute its own
+    /// `default_source` (typically `huggingface`). Set to `"helexa"`
+    /// when the model is hosted in the helexa registry — procurement
+    /// audits rely on this being explicit per model, not implicit.
+    #[serde(default)]
+    pub source: Option<String>,
 }
 
 fn default_min_devices() -> u32 {
@@ -140,6 +151,7 @@ mod tests {
             min_devices: 2,
             min_device_vram_mb: Some(24_000),
             pinned_on: vec![],
+            source: None,
         }
     }
 
@@ -197,6 +209,29 @@ mod tests {
         assert_eq!(cat.resolve_alias("Qwen/Qwen3-8B"), "Qwen/Qwen3-8B");
     }
 
+    #[test]
+    fn source_defaults_to_none_when_absent_from_toml() {
+        let src = r#"
+[[models]]
+id = "Qwen/Qwen3-30B"
+harness = "candle"
+"#;
+        let cat: ModelCatalogue = toml::from_str(src).expect("parse models table");
+        assert!(cat.models[0].source.is_none());
+    }
+
+    #[test]
+    fn source_round_trips_through_toml() {
+        let src = r#"
+[[models]]
+id = "Helexa/Qwen3.6-27B-Uncensored"
+harness = "candle"
+source = "helexa"
+"#;
+        let cat: ModelCatalogue = toml::from_str(src).expect("parse models table");
+        assert_eq!(cat.models[0].source.as_deref(), Some("helexa"));
+    }
+
     #[test]
     fn aliases_table_round_trips_through_toml() {
         let src = r#"
diff --git a/crates/cortex-gateway/src/router.rs b/crates/cortex-gateway/src/router.rs
index 5e59757..eeb20b4 100644
--- a/crates/cortex-gateway/src/router.rs
+++ b/crates/cortex-gateway/src/router.rs
@@ -292,7 +292,7 @@ async fn profile_to_spec(
     };
 
     ModelSpec {
-        model_id: profile.id.clone(),
+        model_id: qualified_model_id(profile),
         harness: profile.harness.clone(),
         quant: profile.quant.clone(),
         tensor_parallel,
@@ -300,6 +300,22 @@ async fn profile_to_spec(
     }
 }
 
+/// Build the model id forwarded to neuron for `profile`.
+///
+/// Returns `"{source}:{id}"` when `profile.source` is `Some` and non-empty,
+/// so neuron resolves the load against that registry instead of its
+/// `default_source` (typically `huggingface`). Returns the bare `profile.id`
+/// when `source` is `None` or `""`, preserving the pre-Phase-3 contract.
+///
+/// Stays at module scope (not nested in `profile_to_spec`) so the unit
+/// tests can exercise it without spinning up CortexState topology.
+fn qualified_model_id(profile: &ModelProfile) -> String {
+    match profile.source.as_deref() {
+        Some(scheme) if !scheme.is_empty() => format!("{scheme}:{}", profile.id),
+        _ => profile.id.clone(),
+    }
+}
+
 /// Resolve neuron's `/models/{id}/endpoint` to its inference URL and
 /// build the final `RouteDecision`. Shared by all three priority
 /// branches above.
@@ -375,7 +391,43 @@ fn rewrite_loopback_host(inference_url: &str, neuron_endpoint: &str) -> Option<S
 
 #[cfg(test)]
 mod tests {
-    use super::rewrite_loopback_host;
+    use super::{ModelProfile, qualified_model_id, rewrite_loopback_host};
+
+    fn bare_profile(id: &str, source: Option<&str>) -> ModelProfile {
+        ModelProfile {
+            id: id.into(),
+            harness: "candle".into(),
+            quant: None,
+            vram_mb: None,
+            min_devices: 1,
+            min_device_vram_mb: None,
+            pinned_on: vec![],
+            source: source.map(String::from),
+        }
+    }
+
+    #[test]
+    fn qualified_id_passes_through_when_source_absent() {
+        let p = bare_profile("Qwen/Qwen3-30B", None);
+        assert_eq!(qualified_model_id(&p), "Qwen/Qwen3-30B");
+    }
+
+    #[test]
+    fn qualified_id_prefixes_when_source_set() {
+        let p = bare_profile("Helexa/Qwen3.6-27B-Uncensored", Some("helexa"));
+        assert_eq!(
+            qualified_model_id(&p),
+            "helexa:Helexa/Qwen3.6-27B-Uncensored"
+        );
+    }
+
+    #[test]
+    fn qualified_id_passes_through_when_source_is_empty_string() {
+        // An empty scheme is treated as absent — neuron's default_source
+        // substitution kicks in.
+        let p = bare_profile("Qwen/Qwen3-30B", Some(""));
+        assert_eq!(qualified_model_id(&p), "Qwen/Qwen3-30B");
+    }
 
     #[test]
     fn rewrites_localhost_keeps_port_and_path() {
diff --git a/models.example.toml b/models.example.toml
index 12da8a5..eb2dca1 100644
--- a/models.example.toml
+++ b/models.example.toml
@@ -7,7 +7,8 @@
 # returns and what the router can cold-load on demand.
 #
 # Field reference:
-#   id                 - HuggingFace model id, exact match.
+#   id                 - Repo id in the source registry (e.g. "Qwen/Qwen3.6-27B").
+#                        Exact match.
 #   harness            - which engine handles inference (currently "candle").
 #   quant              - GGUF quantisation tag for the file in the HF repo
 #                        (e.g. "Q4_K_M"). Omit/empty for the dense
@@ -20,6 +21,11 @@
 #   pinned_on          - optional whitelist of neuron names. Non-empty
 #                        narrows feasibility to just those neurons and
 #                        protects the model from LRU eviction there.
+#   source             - optional source scheme ("huggingface", "helexa",
+#                        or an operator mirror tag). When set, cortex
+#                        forwards the load to neuron as `scheme:id` so the
+#                        daemon fetches from that registry. Omit (or leave
+#                        empty) to let neuron use its own `default_source`.
 
 # Tensor-parallel target — needs a neuron with at least 2 large GPUs.
 # The example pins to a specific neuron name; adjust or remove the
@@ -49,6 +55,20 @@ vram_mb = 500
 min_devices = 1
 min_device_vram_mb = 4000
 
+# Helexa registry model — `source` pins this entry to the helexa
+# scheme so cortex forwards `helexa:Helexa/Qwen3.6-27B-Uncensored` to
+# neuron's /models/load. Requires the neuron config to declare a
+# matching [harness.candle.sources.helexa] entry pointing at the
+# helexa registry endpoint (see neuron.example.toml).
+#
+# [[models]]
+# id = "Helexa/Qwen3.6-27B-Uncensored"
+# harness = "candle"
+# source = "helexa"
+# vram_mb = 54000
+# min_devices = 2
+# min_device_vram_mb = 24000
+
 # -- Tier aliases ------------------------------------------------------------
 # Optional. Clients can request inference against an alias (e.g.
 # `model: "helexa/small"` in /v1/chat/completions) and cortex