From 9b0ed0b57f3a7cf5ea5ab9e8613ee308eeafe693 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Fri, 22 May 2026 06:23:47 +0300 Subject: [PATCH] fix(router): rewrite loopback inference URLs to use neuron's host Neuron hardcodes its bind_url as `http://localhost:13131` (it can't reliably know its own externally-resolvable name). When cortex runs on a different host than the neuron it's routing to, blindly proxying to that URL hits localhost on the cortex box instead of the neuron. Cortex already knows each neuron's reachable host from cortex.toml. After fetching the inference URL from `/models/{id}/endpoint`, if the host is a loopback name (localhost / 127.0.0.1 / 0.0.0.0 / ::1), swap it for the configured neuron host. Preserve the port and path from neuron's URL so a future harness serving inference on a different port than the management API still works. Adds `url` (already a transitive dep via reqwest) as a direct dep for the URL parsing. Tests cover: localhost rewrite, distinct inference port preservation, non-loopback passthrough, malformed input. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + crates/cortex-gateway/Cargo.toml | 1 + crates/cortex-gateway/src/router.rs | 65 ++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index edd0d76..866c33d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -596,6 +596,7 @@ dependencies = [ "tower", "tower-http", "tracing", + "url", "urlencoding", ] diff --git a/crates/cortex-gateway/Cargo.toml b/crates/cortex-gateway/Cargo.toml index 9dbb704..b0ff8f8 100644 --- a/crates/cortex-gateway/Cargo.toml +++ b/crates/cortex-gateway/Cargo.toml @@ -24,6 +24,7 @@ tokio-stream.workspace = true eventsource-stream.workspace = true bytes = "1" urlencoding = "2" +url = "2" [dev-dependencies] tokio = { workspace = true, features = ["test-util"] } diff --git a/crates/cortex-gateway/src/router.rs b/crates/cortex-gateway/src/router.rs index 4b90a7b..cc0cef5 100644 --- a/crates/cortex-gateway/src/router.rs +++ b/crates/cortex-gateway/src/router.rs @@ -299,13 +299,76 @@ async fn finish( _ => None, }; - let endpoint = inference_endpoint.ok_or_else(|| { + let raw = inference_endpoint.ok_or_else(|| { RouteError::EndpointResolveFailed(model_id.to_string(), node_name.to_string()) })?; + // Rewrite loopback inference URLs to use the configured neuron host. + // Neuron's default bind_url is `http://localhost:13131` (it can't + // reliably know its own externally-resolvable name). Cortex sees a + // URL that's only meaningful from the neuron host's own perspective; + // proxying directly to localhost from a different cortex host would + // hit nothing. Keep neuron's port and path (a future harness could + // serve inference on a different port than the management API), but + // swap the host for the one in cortex.toml. 
+    let endpoint = rewrite_loopback_host(&raw, neuron_endpoint).unwrap_or(raw);
+
     Ok(RouteDecision {
         node_name: node_name.to_string(),
         endpoint,
         cold_start,
     })
 }
+
+/// If `inference_url`'s host is a loopback name (localhost / 127.0.0.1 /
+/// 0.0.0.0 / ::1), return a copy with the host replaced by
+/// `neuron_endpoint`'s host; everything else in the URL is left untouched.
+/// Returns None when the host is not loopback, or when either URL fails to
+/// parse or has no host; the caller then uses the inference URL as-is.
+fn rewrite_loopback_host(inference_url: &str, neuron_endpoint: &str) -> Option<String> {
+    let mut out = url::Url::parse(inference_url).ok()?;
+    // `host_str()` keeps the brackets of IPv6 literals, so `::1` arrives as `[::1]`.
+    let is_loopback = matches!(out.host_str()?, "localhost" | "127.0.0.1" | "0.0.0.0" | "[::1]");
+    if !is_loopback {
+        return None;
+    }
+    let neuron = url::Url::parse(neuron_endpoint).ok()?;
+    let new_host = neuron.host_str()?;
+    out.set_host(Some(new_host)).ok()?;
+    Some(out.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::rewrite_loopback_host;
+
+    #[test]
+    fn rewrites_localhost_keeps_port_and_path() {
+        let out = rewrite_loopback_host(
+            "http://localhost:13131",
+            "http://beast.hanzalova.internal:13131",
+        );
+        assert_eq!(
+            out.as_deref(),
+            Some("http://beast.hanzalova.internal:13131/")
+        );
+    }
+
+    #[test]
+    fn rewrites_loopback_with_distinct_inference_port() {
+        let out = rewrite_loopback_host("http://127.0.0.1:8080", "http://beast.lan:13131");
+        assert_eq!(out.as_deref(), Some("http://beast.lan:8080/"));
+    }
+
+    #[test]
+    fn leaves_non_loopback_alone() {
+        let out = rewrite_loopback_host("http://other.host:1234", "http://beast.lan:13131");
+        assert_eq!(out, None);
+    }
+
+    #[test]
+    fn malformed_inference_url_returns_none() {
+        let out = rewrite_loopback_host("not a url", "http://beast.lan:13131");
+        assert_eq!(out, None);
+    }
+}