fix(router): rewrite loopback inference URLs to use neuron's host
Some checks failed
CI / Format (push) Successful in 30s
build-prerelease / Resolve version stamps (push) Successful in 41s
build-prerelease / Build neuron-blackwell (push) Successful in 3m34s
CI / Clippy (push) Successful in 7m25s
build-prerelease / Build neuron-ampere (push) Successful in 4m57s
build-prerelease / Build cortex binary (push) Successful in 4m15s
build-prerelease / Build neuron-ada (push) Successful in 5m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 2m53s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 2m54s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m46s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m6s
CI / Test (push) Failing after 4m34s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
Some checks failed
CI / Format (push) Successful in 30s
build-prerelease / Resolve version stamps (push) Successful in 41s
build-prerelease / Build neuron-blackwell (push) Successful in 3m34s
CI / Clippy (push) Successful in 7m25s
build-prerelease / Build neuron-ampere (push) Successful in 4m57s
build-prerelease / Build cortex binary (push) Successful in 4m15s
build-prerelease / Build neuron-ada (push) Successful in 5m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 2m53s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 2m54s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m46s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m6s
CI / Test (push) Failing after 4m34s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
Neuron hardcodes its bind_url as `http://localhost:13131` (it can't reliably know its own externally-resolvable name). When cortex runs on a different host than the neuron it's routing to, blindly proxying to that URL hits localhost on the cortex box instead of the neuron.

Cortex already knows each neuron's reachable host from cortex.toml. After fetching the inference URL from `/models/{id}/endpoint`, if the host is a loopback or unspecified address (localhost / 127.0.0.1 / 0.0.0.0 / ::1), swap it for the configured neuron host. Preserve the port and path from neuron's URL so a future harness serving inference on a different port than the management API still works.

Adds `url` (already a transitive dep via reqwest) as a direct dep for the URL parsing.

Tests cover: localhost rewrite, distinct inference port preservation, non-loopback passthrough, malformed input.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -299,13 +299,76 @@ async fn finish(
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let endpoint = inference_endpoint.ok_or_else(|| {
|
||||
let raw = inference_endpoint.ok_or_else(|| {
|
||||
RouteError::EndpointResolveFailed(model_id.to_string(), node_name.to_string())
|
||||
})?;
|
||||
|
||||
// Rewrite loopback inference URLs to use the configured neuron host.
|
||||
// Neuron's default bind_url is `http://localhost:13131` (it can't
|
||||
// reliably know its own externally-resolvable name). Cortex sees a
|
||||
// URL that's only meaningful from the neuron host's own perspective;
|
||||
// proxying directly to localhost from a different cortex host would
|
||||
// hit nothing. Keep neuron's port and path (a future harness could
|
||||
// serve inference on a different port than the management API), but
|
||||
// swap the host for the one in cortex.toml.
|
||||
let endpoint = rewrite_loopback_host(&raw, neuron_endpoint).unwrap_or(raw);
|
||||
|
||||
Ok(RouteDecision {
|
||||
node_name: node_name.to_string(),
|
||||
endpoint,
|
||||
cold_start,
|
||||
})
|
||||
}
|
||||
|
||||
/// If `inference_url`'s host is a loopback name (localhost / 127.0.0.1 /
|
||||
/// 0.0.0.0 / ::1), return a copy with the host replaced by
|
||||
/// `neuron_endpoint`'s host. Otherwise return None and the caller falls
|
||||
/// back to the inference URL as-is.
|
||||
fn rewrite_loopback_host(inference_url: &str, neuron_endpoint: &str) -> Option<String> {
|
||||
let inf = url::Url::parse(inference_url).ok()?;
|
||||
let inf_host = inf.host_str()?;
|
||||
let is_loopback = matches!(inf_host, "localhost" | "127.0.0.1" | "0.0.0.0" | "::1");
|
||||
if !is_loopback {
|
||||
return None;
|
||||
}
|
||||
let neuron = url::Url::parse(neuron_endpoint).ok()?;
|
||||
let new_host = neuron.host_str()?;
|
||||
let mut out = inf.clone();
|
||||
out.set_host(Some(new_host)).ok()?;
|
||||
Some(out.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::rewrite_loopback_host;

    /// `localhost` is swapped for the neuron host while the port is kept;
    /// the serialized URL carries the root path.
    #[test]
    fn rewrites_localhost_keeps_port_and_path() {
        let inference = "http://localhost:13131";
        let neuron = "http://beast.hanzalova.internal:13131";

        let rewritten = rewrite_loopback_host(inference, neuron);

        assert_eq!(
            rewritten,
            Some(String::from("http://beast.hanzalova.internal:13131/"))
        );
    }

    /// The inference port wins over the neuron management port.
    #[test]
    fn rewrites_loopback_with_distinct_inference_port() {
        let inference = "http://127.0.0.1:8080";
        let neuron = "http://beast.lan:13131";

        let rewritten = rewrite_loopback_host(inference, neuron);

        assert_eq!(rewritten, Some(String::from("http://beast.lan:8080/")));
    }

    /// A host that is already externally meaningful is not touched.
    #[test]
    fn leaves_non_loopback_alone() {
        let inference = "http://other.host:1234";
        let neuron = "http://beast.lan:13131";

        assert!(rewrite_loopback_host(inference, neuron).is_none());
    }

    /// Unparseable inference URLs yield `None` so the caller keeps the raw value.
    #[test]
    fn malformed_inference_url_returns_none() {
        let inference = "not a url";
        let neuron = "http://beast.lan:13131";

        assert!(rewrite_loopback_host(inference, neuron).is_none());
    }
}
|
||||
|
||||
Reference in New Issue
Block a user