diff --git a/crates/cortex-gateway/src/handlers.rs b/crates/cortex-gateway/src/handlers.rs index 3c047d2..9a6effe 100644 --- a/crates/cortex-gateway/src/handlers.rs +++ b/crates/cortex-gateway/src/handlers.rs @@ -34,12 +34,30 @@ async fn chat_completions( ) -> Response { let model_id = match extract_model(&body) { Some(m) => m, - None => return error_response(400, "missing 'model' field in request body"), + None => { + tracing::warn!( + handler = "chat_completions", + "rejected: missing 'model' field in request body" + ); + return error_response(400, "missing 'model' field in request body"); + } }; let route = match router::resolve(&fleet, &model_id).await { Ok(r) => r, - Err(e) => return error_response(404, &e.to_string()), + Err(e) => { + tracing::warn!( + handler = "chat_completions", + model = %model_id, + error = %e, + "route resolve failed" + ); + // RouteError's Display strings are short and informative + // ("model 'X' not found...", "no healthy nodes available") + // — fine to surface to the caller. The warn above carries + // any extra context for operators. + return error_response(404, &e.to_string()); + } }; touch_model(&fleet, &route.node_name, &model_id).await; @@ -63,12 +81,30 @@ async fn completions( ) -> Response { let model_id = match extract_model(&body) { Some(m) => m, - None => return error_response(400, "missing 'model' field in request body"), + None => { + tracing::warn!( + handler = "completions", + "rejected: missing 'model' field in request body" + ); + return error_response(400, "missing 'model' field in request body"); + } }; let route = match router::resolve(&fleet, &model_id).await { Ok(r) => r, - Err(e) => return error_response(404, &e.to_string()), + Err(e) => { + tracing::warn!( + handler = "completions", + model = %model_id, + error = %e, + "route resolve failed" + ); + // RouteError's Display strings are short and informative + // ("model 'X' not found...", "no healthy nodes available") + // — fine to surface to the caller. 
The warn above carries + // any extra context for operators. + return error_response(404, &e.to_string()); + } }; touch_model(&fleet, &route.node_name, &model_id).await; @@ -85,7 +121,14 @@ async fn anthropic_messages( // Parse as Anthropic request. let anth_req: cortex_core::anthropic::MessagesRequest = match serde_json::from_slice(&body) { Ok(r) => r, - Err(e) => return error_response(400, &format!("invalid Anthropic request: {e}")), + Err(e) => { + tracing::warn!( + handler = "anthropic_messages", + error = %e, + "rejected: invalid Anthropic request body" + ); + return error_response(400, "invalid Anthropic request body"); + } }; let model_id = anth_req.model.clone(); @@ -95,12 +138,32 @@ async fn anthropic_messages( let openai_req = cortex_core::translate::anthropic_to_openai(anth_req); let openai_body = match serde_json::to_vec(&openai_req) { Ok(b) => Bytes::from(b), - Err(e) => return error_response(500, &format!("translation error: {e}")), + Err(e) => { + tracing::error!( + handler = "anthropic_messages", + model = %model_id, + error = %e, + "internal: failed to serialise translated OpenAI request" + ); + return error_response(500, "internal translation error"); + } }; let route = match router::resolve(&fleet, &model_id).await { Ok(r) => r, - Err(e) => return error_response(404, &e.to_string()), + Err(e) => { + tracing::warn!( + handler = "anthropic_messages", + model = %model_id, + error = %e, + "route resolve failed" + ); + // RouteError's Display strings are short and informative + // ("model 'X' not found...", "no healthy nodes available") + // — fine to surface to the caller. The warn above carries + // any extra context for operators. 
+ return error_response(404, &e.to_string()); + } }; touch_model(&fleet, &route.node_name, &model_id).await; @@ -133,12 +196,22 @@ async fn anthropic_messages( Ok(resp) => resp, Err(e) => { metrics::counter!("cortex_request_errors_total", &labels).increment(1); + // forward_request already warn'd with the wire-level + // detail; no need to log again here. e.into_response() } } } else { // Non-streaming: proxy, buffer full response, translate back to Anthropic. let target_url = format!("{}/v1/chat/completions", route.endpoint); + tracing::info!( + handler = "anthropic_messages", + model = %model_id, + node = %route.node_name, + url = %target_url, + cold_start = route.cold_start, + "proxying request" + ); let upstream_resp = fleet .http_client .post(&target_url) @@ -152,28 +225,31 @@ async fn anthropic_messages( Err(e) => { metrics::counter!("cortex_request_errors_total", &labels).increment(1); tracing::warn!( + handler = "anthropic_messages", model = %model_id, node = %route.node_name, - target = %target_url, + url = %target_url, error = %e, - "anthropic proxy: upstream request failed (network)" + "upstream request failed (network)" ); return error_response(502, "upstream request failed"); } }; - if !upstream_resp.status().is_success() { + let upstream_status = upstream_resp.status(); + if !upstream_status.is_success() { metrics::counter!("cortex_request_errors_total", &labels).increment(1); - let status = upstream_resp.status().as_u16(); + let status = upstream_status.as_u16(); let body = upstream_resp.text().await.unwrap_or_default(); let body_snippet = body.chars().take(512).collect::<String>(); tracing::warn!( + handler = "anthropic_messages", model = %model_id, node = %route.node_name, - target = %target_url, + url = %target_url, status, body = %body_snippet, - "anthropic proxy: upstream returned non-2xx" + "upstream returned non-2xx" ); return error_response(status, &format!("upstream returned {status}")); } @@ -182,7 +258,15 @@ async fn anthropic_messages( Ok(b) => 
b, Err(e) => { metrics::counter!("cortex_request_errors_total", &labels).increment(1); - return error_response(502, &format!("failed to read upstream response: {e}")); + tracing::warn!( + handler = "anthropic_messages", + model = %model_id, + node = %route.node_name, + url = %target_url, + error = %e, + "failed to read upstream response body" + ); + return error_response(502, "failed to read upstream response"); } }; @@ -191,7 +275,20 @@ async fn anthropic_messages( Ok(r) => r, Err(e) => { metrics::counter!("cortex_request_errors_total", &labels).increment(1); - return error_response(502, &format!("failed to parse upstream response: {e}")); + let body_snippet = String::from_utf8_lossy(&body_bytes) + .chars() + .take(512) + .collect::<String>(); + tracing::warn!( + handler = "anthropic_messages", + model = %model_id, + node = %route.node_name, + url = %target_url, + error = %e, + body = %body_snippet, + "failed to parse upstream response as OpenAI ChatCompletionResponse" + ); + return error_response(502, "malformed upstream response"); } }; @@ -343,6 +440,9 @@ async fn proxy_with_metrics( } Err(e) => { metrics::counter!("cortex_request_errors_total", &labels).increment(1); + // proxy::forward_request already warn'd with wire-level + // detail (target URL, error, status). ProxyError::into_response + // now returns a generic message — no body leak. e.into_response() } } diff --git a/crates/cortex-gateway/src/proxy.rs b/crates/cortex-gateway/src/proxy.rs index 79647fc..dd167fb 100644 --- a/crates/cortex-gateway/src/proxy.rs +++ b/crates/cortex-gateway/src/proxy.rs @@ -12,6 +12,13 @@ use axum::response::{IntoResponse, Response}; use reqwest::Client; /// Proxy a request body to the resolved backend node and stream the response. +/// +/// Logging contract: every call emits at least one structured event at +/// info / warn level for operator visibility, regardless of outcome. 
+/// Network-level failures and non-2xx upstream statuses are warn'd here +/// (closest to the wire); the user-facing response carries only the +/// status code and a generic message — implementation detail (body, +/// error chain) lives in the log, never in the API surface. pub async fn forward_request( client: &Client, route: &RouteDecision, @@ -37,10 +44,33 @@ pub async fn forward_request( req_builder = req_builder.header(key, value); } - let upstream_resp = req_builder.send().await.map_err(ProxyError::Upstream)?; + let upstream_resp = match req_builder.send().await { + Ok(r) => r, + Err(e) => { + tracing::warn!( + node = %route.node_name, + url = %url, + error = %e, + "proxy: upstream request failed (network)" + ); + return Err(ProxyError::Upstream(e)); + } + }; - let status = - StatusCode::from_u16(upstream_resp.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); + let upstream_status = upstream_resp.status(); + if !upstream_status.is_success() { + // Streaming body — can't snippet without breaking the stream + // pass-through. Log status + URL; the client still gets the + // upstream status, just without the leaked body. 
+ tracing::warn!( + node = %route.node_name, + url = %url, + status = upstream_status.as_u16(), + "proxy: upstream returned non-2xx" + ); + } + + let status = StatusCode::from_u16(upstream_status.as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); let resp_headers = upstream_resp.headers().clone(); let stream = upstream_resp.bytes_stream(); @@ -52,28 +82,37 @@ pub async fn forward_request( response = response.header(key, value); } - response - .body(body) - .map_err(|e| ProxyError::ResponseBuild(e.to_string())) + response.body(body).map_err(|e| { + tracing::warn!( + node = %route.node_name, + url = %url, + error = %e, + "proxy: failed to build response" + ); + ProxyError::ResponseBuild(e.to_string()) + }) } #[derive(Debug, thiserror::Error)] pub enum ProxyError { - #[error("upstream request failed: {0}")] + #[error("upstream request failed")] Upstream(reqwest::Error), - #[error("failed to build response: {0}")] + #[error("failed to build response")] ResponseBuild(String), } impl IntoResponse for ProxyError { fn into_response(self) -> Response { - let status = match &self { - ProxyError::Upstream(_) => StatusCode::BAD_GATEWAY, - ProxyError::ResponseBuild(_) => StatusCode::INTERNAL_SERVER_ERROR, + let (status, message) = match &self { + ProxyError::Upstream(_) => (StatusCode::BAD_GATEWAY, "upstream request failed"), + ProxyError::ResponseBuild(_) => ( + StatusCode::INTERNAL_SERVER_ERROR, + "failed to build response", + ), }; let body = serde_json::json!({ "error": { - "message": self.to_string(), + "message": message, "type": "proxy_error", } });