feat: add per-request Prometheus metrics instrumentation

Emit cortex_requests_total, cortex_request_duration_seconds, cortex_request_errors_total, and cortex_cold_starts_total with model and node labels on every proxied request. Add install_test_recorder() for testing metrics without an HTTP listener. Integration test verifies counters and histograms appear after a proxied request. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 19:42:09 +03:00
parent 29c8f10761
commit 67b9b044d3
4 changed files with 152 additions and 40 deletions
--- a/crates/cortex-gateway/tests/metrics.rs
+++ b/crates/cortex-gateway/tests/metrics.rs
@@ -0,0 +1,53 @@
+mod common;
+
+use serde_json::json;
+
+#[tokio::test]
+async fn test_metrics_emitted_after_proxy() {
+    // Install the test recorder (no HTTP listener); `handle.render()` returns the metrics as text.
+    // The recorder is global, so only one test can install it -- keep this the sole caller here.
+    let handle = cortex_gateway::metrics::install_test_recorder().expect("recorder should install");
+
+    let mock_url = common::spawn_mock_backend().await;
+    let gw_url = common::spawn_gateway(&mock_url).await;
+
+    // Baseline: no request has been sent yet, so the request counter must be absent.
+    let before = handle.render();
+    assert!(
+        !before.contains("cortex_requests_total"),
+        "no request metrics before any requests"
+    );
+
+    // Send one chat-completion request to the gateway, which was started against the mock backend.
+    let client = reqwest::Client::new();
+    let resp = client
+        .post(format!("{gw_url}/v1/chat/completions"))
+        .header("content-type", "application/json")
+        .json(&json!({
+            "model": "test-model",
+            "messages": [{"role": "user", "content": "Hi"}]
+        }))
+        .send()
+        .await
+        .expect("request should succeed");
+    assert_eq!(resp.status(), 200);
+    // Read the whole body so the proxied exchange has finished before metrics are rendered again.
+    let _body: serde_json::Value = resp.json().await.unwrap();
+
+    // Render again: the request counter and the duration metric should now be present.
+    let after = handle.render();
+
+    assert!(
+        after.contains("cortex_requests_total"),
+        "cortex_requests_total should be present after a request.\nMetrics:\n{after}"
+    );
+    assert!(
+        after.contains("cortex_request_duration_seconds"),
+        "cortex_request_duration_seconds should be present.\nMetrics:\n{after}"
+    );
+    // A successful request must not emit the error counter (cold starts are not asserted here).
+    assert!(
+        !after.contains("cortex_request_errors_total"),
+        "no errors expected for a successful request"
+    );
+}