Emit cortex_requests_total, cortex_request_duration_seconds, cortex_request_errors_total, and cortex_cold_starts_total with model and node labels on every proxied request. Add install_test_recorder() for testing metrics without HTTP listener. Integration test verifies counters and histograms appear after proxy. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
59 lines
2.0 KiB
Rust
59 lines
2.0 KiB
Rust
//! Prometheus metrics exporter.
|
|
//!
|
|
//! Runs on a separate port from the main API, exposing `/metrics`
|
|
//! in Prometheus text format.
|
|
|
|
use anyhow::Result;
|
|
use metrics_exporter_prometheus::PrometheusBuilder;
|
|
use std::net::SocketAddr;
|
|
|
|
/// Install the Prometheus metrics recorder and return a handle.
|
|
/// The `/metrics` endpoint is served by the exporter's built-in HTTP server.
|
|
pub fn install(listen: &str) -> Result<()> {
|
|
let addr: SocketAddr = listen.parse()?;
|
|
|
|
PrometheusBuilder::new()
|
|
.with_http_listener(addr)
|
|
.install()
|
|
.map_err(|e| anyhow::anyhow!("failed to install Prometheus exporter: {e}"))?;
|
|
|
|
tracing::info!("prometheus metrics exporter on {addr}");
|
|
describe_metrics();
|
|
Ok(())
|
|
}
|
|
|
|
/// Install a recorder for testing (no HTTP listener). Returns a handle
|
|
/// that can render the current metrics as Prometheus text.
|
|
pub fn install_test_recorder() -> Result<metrics_exporter_prometheus::PrometheusHandle> {
|
|
let handle = PrometheusBuilder::new()
|
|
.install_recorder()
|
|
.map_err(|e| anyhow::anyhow!("failed to install test recorder: {e}"))?;
|
|
describe_metrics();
|
|
Ok(handle)
|
|
}
|
|
|
|
fn describe_metrics() {
|
|
metrics::describe_histogram!(
|
|
"cortex_request_duration_seconds",
|
|
"Total request latency in seconds"
|
|
);
|
|
metrics::describe_histogram!(
|
|
"cortex_time_to_first_token_seconds",
|
|
"Time to first token in seconds"
|
|
);
|
|
metrics::describe_histogram!(
|
|
"cortex_tokens_per_second",
|
|
"Generation throughput in tokens per second"
|
|
);
|
|
metrics::describe_counter!("cortex_requests_total", "Total number of proxied requests");
|
|
metrics::describe_counter!(
|
|
"cortex_request_errors_total",
|
|
"Total number of failed proxy requests"
|
|
);
|
|
metrics::describe_counter!("cortex_evictions_total", "Total number of model evictions");
|
|
metrics::describe_counter!(
|
|
"cortex_cold_starts_total",
|
|
"Total number of cold-start model loads"
|
|
);
|
|
}
|