From 0da68833af4c77ab78fb4b8456ab69ae04de62b2 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Tue, 14 Apr 2026 18:13:30 +0300 Subject: [PATCH] feat: scaffold cortex workspace Rust reverse-proxy for multi-node mistral.rs inference clusters. Includes crate structure (cortex-core, cortex-gateway, cortex-agent, cortex-cli), config loading, OpenAI/Anthropic translation stubs, model routing, eviction, polling, and streaming proxy scaffolding. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 6 + CLAUDE.md | 141 ++ Cargo.lock | 2787 +++++++++++++++++++++++++ Cargo.toml | 57 + README.md | 138 ++ cortex.example.toml | 45 + crates/cortex-agent/Cargo.toml | 14 + crates/cortex-agent/src/agent.rs | 72 + crates/cortex-agent/src/lib.rs | 1 + crates/cortex-cli/Cargo.toml | 20 + crates/cortex-cli/src/main.rs | 112 + crates/cortex-core/Cargo.toml | 15 + crates/cortex-core/src/anthropic.rs | 87 + crates/cortex-core/src/config.rs | 79 + crates/cortex-core/src/lib.rs | 6 + crates/cortex-core/src/metrics.rs | 23 + crates/cortex-core/src/node.rs | 74 + crates/cortex-core/src/openai.rs | 122 ++ crates/cortex-core/src/translate.rs | 114 + crates/cortex-gateway/Cargo.toml | 25 + crates/cortex-gateway/src/evictor.rs | 106 + crates/cortex-gateway/src/handlers.rs | 207 ++ crates/cortex-gateway/src/lib.rs | 51 + crates/cortex-gateway/src/metrics.rs | 55 + crates/cortex-gateway/src/poller.rs | 103 + crates/cortex-gateway/src/proxy.rs | 82 + crates/cortex-gateway/src/router.rs | 74 + crates/cortex-gateway/src/state.rs | 43 + 28 files changed, 4659 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 cortex.example.toml create mode 100644 crates/cortex-agent/Cargo.toml create mode 100644 crates/cortex-agent/src/agent.rs create mode 100644 crates/cortex-agent/src/lib.rs create mode 100644 crates/cortex-cli/Cargo.toml create mode 100644 
crates/cortex-cli/src/main.rs create mode 100644 crates/cortex-core/Cargo.toml create mode 100644 crates/cortex-core/src/anthropic.rs create mode 100644 crates/cortex-core/src/config.rs create mode 100644 crates/cortex-core/src/lib.rs create mode 100644 crates/cortex-core/src/metrics.rs create mode 100644 crates/cortex-core/src/node.rs create mode 100644 crates/cortex-core/src/openai.rs create mode 100644 crates/cortex-core/src/translate.rs create mode 100644 crates/cortex-gateway/Cargo.toml create mode 100644 crates/cortex-gateway/src/evictor.rs create mode 100644 crates/cortex-gateway/src/handlers.rs create mode 100644 crates/cortex-gateway/src/lib.rs create mode 100644 crates/cortex-gateway/src/metrics.rs create mode 100644 crates/cortex-gateway/src/poller.rs create mode 100644 crates/cortex-gateway/src/proxy.rs create mode 100644 crates/cortex-gateway/src/router.rs create mode 100644 crates/cortex-gateway/src/state.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84db778 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/target +*.swp +*.swo +.idea/ +.vscode/ +cortex.toml diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..157eff2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,141 @@ +# CLAUDE.md — cortex + +## Project overview + +cortex is a Rust reverse-proxy that sits in front of multiple +mistral.rs inference nodes and presents a unified OpenAI + Anthropic +compatible API surface. It handles model routing, lifecycle management +(load/unload/evict), request translation, and metrics collection. 
+ +## Repository layout + +``` +cortex/ +├── Cargo.toml # workspace root +├── cortex.example.toml # example gateway config (copy to cortex.toml, which is git-ignored) +├── README.md +├── CLAUDE.md # ← you are here +├── crates/ +│ ├── cortex-core/ # shared types, config, envelopes +│ │ └── src/ +│ │ ├── lib.rs +│ │ ├── config.rs # figment-based config structs +│ │ ├── node.rs # NodeState, ModelStatus +│ │ ├── openai.rs # OpenAI request/response types +│ │ ├── anthropic.rs # Anthropic request/response types +│ │ ├── translate.rs # OpenAI <-> Anthropic translation +│ │ └── metrics.rs # RequestMetrics, histogram helpers +│ ├── cortex-gateway/ # the HTTP proxy server +│ │ └── src/ +│ │ ├── lib.rs +│ │ ├── state.rs # CortexState: Arc-wrapped, lock-guarded shared state +│ │ ├── router.rs # model -> node routing logic +│ │ ├── proxy.rs # streaming HTTP proxy to backends +│ │ ├── evictor.rs # LRU/priority eviction logic +│ │ ├── poller.rs # background task polling node status +│ │ ├── handlers.rs # axum handlers (chat, completions, models, etc.) +│ │ └── metrics.rs # prometheus exporter endpoint +│ ├── cortex-agent/ # per-node sidecar (future: defrag, restart) +│ │ └── src/ +│ │ ├── lib.rs +│ │ └── agent.rs # local node management +│ └── cortex-cli/ # CLI entrypoint +│ └── src/ +│ └── main.rs +└── tests/ # integration tests (future) +``` + +## Key design decisions + +### mistral.rs HTTP API for model lifecycle +mistral.rs (v0.8+) supports dynamic model loading/unloading at runtime: +- `POST /v1/models/unload {"model_id": "..."}` — frees VRAM, preserves config +- `POST /v1/models/reload {"model_id": "..."}` — explicitly reload +- `POST /v1/models/status {"model_id": "..."}` — loaded/unloaded/reloading +- `GET /v1/models` — lists all models with status field +- Lazy loading: requests to unloaded models trigger automatic reload + +The gateway does NOT manage systemd units for model swaps. It calls these +HTTP endpoints directly. The only systemd interaction is for full-process +restarts after VRAM fragmentation accumulates (defrag_after_cycles). 
+ +### Streaming proxy +Chat completions are proxied as SSE streams. The gateway must: +1. Parse the inbound request to extract the model name +2. Route to the correct backend node +3. Stream the response back, capturing token timing for metrics +4. NOT buffer the full response — true streaming passthrough + +### Anthropic translation +When a request arrives at `/v1/messages` (Anthropic format), the gateway +translates it to OpenAI format before proxying to mistral.rs, then +translates the response back. This is stateless envelope transformation. + +### Eviction +The evictor runs as a background task. Before loading a model on a node +where VRAM is tight: +1. Check if the model is already loaded elsewhere → route there instead +2. Find the LRU model on the target node (excluding pinned models) +3. Call `/v1/models/unload` on that model +4. The incoming request's lazy-load triggers the new model load + +### Metrics +Per-request: model, node, prompt_tokens, completion_tokens, total_tokens, +tok_per_sec, time_to_first_token_ms, total_latency_ms. +Exposed as Prometheus histograms/counters on a separate port. + +## Tech stack + +- **Rust 2024 edition** — workspace with 4 crates +- **Axum 0.8** — HTTP framework (same as mistral.rs itself) +- **reqwest** — HTTP client for proxying to backends +- **figment** — config loading (TOML + env vars) +- **tokio** — async runtime +- **metrics + metrics-exporter-prometheus** — observability +- **tracing** — structured logging + +## Build commands + +```sh +cargo build --release # build all crates +cargo run -p cortex-cli -- serve # run the gateway +cargo test # run all tests +cargo clippy --workspace # lint +``` + +## Environment + +- Targets Fedora 43 (systemd, SELinux enforcing) +- Nodes communicate over a private network (e.g. 
WireGuard mesh) + - One or more GPU nodes running mistral.rs on port 8080 + - Optionally a metrics-only node (no GPU) for Prometheus/Grafana +- Each GPU node runs `mistralrs serve` on port 8080 +- Gateway listens on port 8000 (API) and 9100 (metrics) +- TLS terminated at gateway or via nginx; internal traffic is plaintext over WireGuard + +## Conventions + +- Error handling: `anyhow` for binaries, `thiserror` for library crates +- No `unwrap()` in library code; `expect()` only with clear rationale +- All public types derive `Debug, Clone, Serialize, Deserialize` where sensible +- Config structs use `figment` with TOML as primary source, env vars as override +- Prefer an `Arc`-wrapped lock (e.g. `Arc<RwLock<T>>`) for shared fleet state; minimize lock duration +- SSE streaming uses `tokio_stream` + `eventsource-stream` for parsing +- Log at `info` for request routing, `debug` for proxy details, `warn` for + eviction and node health, `error` for proxy failures + +## Current status + +**Scaffold phase** — crate structure, types, and handler stubs are in place. +The following need implementation: + +1. **cortex-core**: Flesh out OpenAI/Anthropic envelope types with all fields + needed for chat completions (streaming + non-streaming) +2. **cortex-gateway/proxy.rs**: Implement streaming HTTP proxy with SSE passthrough +3. **cortex-gateway/router.rs**: Model-to-node routing with fallback to least-loaded +4. **cortex-gateway/evictor.rs**: LRU eviction with pinning support +5. **cortex-gateway/poller.rs**: Background polling of node `/v1/models` endpoints +6. **cortex-gateway/handlers.rs**: Wire up axum routes to proxy logic +7. **cortex-core/translate.rs**: OpenAI <-> Anthropic request/response translation +8. **cortex-agent**: Sidecar for VRAM defrag restarts (lower priority) +9. 
**Integration tests**: Mock mistralrs backends, test routing + eviction diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..db91ab8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,2787 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-lc-rs" +version = "1.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "axum" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" +dependencies = [ + "axum-core", + "axum-macros", + "bytes", + 
"form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cortex-agent" +version = "0.1.0" +dependencies = [ + "anyhow", + "cortex-core", + "reqwest", + "serde", + "serde_json", + "tokio", + "tracing", +] + +[[package]] +name = "cortex-cli" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "cortex-core", + "cortex-gateway", + "reqwest", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "cortex-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "figment", + "serde", + "serde_json", + "thiserror 2.0.18", + "toml", + "tracing", +] + +[[package]] +name = "cortex-gateway" +version = "0.1.0" +dependencies = [ + "anyhow", + "axum", + "bytes", + "chrono", + "cortex-core", + "eventsource-stream", + "futures", + "metrics", + "metrics-exporter-prometheus", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tower", + "tower-http", + "tracing", +] + +[[package]] +name = "crossbeam-epoch" 
+version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "eventsource-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +dependencies = [ + "futures-core", + "nom", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "pear", + "serde", + "toml", + "uncased", + "version_check", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2b52f86d1d4bc0d6b4e6826d960b1b333217e07d36b882dca570a5e1c48895b" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-native-certs", + "tokio", + "tokio-rustls", + "tower-service", +] 
+ +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + 
"litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", + "serde", + "serde_core", +] + +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +dependencies = 
[ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.185" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + +[[package]] +name = "memchr" +version = "2.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "metrics" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" +dependencies = [ + "ahash", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" +dependencies = [ + "base64", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "indexmap", + "ipnet", + "metrics", + "metrics-util", + "quanta", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-util" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8496cc523d1f94c1385dd8f0f0c2c480b2b8aeccb5b7e4485ad6365523ae376" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.15.5", + "metrics", + "quanta", + "rand", + "rand_xoshiro", + "sketches-ddsketch", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe4646e360ec77dff7dde40ed3d6c5fee52d156ef4a62f53973d38294dad87f" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = 
"openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "openssl-sys" +version = "0.9.113" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad2f2c0eba47118757e4c6d2bff2838f3e0523380021356e7875e858372ce644" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pear" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = 
"pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + +[[package]] +name = "quote" 
+version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-tls", + "hyper-util", + "js-sys", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = 
"0.23.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +dependencies = [ + "aws-lc-rs", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" 
+version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "sketches-ddsketch" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.51.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "iri-string", + "pin-project-lite", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] 
+name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 
+dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + 
"prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..142df11 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,57 @@ +[workspace] +resolver = "2" +members = [ + "crates/cortex-core", + "crates/cortex-gateway", + "crates/cortex-agent", + "crates/cortex-cli", +] + +[workspace.package] +version = "0.1.0" +edition = "2024" +license = "GPL-3.0" +repository = "https://git.lair.cafe/helexa/cortex" + +[workspace.dependencies] +# async runtime +tokio = { version = "1", features = ["full"] } + +# web framework +axum = { version = "0.8", features = ["macros"] } +tower = "0.5" +tower-http = { version = "0.6", features = ["cors", "trace", "timeout"] } + +# serialization +serde = { version = "1", features = ["derive"] } +serde_json = "1" +toml = "0.8" + +# http client (for proxying to mistralrs backends) +reqwest = { version = "0.12", features = ["json", "stream"] } + +# observability +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } +metrics = "0.24" +metrics-exporter-prometheus = "0.16" + +# time +chrono = { version = "0.4", features = ["serde"] } + +# config +figment = { version = "0.10", features = ["toml", "env"] } + +# error handling +anyhow = "1" +thiserror = "2" + +# futures / streams (for SSE proxying) +futures = "0.3" +tokio-stream = "0.1" +eventsource-stream = "0.2" + +# workspace crates +cortex-core = { path = "crates/cortex-core" } +cortex-gateway = { path = "crates/cortex-gateway" } +cortex-agent = { path = "crates/cortex-agent" } diff --git a/README.md b/README.md new file mode 100644 index 0000000..b8d0d61 --- /dev/null +++ 
b/README.md @@ -0,0 +1,138 @@ +# cortex + +A Rust reverse-proxy and fleet management layer for multi-node +[mistral.rs](https://github.com/EricLBuehler/mistral.rs) inference clusters. + +## Problem + +Running local LLMs across multiple GPU nodes (different VRAM tiers, different +model affinities) requires a unified API surface that: + +- Presents a **single `/v1/models` catalogue** merging every model across every + node. +- **Routes requests** to the correct node based on where a model is loaded (or + *can* be loaded). +- Manages **model lifecycle** — unload cold models, reload on demand, pin + critical ones — using the mistral.rs + `/v1/models/{unload,reload,status}` HTTP API (PR #1828+). +- Translates between **OpenAI and Anthropic** request/response envelopes so + every client in the homelab speaks whichever dialect it prefers. +- Captures **per-request metrics** (tokens, tok/s, TTFT, latency) and exposes + them as Prometheus counters/histograms. + +## Architecture + +``` +┌──────────────┐ ┌──────────┐ ┌────────────┐ ┌────────────┐ +│ Claude Code │ │ Zed/IDE │ │ Tidal / mm │ │ curl / etc │ +└──────┬───────┘ └─────┬────┘ └──────┬─────┘ └──────┬─────┘ + │ │ │ │ + └────────────────┴──────┬───────┴───────────────┘ + │ + ┌──────────▼──────────┐ + │ cortex │ + │ (cortex-gateway) │ + │ │ + │ Router · Metrics │ + │ Evictor · Translate│ + └──┬──────┬────────┬──┘ + │ │ │ + ┌──────────▼┐ ┌──▼─────┐ ┌▼──────────┐ + │ gpu-large │ │gpu-med │ │ gpu-small │ + │ mistralrs │ │mistral │ │ mistralrs │ + │ serve │ │rs serve│ │ serve │ + │ :8080 │ │ :8080 │ │ :8080 │ + └───────────┘ └────────┘ └───────────┘ + private network (.internal) +``` + +### Crates + +| Crate | Purpose | +|---|---| +| `cortex-core` | Shared types: config, node/model state, metrics, OpenAI/Anthropic request/response envelopes | +| `cortex-gateway` | Axum HTTP server: proxy, router, evictor, metrics exporter | +| `cortex-agent` | Per-node sidecar: polls local mistralrs, reports to gateway, handles 
restart/defrag | +| `cortex-cli` | CLI entrypoint (`cortex serve`, `cortex status`, etc.) | + +## Node setup + +Each GPU node runs `mistralrs serve` with a multi-model config. Models are +declared but start **unloaded** — mistral.rs lazy-loads on first request and +the gateway can explicitly unload/reload via the HTTP API. + +Example node systemd unit: + +```ini +# /etc/systemd/system/mistralrs.service +[Unit] +Description=mistral.rs inference server +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/mistralrs serve \ + --from-config /etc/mistralrs/config.toml \ + --port 8080 +Restart=on-failure +RestartSec=5 +Environment=CUDA_VISIBLE_DEVICES=0,1 + +[Install] +WantedBy=multi-user.target +``` + +## Gateway config + +```toml +# cortex.toml +[gateway] +listen = "0.0.0.0:8000" +metrics_listen = "0.0.0.0:9100" + +[eviction] +strategy = "lru" # lru | priority +defrag_after_cycles = 50 + +[[nodes]] +name = "gpu-large" +endpoint = "http://gpu-large.internal:8080" +vram_mb = 49_152 # e.g. 2x RTX 4090 +pinned = ["your-org/large-model"] + +[[nodes]] +name = "gpu-medium" +endpoint = "http://gpu-medium.internal:8080" +vram_mb = 24_576 # e.g. RTX 4090 +pinned = ["your-org/medium-model"] + +[[nodes]] +name = "gpu-small" +endpoint = "http://gpu-small.internal:8080" +vram_mb = 12_288 # e.g. RTX 3060 +pinned = ["your-org/embedding-model"] +``` + +## Building + +```sh +cargo build --release +``` + +## Running + +```sh +# start the gateway +cortex serve --config cortex.toml + +# check fleet status +cortex status + +# list all models across nodes +curl http://localhost:8000/v1/models +``` + +## License + +GPL-3.0 diff --git a/cortex.example.toml b/cortex.example.toml new file mode 100644 index 0000000..60a1e68 --- /dev/null +++ b/cortex.example.toml @@ -0,0 +1,45 @@ +# cortex.example.toml — example configuration +# +# Copy to cortex.toml and adjust for your environment. 
+# +# Environment variable overrides use CORTEX_ prefix with __ separators: +# CORTEX_GATEWAY__LISTEN=0.0.0.0:9000 + +[gateway] +listen = "0.0.0.0:8000" +metrics_listen = "0.0.0.0:9100" + +[eviction] +strategy = "lru" +# Restart mistralrs after this many load/unload cycles to defragment VRAM. +# Set to 0 to disable. +defrag_after_cycles = 50 + +# -- Nodes --------------------------------------------------------------- +# Each [[nodes]] entry declares a mistral.rs instance in the fleet. +# Models are discovered by polling the node's /v1/models endpoint. +# Pinned models are never evicted. + +[[nodes]] +name = "gpu-large" +endpoint = "http://gpu-large.internal:8080" +vram_mb = 49152 # e.g. 2x RTX 4090 (48 GB combined) +pinned = [ + "your-org/large-model", +] + +[[nodes]] +name = "gpu-medium" +endpoint = "http://gpu-medium.internal:8080" +vram_mb = 24576 # e.g. RTX 4090 (24 GB) +pinned = [ + "your-org/medium-model", +] + +[[nodes]] +name = "gpu-small" +endpoint = "http://gpu-small.internal:8080" +vram_mb = 12288 # e.g. RTX 3060 (12 GB) +pinned = [ + "your-org/embedding-model", +] diff --git a/crates/cortex-agent/Cargo.toml b/crates/cortex-agent/Cargo.toml new file mode 100644 index 0000000..515ec8a --- /dev/null +++ b/crates/cortex-agent/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "cortex-agent" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +cortex-core.workspace = true +tokio.workspace = true +serde.workspace = true +serde_json.workspace = true +reqwest.workspace = true +tracing.workspace = true +anyhow.workspace = true diff --git a/crates/cortex-agent/src/agent.rs b/crates/cortex-agent/src/agent.rs new file mode 100644 index 0000000..7336984 --- /dev/null +++ b/crates/cortex-agent/src/agent.rs @@ -0,0 +1,72 @@ +//! Per-node agent sidecar. +//! +//! This is a future component that runs on each GPU node alongside mistralrs. +//! It handles: +//! 
- VRAM defragmentation (restarting the mistralrs systemd unit when the
+//!   gateway signals that lifecycle_cycles has exceeded the threshold)
+//! - Local nvidia-smi polling for actual VRAM usage reporting
+//! - Systemd unit management for mistralrs process restarts
+//!
+//! For now this is a stub. The gateway's poller + evictor handle the critical
+//! path (model lifecycle via the mistralrs HTTP API). The agent adds
+//! operational niceties that can be built incrementally.
+
+/// Placeholder for agent configuration.
+#[derive(Debug, Clone)]
+pub struct AgentConfig {
+    /// The local mistralrs endpoint to monitor.
+    pub mistralrs_endpoint: String,
+    /// The systemd unit name for mistralrs (e.g. "mistralrs.service").
+    pub systemd_unit: String,
+}
+
+/// Restart the configured mistralrs systemd unit with `systemctl restart`.
+/// Errors if systemctl cannot be spawned or exits unsuccessfully.
+pub async fn restart_mistralrs(config: &AgentConfig) -> anyhow::Result<()> {
+    let unit = config.systemd_unit.as_str();
+    tracing::warn!(unit = %unit, "restarting mistralrs for VRAM defragmentation");
+
+    let restart = tokio::process::Command::new("systemctl")
+        .args(["restart", unit])
+        .output()
+        .await?;
+
+    // Guard clause: on a non-zero exit, report whatever systemctl wrote to
+    // stderr and stop here.
+    if !restart.status.success() {
+        let stderr = String::from_utf8_lossy(&restart.stderr);
+        anyhow::bail!("systemctl restart failed: {stderr}");
+    }
+
+    tracing::info!(unit = %unit, "mistralrs restarted successfully");
+    Ok(())
+}
+
+/// Query nvidia-smi for current VRAM usage on this node.
+/// Returns (used_mb, total_mb) for each GPU.
+pub async fn query_vram() -> anyhow::Result<Vec<(u64, u64)>> {
+    let output = tokio::process::Command::new("nvidia-smi")
+        .args([
+            "--query-gpu=memory.used,memory.total",
+            "--format=csv,noheader,nounits",
+        ])
+        .output()
+        .await?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("nvidia-smi failed: {stderr}");
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let mut gpus = Vec::new();
+    for line in stdout.lines() {
+        let parts: Vec<&str> = line.split(',').map(|s| s.trim()).collect();
+        if parts.len() == 2 {
+            let used: u64 = parts[0].parse().unwrap_or(0);
+            let total: u64 = parts[1].parse().unwrap_or(0);
+            gpus.push((used, total));
+        }
+    }
+    Ok(gpus)
+}
diff --git a/crates/cortex-agent/src/lib.rs b/crates/cortex-agent/src/lib.rs
new file mode 100644
index 0000000..f17bc55
--- /dev/null
+++ b/crates/cortex-agent/src/lib.rs
@@ -0,0 +1 @@
+pub mod agent;
diff --git a/crates/cortex-cli/Cargo.toml b/crates/cortex-cli/Cargo.toml
new file mode 100644
index 0000000..894af75
--- /dev/null
+++ b/crates/cortex-cli/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "cortex-cli"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[[bin]]
+name = "cortex"
+path = "src/main.rs"
+
+[dependencies]
+cortex-core.workspace = true
+cortex-gateway.workspace = true
+tokio.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+anyhow.workspace = true
+reqwest.workspace = true
+serde_json.workspace = true
+clap = { version = "4", features = ["derive"] }
diff --git a/crates/cortex-cli/src/main.rs b/crates/cortex-cli/src/main.rs
new file mode 100644
index 0000000..3ae7a2b
--- /dev/null
+++ b/crates/cortex-cli/src/main.rs
@@ -0,0 +1,124 @@
+use anyhow::Result;
+use clap::{Parser, Subcommand};
+use cortex_core::config::GatewayConfig;
+use tracing_subscriber::EnvFilter;
+
+#[derive(Parser)]
+#[command(name = "cortex")]
+#[command(about = "Unified inference gateway for multi-node mistral.rs clusters")]
+#[command(version)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand)]
+enum Commands {
+    /// Start the gateway server.
+    Serve {
+        /// Path to the gateway config file.
+        #[arg(short, long, default_value = "cortex.toml")]
+        config: String,
+    },
+    /// Print the fleet status (models, nodes, health).
+    Status {
+        /// Gateway API endpoint to query.
+        #[arg(short, long, default_value = "http://localhost:8000")]
+        endpoint: String,
+    },
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    // Initialize tracing with env filter (e.g. RUST_LOG=cortex_gateway=debug).
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| EnvFilter::new("info,cortex_gateway=debug")),
+        )
+        .init();
+
+    let cli = Cli::parse();
+
+    match cli.command {
+        Commands::Serve { config } => {
+            let cfg = GatewayConfig::load(&config).map_err(|e| {
+                anyhow::anyhow!("failed to load config from '{config}': {e}")
+            })?;
+
+            tracing::info!(
+                nodes = cfg.nodes.len(),
+                listen = %cfg.gateway.listen,
+                "starting cortex"
+            );
+
+            // Install Prometheus metrics exporter on a separate port.
+            cortex_gateway::metrics::install(&cfg.gateway.metrics_listen)?;
+
+            cortex_gateway::run(cfg).await?;
+        }
+        Commands::Status { endpoint } => {
+            print_status(&endpoint).await?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Print fleet health and the merged model catalogue of a running gateway.
+///
+/// Fetches `{endpoint}/health` and `{endpoint}/v1/models` and writes both to
+/// stdout.
+///
+/// # Errors
+///
+/// Fails when a request cannot be sent, when `/v1/models` answers with a
+/// non-success status, or when a response body is not valid JSON.
+async fn print_status(endpoint: &str) -> Result<()> {
+    let client = reqwest::Client::new();
+
+    // Fetch health. The status code is deliberately not checked here so that
+    // whatever JSON the gateway returns is still shown to the operator.
+    let health: serde_json::Value = client
+        .get(format!("{endpoint}/health"))
+        .send()
+        .await?
+        .json()
+        .await?;
+
+    println!("Fleet health: {}", serde_json::to_string_pretty(&health)?);
+
+    // Fetch models. Reject 4xx/5xx replies up front: otherwise an error
+    // payload without a `data` array would print as an empty model list.
+    let models: serde_json::Value = client
+        .get(format!("{endpoint}/v1/models"))
+        .send()
+        .await?
+        .error_for_status()?
+        .json()
+        .await?;
+
+    println!("\nModels:");
+    if let Some(data) = models.get("data").and_then(|d| d.as_array()) {
+        for model in data {
+            let id = model.get("id").and_then(|v| v.as_str()).unwrap_or("?");
+            let locations = model
+                .get("locations")
+                .and_then(|v| v.as_array())
+                .map(|arr| {
+                    arr.iter()
+                        .filter_map(|l| {
+                            let node = l.get("node")?.as_str()?;
+                            let status = l.get("status")?.as_str()?;
+                            Some(format!("{node}({status})"))
+                        })
+                        .collect::<Vec<_>>()
+                        .join(", ")
+                })
+                .unwrap_or_default();
+            println!(" {id:40} {locations}");
+        }
+    }
+
+    Ok(())
+}
diff --git a/crates/cortex-core/Cargo.toml b/crates/cortex-core/Cargo.toml
new file mode 100644
index 0000000..76b8dd1
--- /dev/null
+++ b/crates/cortex-core/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "cortex-core"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+toml.workspace = true
+figment.workspace = true
+chrono.workspace = true
+anyhow.workspace = true
+thiserror.workspace = true
+tracing.workspace = true
diff --git a/crates/cortex-core/src/anthropic.rs b/crates/cortex-core/src/anthropic.rs
new file mode 100644
index 0000000..921eda7
--- /dev/null
+++ b/crates/cortex-core/src/anthropic.rs
@@ -0,0 +1,87 @@
+//! Anthropic Messages API request and response types.
+//!
+//! These mirror the `/v1/messages` format used by the Anthropic API.
+//! The gateway accepts these, translates to OpenAI format, proxies to
+//! mistral.rs, then translates the response back.
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+// ── Messages request ─────────────────────────────────────────────────
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MessagesRequest {
+    pub model: String,
+    pub messages: Vec<AnthropicMessage>,
+    pub max_tokens: u64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub system: Option<SystemPrompt>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f64>, // NOTE(review): float width reconstructed — confirm
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f64>, // NOTE(review): float width reconstructed — confirm
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum SystemPrompt {
+    Text(String),
+    Blocks(Vec<ContentBlock>),
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AnthropicMessage {
+    pub role: String,
+    pub content: AnthropicContent,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum AnthropicContent {
+    Text(String),
+    Blocks(Vec<ContentBlock>),
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContentBlock {
+    #[serde(rename = "type")]
+    pub block_type: String,
+    #[serde(flatten)]
+    pub data: Value,
+}
+
+// ── Messages response ────────────────────────────────────────────────
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MessagesResponse {
+    pub id: String,
+    #[serde(rename = "type")]
+    pub response_type: String,
+    pub role: String,
+    pub content: Vec<ContentBlock>,
+    pub model: String,
+    pub stop_reason: Option<String>,
+    pub usage: AnthropicUsage,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AnthropicUsage {
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+}
+
+// ── Streaming events ─────────────────────────────────────────────────
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StreamEvent {
+    #[serde(rename = "type")]
+    pub event_type: String,
+    #[serde(flatten)]
+    pub data: Value,
+}
diff --git
a/crates/cortex-core/src/config.rs b/crates/cortex-core/src/config.rs
new file mode 100644
index 0000000..97f6f83
--- /dev/null
+++ b/crates/cortex-core/src/config.rs
@@ -0,0 +1,79 @@
+use figment::{
+    Figment,
+    providers::{Env, Format, Toml},
+};
+use serde::{Deserialize, Serialize};
+use std::path::Path;
+
+/// Top-level gateway configuration: listener settings, eviction policy
+/// and the list of backend nodes.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GatewayConfig {
+    pub gateway: GatewaySettings,
+    pub eviction: EvictionSettings,
+    pub nodes: Vec<NodeConfig>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GatewaySettings {
+    /// Address to listen on for API requests (e.g. "0.0.0.0:8000")
+    pub listen: String,
+    /// Address to listen on for Prometheus metrics (e.g. "0.0.0.0:9100")
+    pub metrics_listen: String,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EvictionSettings {
+    /// Eviction strategy: "lru" or "priority"
+    pub strategy: EvictionStrategy,
+    /// Restart the mistralrs process after this many load/unload cycles
+    /// to reclaim fragmented VRAM. 0 = never.
+    ///
+    /// When the key is absent from the config the serde default (0) applies.
+    #[serde(default)]
+    pub defrag_after_cycles: u32,
+}
+
+/// Eviction policy; (de)serialized in lowercase (`"lru"`, `"priority"`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum EvictionStrategy {
+    Lru,
+    Priority,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct NodeConfig {
+    /// Human-readable node name (e.g. "gpu-large")
+    pub name: String,
+    /// Base URL of the mistralrs HTTP server (e.g. "http://gpu-large.internal:8080")
+    pub endpoint: String,
+    /// Total VRAM in MB across all GPUs on this node
+    pub vram_mb: u64,
+    /// Model IDs that should never be evicted from this node
+    #[serde(default)]
+    pub pinned: Vec<String>,
+}
+
+impl GatewayConfig {
+    /// Load configuration from a TOML file, with environment variable overrides.
+    /// Env vars are prefixed with `CORTEX_` and use `__` as a separator
+    /// (e.g. `CORTEX_GATEWAY__LISTEN=0.0.0.0:9000`).
+    ///
+    /// # Errors
+    /// Returns the figment error when the merged sources cannot be
+    /// deserialized into a [`GatewayConfig`].
+    pub fn load(path: impl AsRef<Path>) -> Result<Self, figment::Error> {
+        // Later providers win, so the environment overrides the file.
+        let file = Toml::file(path);
+        let env = Env::prefixed("CORTEX_").split("__");
+        Figment::new().merge(file).merge(env).extract()
+    }
+}
+
+impl Default for GatewayConfig {
+    /// Defaults with no nodes configured.
+    ///
+    /// Note: `defrag_after_cycles` is 50 here, whereas a config file that
+    /// omits the key deserializes to 0 ("never").
+    fn default() -> Self {
+        let gateway = GatewaySettings {
+            listen: "0.0.0.0:8000".into(),
+            metrics_listen: "0.0.0.0:9100".into(),
+        };
+        let eviction = EvictionSettings {
+            strategy: EvictionStrategy::Lru,
+            defrag_after_cycles: 50,
+        };
+        Self {
+            gateway,
+            eviction,
+            nodes: Vec::new(),
+        }
+    }
+}
diff --git a/crates/cortex-core/src/lib.rs b/crates/cortex-core/src/lib.rs
new file mode 100644
index 0000000..d54bc3e
--- /dev/null
+++ b/crates/cortex-core/src/lib.rs
@@ -0,0 +1,6 @@
+pub mod anthropic;
+pub mod config;
+pub mod metrics;
+pub mod node;
+pub mod openai;
+pub mod translate;
diff --git a/crates/cortex-core/src/metrics.rs b/crates/cortex-core/src/metrics.rs
new file mode 100644
index 0000000..4755520
--- /dev/null
+++ b/crates/cortex-core/src/metrics.rs
@@ -0,0 +1,23 @@
+//! Request-level metrics captured by the gateway proxy layer.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+/// Metrics captured for a single proxied request.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RequestMetrics {
+    pub timestamp: DateTime<Utc>,
+    pub model: String,
+    pub node: String,
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+    pub total_tokens: u64,
+    /// Tokens per second for the generation phase.
+    pub tok_per_sec: f64,
+    /// Time from request start to first SSE chunk (streaming) or full response.
+    pub time_to_first_token_ms: u64,
+    /// Total request latency including proxy overhead.
+    pub total_latency_ms: u64,
+    /// Whether this request triggered a model load (cold start).
+    pub cold_start: bool,
+}
diff --git a/crates/cortex-core/src/node.rs b/crates/cortex-core/src/node.rs
new file mode 100644
index 0000000..56df64e
--- /dev/null
+++ b/crates/cortex-core/src/node.rs
@@ -0,0 +1,74 @@
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+/// Runtime state of a single node in the fleet.
+#[derive(Debug, Clone)]
+pub struct NodeState {
+    pub name: String,
+    pub endpoint: String,
+    pub vram_mb: u64,
+    /// Model IDs that must never be evicted from this node.
+    pub pinned: Vec<String>,
+    pub healthy: bool,
+    /// Models known on this node, keyed by model ID.
+    pub models: HashMap<String, ModelEntry>,
+    /// Number of load/unload cycles since last process restart.
+    pub lifecycle_cycles: u32,
+    pub last_poll: Option<DateTime<Utc>>,
+}
+
+/// A model registered on a node, with its runtime status.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelEntry {
+    pub id: String,
+    pub status: ModelStatus,
+    /// When this model was last used (for LRU eviction).
+    pub last_accessed: Option<DateTime<Utc>>,
+    /// Estimated VRAM usage in MB when loaded.
+    pub vram_estimate_mb: Option<u64>,
+}
+
+/// Model lifecycle status, matching the mistral.rs API.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ModelStatus {
+    Loaded,
+    Unloaded,
+    Reloading,
+}
+
+/// Unified model entry as exposed by the gateway's `/v1/models` endpoint.
+/// Includes which node(s) host this model and their status.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CortexModelEntry {
+    pub id: String,
+    pub object: String,
+    /// Which nodes have this model (and their status).
+    pub locations: Vec<ModelLocation>,
+}
+
+/// One node's view of a model: where it lives and in which state.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelLocation {
+    pub node: String,
+    pub status: ModelStatus,
+    pub vram_estimate_mb: Option<u64>,
+}
+
+/// Response from mistral.rs `GET /v1/models`.
+/// This is the upstream format we parse when polling nodes.
+#[derive(Debug, Clone, Deserialize)]
+pub struct MistralModelsResponse {
+    pub data: Vec<MistralModelEntry>,
+}
+
+/// One element of the upstream model list.
+#[derive(Debug, Clone, Deserialize)]
+pub struct MistralModelEntry {
+    pub id: String,
+    /// Lifecycle status as reported upstream; `None` when the field is absent.
+    #[serde(default)]
+    pub status: Option<String>,
+}
+
+/// Request body for mistral.rs model lifecycle endpoints.
+#[derive(Debug, Clone, Serialize)]
+pub struct ModelLifecycleRequest {
+    pub model_id: String,
+}
diff --git a/crates/cortex-core/src/openai.rs b/crates/cortex-core/src/openai.rs
new file mode 100644
index 0000000..efdf7de
--- /dev/null
+++ b/crates/cortex-core/src/openai.rs
@@ -0,0 +1,122 @@
+//! OpenAI-compatible request and response types.
+//!
+//! These are a subset sufficient for chat completions (streaming + non-streaming).
+//! Fields not relevant to proxying are captured as `serde_json::Value` via
+//! `#[serde(flatten)]` so we forward them without needing to enumerate every
+//! extension field mistral.rs supports.
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+// ── Chat completion request ──────────────────────────────────────────
+
+/// Body of a chat-completion request.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionRequest {
+    pub model: String,
+    pub messages: Vec<ChatMessage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+    /// All other fields (tools, response_format, mistral.rs extensions, etc.)
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+/// One chat message; keys other than `role`/`content` are kept in `extra`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatMessage {
+    pub role: String,
+    pub content: MessageContent,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+/// Content can be a simple string or an array of content parts (for vision).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum MessageContent {
+    Text(String),
+    Parts(Vec<Value>),
+}
+
+// ── Chat completion response (non-streaming) ─────────────────────────
+
+/// Non-streaming chat-completion response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionResponse {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<ChatCompletionChoice>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub usage: Option<Usage>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+/// One alternative in a [`ChatCompletionResponse`].
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionChoice {
+    pub index: u32,
+    pub message: ChatMessage,
+    pub finish_reason: Option<String>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+// ── Streaming chunk ──────────────────────────────────────────────────
+
+/// One chunk of a streaming chat completion.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionChunk {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<ChunkChoice>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub usage: Option<Usage>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+/// One alternative inside a streaming chunk; `delta` is kept as raw JSON.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChunkChoice {
+    pub index: u32,
+    pub delta: Value,
+    pub finish_reason: Option<String>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
+
+// ── Usage ────────────────────────────────────────────────────────────
+
+/// Token accounting in OpenAI naming.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Usage {
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+    pub total_tokens: u64,
+}
+
+// ── Models list response ─────────────────────────────────────────────
+
+/// Envelope of a model listing.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelsResponse {
+    pub object: String,
+    pub data: Vec<ModelObject>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelObject {
+    pub id: String,
+    pub object: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub owned_by: Option<String>,
+    /// Gateway extensions: which node(s) host this model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub locations: Option<Vec<Value>>,
+    #[serde(flatten)]
+    pub extra: Value,
+}
diff --git a/crates/cortex-core/src/translate.rs b/crates/cortex-core/src/translate.rs
new file mode 100644
index 0000000..2d45a65
--- /dev/null
+++ b/crates/cortex-core/src/translate.rs
@@ -0,0 +1,114 @@
+//! Translation between OpenAI and Anthropic request/response envelopes.
+//!
+//! This is a stateless transformation — no context is carried between requests.
+
+use crate::anthropic::{
+    AnthropicContent, AnthropicMessage, AnthropicUsage, ContentBlock, MessagesRequest,
+    MessagesResponse, SystemPrompt,
+};
+use crate::openai::{
+    ChatCompletionChoice, ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Usage,
+    MessageContent,
+};
+use serde_json::{json, Value};
+
+/// Concatenate the `text` payload of every `"text"` block, newline-separated.
+/// Blocks of other types, and text blocks without a string `text`, are skipped.
+fn join_text_blocks(blocks: &[ContentBlock]) -> String {
+    blocks
+        .iter()
+        .filter(|b| b.block_type == "text")
+        .filter_map(|b| b.data.get("text").and_then(Value::as_str))
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+/// Convert an Anthropic Messages request into an OpenAI ChatCompletion request.
+///
+/// * `system` becomes a leading `system` message. A block-form system prompt
+///   is flattened to its text (it used to be JSON-stringified, which put raw
+///   JSON into the prompt).
+/// * Message content made only of text blocks collapses to a plain string;
+///   anything else is forwarded block-by-block as content parts.
+/// * `extra` is forwarded unchanged.
+pub fn anthropic_to_openai(req: MessagesRequest) -> ChatCompletionRequest {
+    let mut messages = Vec::with_capacity(req.messages.len() + 1);
+
+    // Anthropic `system` field becomes a system message.
+    if let Some(system) = req.system {
+        let text = match system {
+            SystemPrompt::Text(text) => text,
+            SystemPrompt::Blocks(blocks) => join_text_blocks(&blocks),
+        };
+        messages.push(ChatMessage {
+            role: "system".into(),
+            content: MessageContent::Text(text),
+            extra: Value::Null,
+        });
+    }
+
+    // Convert message roles and content.
+    for msg in req.messages {
+        let content = match msg.content {
+            AnthropicContent::Text(text) => MessageContent::Text(text),
+            // Text-only content: hand the backend a plain string.
+            AnthropicContent::Blocks(blocks)
+                if !blocks.is_empty() && blocks.iter().all(|b| b.block_type == "text") =>
+            {
+                MessageContent::Text(join_text_blocks(&blocks))
+            }
+            // Mixed content (images, etc.) and the empty list: pass as parts.
+            AnthropicContent::Blocks(blocks) => {
+                MessageContent::Parts(blocks.into_iter().map(|b| json!(b)).collect())
+            }
+        };
+        messages.push(ChatMessage {
+            role: msg.role,
+            content,
+            extra: Value::Null,
+        });
+    }
+
+    ChatCompletionRequest {
+        model: req.model,
+        messages,
+        temperature: req.temperature,
+        top_p: req.top_p,
+        max_tokens: Some(req.max_tokens),
+        stream: req.stream,
+        extra: req.extra,
+    }
+}
+
+/// Convert an OpenAI ChatCompletion response into an Anthropic Messages response.
+///
+/// Only the first choice is used; with no choices the result carries an empty
+/// text block and no stop reason. Missing usage is reported as zero tokens.
+pub fn openai_to_anthropic(resp: ChatCompletionResponse) -> MessagesResponse {
+    let (content_text, stop_reason) = match resp.choices.into_iter().next() {
+        None => (String::new(), None),
+        Some(choice) => {
+            let text = match choice.message.content {
+                MessageContent::Text(text) => text,
+                MessageContent::Parts(parts) => serde_json::to_string(&parts).unwrap_or_default(),
+            };
+            // Map OpenAI finish reasons onto Anthropic stop reasons; unknown
+            // values pass through untouched.
+            let stop = choice.finish_reason.map(|reason| match reason.as_str() {
+                "stop" => "end_turn".to_string(),
+                "length" => "max_tokens".to_string(),
+                other => other.to_string(),
+            });
+            (text, stop)
+        }
+    };
+
+    let (input_tokens, output_tokens) = match resp.usage {
+        Some(usage) => (usage.prompt_tokens, usage.completion_tokens),
+        None => (0, 0),
+    };
+
+    MessagesResponse {
+        id: resp.id,
+        response_type: "message".into(),
+        role: "assistant".into(),
+        content: vec![ContentBlock {
+            block_type: "text".into(),
+            data: json!({ "text": content_text }),
+        }],
+        model: resp.model,
+        stop_reason,
+        usage: AnthropicUsage {
+            input_tokens,
+            output_tokens,
+        },
+        extra: Value::Null,
+    }
+}
diff --git a/crates/cortex-gateway/Cargo.toml b/crates/cortex-gateway/Cargo.toml
new file mode 100644
index 0000000..7515467
--- /dev/null
+++ b/crates/cortex-gateway/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "cortex-gateway"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+cortex-core.workspace = true
+tokio.workspace = true
+axum.workspace = true
+tower.workspace = true
+tower-http.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+reqwest.workspace = true
+tracing.workspace = true
+metrics.workspace = true
+metrics-exporter-prometheus.workspace = true
+chrono.workspace = true
+anyhow.workspace = true
+thiserror.workspace = true
+futures.workspace = true
+tokio-stream.workspace = true
+eventsource-stream.workspace = true
+bytes = "1"
diff --git a/crates/cortex-gateway/src/evictor.rs b/crates/cortex-gateway/src/evictor.rs
new file mode 100644
index 0000000..66989d8
--- /dev/null
+++ b/crates/cortex-gateway/src/evictor.rs
@@ -0,0 +1,106 @@
+//! Model eviction logic.
+//!
+//! The evictor runs as a background task. When the router determines that a
+//! model needs to be loaded on a node but VRAM is tight, it can request
+//! eviction via a channel. The evictor then:
+//! 1. Identifies the LRU model on that node (excluding pinned models)
+//! 2. Calls `POST /v1/models/unload` on the node
+//! 3. Increments the lifecycle cycle counter (for defrag tracking)
+
+use crate::state::CortexState;
+use cortex_core::node::{ModelLifecycleRequest, ModelStatus};
+use std::sync::Arc;
+use std::time::Duration;
+
+/// Runs forever. Currently a placeholder that periodically checks for
+/// eviction opportunities. In the future, this will be driven by a
+/// channel from the router when VRAM pressure is detected.
+pub async fn eviction_loop(fleet: Arc<CortexState>) {
+    // TODO: Replace this polling approach with a channel-driven design
+    // where the router sends eviction requests when it detects that a
+    // model load would exceed available VRAM.
+    loop {
+        tokio::time::sleep(Duration::from_secs(30)).await;
+        // Placeholder: the actual eviction logic is in `evict_lru_on_node`,
+        // called on demand by the router.
+        let _ = &fleet; // suppress unused warning
+    }
+}
+
+/// Evict the least-recently-used model on a given node.
+/// Returns the model ID that was evicted, or None if nothing could be evicted.
+///
+/// Models with no `last_accessed` timestamp sort before any timestamped
+/// model (`None < Some(_)`), so they are chosen first.
+///
+/// # Errors
+/// Fails when the node is unknown, the unload request cannot be sent, or the
+/// node answers with a non-success status.
+pub async fn evict_lru_on_node(
+    fleet: &CortexState,
+    node_name: &str,
+) -> anyhow::Result<Option<String>> {
+    // Choose the victim under a read lock; the guard is released at the end
+    // of this block, before any network I/O.
+    let (endpoint, victim) = {
+        let nodes = fleet.nodes.read().await;
+        let Some(node) = nodes.get(node_name) else {
+            anyhow::bail!("node '{node_name}' not found");
+        };
+
+        // Loaded, not pinned, oldest `last_accessed`.
+        let victim = node
+            .models
+            .values()
+            .filter(|m| m.status == ModelStatus::Loaded && !node.pinned.contains(&m.id))
+            .min_by_key(|m| m.last_accessed)
+            .map(|m| m.id.clone());
+
+        (node.endpoint.clone(), victim)
+    };
+
+    let Some(model_id) = victim else {
+        tracing::info!(node = node_name, "no evictable models found");
+        return Ok(None);
+    };
+
+    tracing::info!(node = node_name, model = %model_id, "evicting model");
+
+    let url = format!("{endpoint}/v1/models/unload");
+    let payload = ModelLifecycleRequest {
+        model_id: model_id.clone(),
+    };
+    let resp = fleet.http_client.post(&url).json(&payload).send().await?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let body = resp.text().await.unwrap_or_default();
+        tracing::error!(
+            node = node_name,
+            model = %model_id,
+            status = %status,
+            body = %body,
+            "failed to evict model"
+        );
+        anyhow::bail!("eviction failed: {status} {body}");
+    }
+
+    // Update local state.
+    let mut nodes = fleet.nodes.write().await;
+    if let Some(node) = nodes.get_mut(node_name) {
+        if let Some(entry) = node.models.get_mut(&model_id) {
+            entry.status = ModelStatus::Unloaded;
+        }
+        node.lifecycle_cycles += 1;
+
+        // Check if we should flag for defrag.
+        let threshold = fleet.eviction.defrag_after_cycles;
+        if threshold > 0 && node.lifecycle_cycles >= threshold {
+            tracing::warn!(
+                node = node_name,
+                cycles = node.lifecycle_cycles,
+                "VRAM fragmentation threshold reached — consider restarting mistralrs"
+            );
+        }
+    }
+
+    tracing::info!(node = node_name, model = %model_id, "model evicted");
+    Ok(Some(model_id))
+}
diff --git a/crates/cortex-gateway/src/handlers.rs b/crates/cortex-gateway/src/handlers.rs
new file mode 100644
index 0000000..d24ae9b
--- /dev/null
+++ b/crates/cortex-gateway/src/handlers.rs
@@ -0,0 +1,207 @@
+//! Axum HTTP handlers for the gateway API surface.
+
+use crate::proxy;
+use crate::router;
+use crate::state::CortexState;
+use axum::body::Bytes;
+use axum::extract::State;
+use axum::http::HeaderMap;
+use axum::response::{IntoResponse, Json, Response};
+use axum::routing::{get, post};
+use axum::Router;
+use cortex_core::node::{CortexModelEntry, ModelLocation};
+use cortex_core::openai::ChatCompletionRequest;
+use serde_json::{json, Value};
+use std::sync::Arc;
+
+/// Route table of the public API; `/` is an alias for `/health`.
+pub fn api_routes() -> Router<Arc<CortexState>> {
+    Router::new()
+        .route("/v1/chat/completions", post(chat_completions))
+        .route("/v1/completions", post(completions))
+        .route("/v1/models", get(list_models))
+        .route("/v1/messages", post(anthropic_messages))
+        .route("/health", get(health))
+        .route("/", get(health))
+}
+
+/// `POST /v1/chat/completions` — proxy to the appropriate backend node.
+async fn chat_completions(
+    State(fleet): State<Arc<CortexState>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    proxy_by_model(&fleet, "/v1/chat/completions", headers, body).await
+}
+
+/// `POST /v1/completions` — proxy completions endpoint.
+async fn completions(
+    State(fleet): State<Arc<CortexState>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    proxy_by_model(&fleet, "/v1/completions", headers, body).await
+}
+
+/// Shared body of the pass-through endpoints: read `model` from the JSON
+/// body, pick a node, and forward the untouched bytes to `path` on it.
+async fn proxy_by_model(
+    fleet: &Arc<CortexState>,
+    path: &str,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    let Some(model_id) = extract_model(&body) else {
+        return error_response(400, "missing 'model' field in request body");
+    };
+
+    let route = match router::resolve(fleet, &model_id).await {
+        Ok(route) => route,
+        Err(e) => return route_error_response(&e),
+    };
+
+    match proxy::forward_request(&fleet.http_client, &route, path, headers, body).await {
+        Ok(resp) => resp,
+        Err(e) => e.into_response(),
+    }
+}
+
+/// Turn a routing failure into an HTTP error: an unknown model is the
+/// caller's problem (404); a fleet with no healthy node is ours (503).
+fn route_error_response(err: &router::RouteError) -> Response {
+    let status = match err {
+        router::RouteError::ModelNotFound(_) => 404,
+        router::RouteError::NoHealthyNodes => 503,
+    };
+    error_response(status, &err.to_string())
+}
+
+/// `POST /v1/messages` — accept Anthropic format, translate, proxy, translate back.
+///
+/// Only the request direction is translated so far; the backend's OpenAI
+/// response is returned unchanged (see the TODOs below).
+async fn anthropic_messages(
+    State(fleet): State<Arc<CortexState>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    // Parse as Anthropic request.
+    let anth_req: cortex_core::anthropic::MessagesRequest = match serde_json::from_slice(&body) {
+        Ok(req) => req,
+        Err(e) => return error_response(400, &format!("invalid Anthropic request: {e}")),
+    };
+
+    let model_id = anth_req.model.clone();
+    let is_streaming = anth_req.stream.unwrap_or(false);
+
+    // Translate to OpenAI format.
+    let openai_req = cortex_core::translate::anthropic_to_openai(anth_req);
+    let openai_body = match serde_json::to_vec(&openai_req) {
+        Ok(bytes) => Bytes::from(bytes),
+        Err(e) => return error_response(500, &format!("translation error: {e}")),
+    };
+
+    let route = match router::resolve(&fleet, &model_id).await {
+        Ok(route) => route,
+        Err(e) => return route_error_response(&e),
+    };
+
+    tracing::debug!(
+        model = %model_id,
+        streaming = is_streaming,
+        "forwarding translated Anthropic request"
+    );
+
+    // TODO (streaming): streaming Anthropic translation requires converting
+    // SSE format. For now the OpenAI SSE stream is proxied directly (clients
+    // that can handle OpenAI SSE will work; full Anthropic SSE translation is
+    // a follow-up).
+    // TODO (non-streaming): buffer response, parse as OpenAI
+    // ChatCompletionResponse, translate to Anthropic MessagesResponse.
+    // For now, the OpenAI response is returned as-is.
+    match proxy::forward_request(
+        &fleet.http_client,
+        &route,
+        "/v1/chat/completions",
+        headers,
+        openai_body,
+    )
+    .await
+    {
+        Ok(resp) => resp,
+        Err(e) => e.into_response(),
+    }
+}
+
+/// `GET /v1/models` — aggregate models from all nodes.
+async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
+    let nodes = fleet.nodes.read().await;
+
+    // A BTreeMap keeps the listing sorted by model ID, so the response does
+    // not change order from one call to the next.
+    let mut by_id: std::collections::BTreeMap<String, CortexModelEntry> =
+        std::collections::BTreeMap::new();
+
+    for node in nodes.values() {
+        for (model_id, entry) in &node.models {
+            by_id
+                .entry(model_id.clone())
+                .or_insert_with(|| CortexModelEntry {
+                    id: model_id.clone(),
+                    object: "model".into(),
+                    locations: Vec::new(),
+                })
+                .locations
+                .push(ModelLocation {
+                    node: node.name.clone(),
+                    status: entry.status,
+                    vram_estimate_mb: entry.vram_estimate_mb,
+                });
+        }
+    }
+
+    // Node iteration order is arbitrary; sort each model's locations too.
+    for entry in by_id.values_mut() {
+        entry.locations.sort_by(|a, b| a.node.cmp(&b.node));
+    }
+
+    let data: Vec<Value> = by_id.values().map(|e| json!(e)).collect();
+
+    Json(json!({
+        "object": "list",
+        "data": data,
+    }))
+}
+
+/// `GET /health`
+///
+/// Reports `"ok"` while at least one node is healthy, `"degraded"` otherwise,
+/// together with the healthy/total node counts.
+async fn health(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
+    let nodes = fleet.nodes.read().await;
+    let total_count = nodes.len();
+    let healthy_count = nodes.values().filter(|n| n.healthy).count();
+    let status = if healthy_count > 0 { "ok" } else { "degraded" };
+
+    Json(json!({
+        "status": status,
+        "nodes": {
+            "healthy": healthy_count,
+            "total": total_count,
+        }
+    }))
+}
+
+// ── Helpers ──────────────────────────────────────────────────────────
+
+/// Pull the top-level string field `model` out of a JSON body.
+/// `None` when the body is not JSON, or the field is missing or not a string.
+fn extract_model(body: &[u8]) -> Option<String> {
+    let parsed: Value = serde_json::from_slice(body).ok()?;
+    let model = parsed.get("model")?.as_str()?;
+    Some(model.to_string())
+}
+
+/// Build a JSON error response. A status code that is not a valid HTTP
+/// status falls back to 500.
+fn error_response(status: u16, message: &str) -> Response {
+    let code = axum::http::StatusCode::from_u16(status)
+        .unwrap_or(axum::http::StatusCode::INTERNAL_SERVER_ERROR);
+    let body = json!({
+        "error": {
+            "message": message,
+            "type": "gateway_error",
+        }
+    });
+    (code, Json(body)).into_response()
+}
diff --git a/crates/cortex-gateway/src/lib.rs b/crates/cortex-gateway/src/lib.rs
new file mode 100644
index 0000000..5e5b7c6
--- /dev/null
+++ b/crates/cortex-gateway/src/lib.rs
@@ -0,0 +1,51 @@
+pub mod evictor;
+pub mod handlers;
+pub mod metrics;
+pub mod poller;
+pub mod proxy;
+pub mod router;
+pub mod state;
+
+use anyhow::Result;
+use axum::Router;
+use cortex_core::config::GatewayConfig;
+use std::sync::Arc;
+use tower_http::cors::CorsLayer;
+use tower_http::trace::TraceLayer;
+
+/// Build the Axum application router with all routes wired up.
+///
+/// Applies a permissive CORS layer and HTTP request tracing, then binds the
+/// shared fleet state.
+pub fn build_app(fleet: Arc<state::CortexState>) -> Router {
+    let routes = handlers::api_routes();
+    Router::new()
+        .merge(routes)
+        .layer(CorsLayer::permissive())
+        .layer(TraceLayer::new_for_http())
+        .with_state(fleet)
+}
+
+/// Start the gateway: build state from config, spawn background tasks,
+/// bind the HTTP server.
+///
+/// # Errors
+/// Fails when the configured listen address does not parse, the socket
+/// cannot be bound, or the server stops with an error.
+pub async fn run(config: GatewayConfig) -> Result<()> {
+    // Validate the address first so a typo fails before any task is spawned,
+    // and name the offending value in the error.
+    let listen_addr = config
+        .gateway
+        .listen
+        .parse::<std::net::SocketAddr>()
+        .map_err(|e| anyhow::anyhow!("invalid listen address '{}': {e}", config.gateway.listen))?;
+
+    let fleet = Arc::new(state::CortexState::from_config(&config));
+
+    // Spawn the background poller that refreshes node/model status.
+    tokio::spawn(poller::poll_loop(Arc::clone(&fleet)));
+
+    // Spawn the evictor (reacts to VRAM pressure events from the router).
+    tokio::spawn(evictor::eviction_loop(Arc::clone(&fleet)));
+
+    let app = build_app(Arc::clone(&fleet));
+
+    tracing::info!("cortex listening on {listen_addr}");
+
+    let listener = tokio::net::TcpListener::bind(listen_addr).await?;
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}
diff --git a/crates/cortex-gateway/src/metrics.rs b/crates/cortex-gateway/src/metrics.rs
new file mode 100644
index 0000000..cf960a8
--- /dev/null
+++ b/crates/cortex-gateway/src/metrics.rs
@@ -0,0 +1,55 @@
+//! Prometheus metrics exporter.
+//!
+//! Runs on a separate port from the main API, exposing `/metrics`
+//! in Prometheus text format.
+
+use anyhow::Result;
+use metrics_exporter_prometheus::PrometheusBuilder;
+use std::net::SocketAddr;
+
+/// Install the Prometheus metrics recorder and return a handle.
+/// The `/metrics` endpoint is served by the exporter's built-in HTTP server.
+///
+/// # Errors
+/// Fails when `listen` is not a valid socket address or the exporter cannot
+/// be installed.
+pub fn install(listen: &str) -> Result<()> {
+    let addr: SocketAddr = listen
+        .parse()
+        .map_err(|e| anyhow::anyhow!("invalid metrics listen address '{listen}': {e}"))?;
+
+    PrometheusBuilder::new()
+        .with_http_listener(addr)
+        .install()
+        .map_err(|e| anyhow::anyhow!("failed to install Prometheus exporter: {e}"))?;
+
+    tracing::info!("prometheus metrics exporter on {addr}");
+
+    // Register histograms and counters used by the proxy layer.
+    // The `metrics` crate lazily creates metrics on first use, but
+    // describing them up front gives Prometheus proper HELP/TYPE lines.
+
+    // Histograms.
+    metrics::describe_histogram!(
+        "cortex_request_duration_seconds",
+        "Total request latency in seconds"
+    );
+    metrics::describe_histogram!(
+        "cortex_time_to_first_token_seconds",
+        "Time to first token in seconds"
+    );
+    metrics::describe_histogram!(
+        "cortex_tokens_per_second",
+        "Generation throughput in tokens per second"
+    );
+
+    // Counters.
+    metrics::describe_counter!(
+        "cortex_requests_total",
+        "Total number of proxied requests"
+    );
+    metrics::describe_counter!(
+        "cortex_request_errors_total",
+        "Total number of failed proxy requests"
+    );
+    metrics::describe_counter!(
+        "cortex_evictions_total",
+        "Total number of model evictions"
+    );
+    metrics::describe_counter!(
+        "cortex_cold_starts_total",
+        "Total number of cold-start model loads"
+    );
+
+    Ok(())
+}
diff --git a/crates/cortex-gateway/src/poller.rs b/crates/cortex-gateway/src/poller.rs
new file mode 100644
index 0000000..baa9566
--- /dev/null
+++ b/crates/cortex-gateway/src/poller.rs
@@ -0,0 +1,103 @@
+//! Background poller that periodically queries each node's `/v1/models`
+//! endpoint to refresh the fleet state.
+
+use crate::state::CortexState;
+use chrono::Utc;
+use cortex_core::node::{MistralModelsResponse, ModelEntry, ModelStatus};
+use std::sync::Arc;
+use std::time::Duration;
+
+const POLL_INTERVAL: Duration = Duration::from_secs(10);
+
+/// Runs forever, polling all nodes on a fixed interval.
+///
+/// All nodes are polled concurrently, so an unreachable node (5 s request
+/// timeout) does not hold up the refresh of the others.
+pub async fn poll_loop(fleet: Arc<CortexState>) {
+    loop {
+        let polls = fleet
+            .node_configs
+            .iter()
+            .map(|nc| poll_node(&fleet, &nc.name, &nc.endpoint));
+        futures::future::join_all(polls).await;
+
+        tokio::time::sleep(POLL_INTERVAL).await;
+    }
+}
+
+/// Query one node's `/v1/models` and fold the answer into the fleet state.
+///
+/// The HTTP request completes before the write lock is taken. The lock is,
+/// however, still held while the response body is read and parsed. Any
+/// failure (transport, non-2xx, unparsable body) marks the node unhealthy and
+/// leaves its model table as it was.
+async fn poll_node(fleet: &CortexState, name: &str, endpoint: &str) {
+    let url = format!("{endpoint}/v1/models");
+
+    let result = fleet
+        .http_client
+        .get(&url)
+        .timeout(Duration::from_secs(5))
+        .send()
+        .await;
+
+    let mut nodes = fleet.nodes.write().await;
+    let Some(node) = nodes.get_mut(name) else {
+        return;
+    };
+
+    let resp = match result {
+        Ok(resp) => resp,
+        Err(e) => {
+            tracing::warn!(node = name, error = %e, "failed to reach node");
+            node.healthy = false;
+            return;
+        }
+    };
+
+    if !resp.status().is_success() {
+        tracing::warn!(
+            node = name,
+            status = %resp.status(),
+            "node returned non-success status"
+        );
+        node.healthy = false;
+        return;
+    }
+
+    let models_resp = match resp.json::<MistralModelsResponse>().await {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            tracing::warn!(node = name, error = %e, "failed to parse /v1/models response");
+            node.healthy = false;
+            return;
+        }
+    };
+
+    // Merge upstream model list into our state, preserving
+    // our local metadata (last_accessed, vram_estimate).
+    let mut seen = std::collections::HashSet::new();
+    for upstream in &models_resp.data {
+        seen.insert(upstream.id.clone());
+        let status = parse_status(upstream.status.as_deref());
+
+        node.models
+            .entry(upstream.id.clone())
+            .and_modify(|e| e.status = status)
+            .or_insert_with(|| ModelEntry {
+                id: upstream.id.clone(),
+                status,
+                last_accessed: None,
+                vram_estimate_mb: None,
+            });
+    }
+
+    // Remove models that are no longer reported by the node
+    // (e.g. after a config change / restart).
+    node.models.retain(|id, _| seen.contains(id));
+
+    node.healthy = true;
+    node.last_poll = Some(Utc::now());
+    tracing::debug!(
+        node = name,
+        models = models_resp.data.len(),
+        "poll ok"
+    );
+}
+
+/// Map the upstream status string onto [`ModelStatus`].
+///
+/// An absent status is treated as loaded (older mistral.rs versions may not
+/// include the field). An unrecognized string is also treated as loaded, but
+/// is logged so that a new upstream status does not go unnoticed.
+fn parse_status(s: Option<&str>) -> ModelStatus {
+    match s {
+        Some("loaded") | None => ModelStatus::Loaded,
+        Some("unloaded") => ModelStatus::Unloaded,
+        Some("reloading") => ModelStatus::Reloading,
+        Some(other) => {
+            tracing::warn!(status = other, "unrecognized model status; assuming loaded");
+            ModelStatus::Loaded
+        }
+    }
+}
diff --git a/crates/cortex-gateway/src/proxy.rs b/crates/cortex-gateway/src/proxy.rs
new file mode 100644
index 0000000..41c22f5
--- /dev/null
+++ b/crates/cortex-gateway/src/proxy.rs
@@ -0,0 +1,82 @@
+//! Streaming HTTP reverse proxy to mistral.rs backends.
+//!
+//! For streaming requests, SSE chunks are forwarded as they arrive.
+//! The proxy captures timing information for metrics but does not
+//! buffer the full response.
+
+use crate::router::RouteDecision;
+use anyhow::Result;
+use axum::body::Body;
+use axum::http::{HeaderMap, StatusCode};
+use axum::response::{IntoResponse, Response};
+use reqwest::Client;
+
+/// Proxy a request body to the resolved backend node and stream the response.
+///
+/// Request and response headers are copied across except for hop-by-hop
+/// headers, which describe a single connection and must not be relayed by a
+/// proxy. `host` and `content-length` are additionally dropped from the
+/// request because reqwest sets them itself.
+///
+/// # Errors
+/// [`ProxyError::Upstream`] when the backend request fails,
+/// [`ProxyError::ResponseBuild`] when the response cannot be assembled.
+pub async fn forward_request(
+    client: &Client,
+    route: &RouteDecision,
+    path: &str,
+    headers: HeaderMap,
+    body: bytes::Bytes,
+) -> Result<Response, ProxyError> {
+    let url = format!("{}{}", route.endpoint, path);
+    tracing::info!(
+        node = %route.node_name,
+        url = %url,
+        cold_start = route.cold_start,
+        "proxying request"
+    );
+
+    let mut req_builder = client.post(&url).body(body);
+
+    // Forward relevant headers.
+    for (key, value) in headers.iter() {
+        let name = key.as_str();
+        if name == "host" || name == "content-length" || is_hop_by_hop(name) {
+            continue; // reqwest sets host/content-length; hop-by-hop stays local
+        }
+        req_builder = req_builder.header(key, value);
+    }
+
+    let upstream_resp = req_builder.send().await.map_err(ProxyError::Upstream)?;
+
+    let status = StatusCode::from_u16(upstream_resp.status().as_u16())
+        .unwrap_or(StatusCode::BAD_GATEWAY);
+
+    let mut response = Response::builder().status(status);
+    for (key, value) in upstream_resp.headers().iter() {
+        // Framing of the client-facing connection is decided by our own
+        // server, not by what the backend used towards us.
+        if is_hop_by_hop(key.as_str()) {
+            continue;
+        }
+        response = response.header(key, value);
+    }
+
+    // Stream the body through without buffering it.
+    let body = Body::from_stream(upstream_resp.bytes_stream());
+
+    response
+        .body(body)
+        .map_err(|e| ProxyError::ResponseBuild(e.to_string()))
+}
+
+/// Header names that apply to one connection only (lowercase, which is how
+/// header names are stored).
+const HOP_BY_HOP_HEADERS: [&str; 8] = [
+    "connection",
+    "keep-alive",
+    "proxy-authenticate",
+    "proxy-authorization",
+    "te",
+    "trailer",
+    "transfer-encoding",
+    "upgrade",
+];
+
+/// Whether `name` (lowercase) is a hop-by-hop header.
+fn is_hop_by_hop(name: &str) -> bool {
+    HOP_BY_HOP_HEADERS.contains(&name)
+}
+
+/// Failure modes of [`forward_request`].
+#[derive(Debug, thiserror::Error)]
+pub enum ProxyError {
+    #[error("upstream request failed: {0}")]
+    Upstream(reqwest::Error),
+    #[error("failed to build response: {0}")]
+    ResponseBuild(String),
+}
+
+impl IntoResponse for ProxyError {
+    /// Upstream failures become 502, response-assembly failures 500; the
+    /// body is a JSON error envelope carrying the error's display text.
+    fn into_response(self) -> Response {
+        let status = match &self {
+            ProxyError::Upstream(_) => StatusCode::BAD_GATEWAY,
+            ProxyError::ResponseBuild(_) => StatusCode::INTERNAL_SERVER_ERROR,
+        };
+        let body = serde_json::json!({
+            "error": {
+                "message": self.to_string(),
+                "type": "proxy_error",
+            }
+        });
+        (status, axum::Json(body)).into_response()
+    }
+}
diff --git a/crates/cortex-gateway/src/router.rs b/crates/cortex-gateway/src/router.rs
new file mode 100644
index 0000000..8af3fbc
--- /dev/null
+++ b/crates/cortex-gateway/src/router.rs
@@ -0,0 +1,74 @@
+//! Model-to-node routing logic.
+//!
+//! Given a model ID from an inbound request, determine which node should
+//! handle it. Priority:
+//! 1. Node where the model is currently `Loaded`
+//! 2. Node where the model is `Reloading` (not counted as a cold start)
+//! 3. Node where the model is `Unloaded` (will lazy-load on request)
+//! 4. Error: model not found on any node
+
+use crate::state::CortexState;
+use cortex_core::node::ModelStatus;
+use std::sync::Arc;
+
+/// The routing decision: which node endpoint to proxy the request to.
+#[derive(Debug, Clone)]
+pub struct RouteDecision {
+    pub node_name: String,
+    pub endpoint: String,
+    /// Whether the model will need to load (cold start).
+    pub cold_start: bool,
+}
+
+/// Why no node could be chosen.
+#[derive(Debug, thiserror::Error)]
+pub enum RouteError {
+    #[error("model '{0}' not found on any node")]
+    ModelNotFound(String),
+    #[error("no healthy nodes available")]
+    NoHealthyNodes,
+}
+
+/// Resolve which node should serve a request for the given model.
+///
+/// Only healthy nodes are considered. Among them the best status wins
+/// (`Loaded`, then `Reloading`, then `Unloaded`); the scan stops at the first
+/// `Loaded` replica. Previously a `Reloading` replica also stopped the scan,
+/// which could hide a `Loaded` replica on another node.
+///
+/// # Errors
+/// [`RouteError::NoHealthyNodes`] when no node is healthy at all, otherwise
+/// [`RouteError::ModelNotFound`] when no healthy node knows the model.
+pub async fn resolve(
+    fleet: &Arc<CortexState>,
+    model_id: &str,
+) -> Result<RouteDecision, RouteError> {
+    let nodes = fleet.nodes.read().await;
+
+    // Lower rank = better candidate.
+    let mut best: Option<(u8, RouteDecision)> = None;
+
+    for node in nodes.values().filter(|n| n.healthy) {
+        let Some(entry) = node.models.get(model_id) else {
+            continue;
+        };
+        let rank = match entry.status {
+            ModelStatus::Loaded => 0,
+            ModelStatus::Reloading => 1,
+            ModelStatus::Unloaded => 2,
+        };
+        let improves = match &best {
+            Some((current, _)) => rank < *current,
+            None => true,
+        };
+        if improves {
+            best = Some((
+                rank,
+                RouteDecision {
+                    node_name: node.name.clone(),
+                    endpoint: node.endpoint.clone(),
+                    cold_start: entry.status == ModelStatus::Unloaded,
+                },
+            ));
+        }
+        if rank == 0 {
+            break; // loaded is best, stop searching
+        }
+    }
+
+    match best {
+        Some((_, decision)) => Ok(decision),
+        None if nodes.values().any(|n| n.healthy) => {
+            Err(RouteError::ModelNotFound(model_id.to_string()))
+        }
+        None => Err(RouteError::NoHealthyNodes),
+    }
+}
diff --git a/crates/cortex-gateway/src/state.rs b/crates/cortex-gateway/src/state.rs
new file mode 100644
index 0000000..0d7aade
--- /dev/null
+++ b/crates/cortex-gateway/src/state.rs
@@ -0,0 +1,43 @@
+use cortex_core::config::{EvictionSettings, GatewayConfig, NodeConfig};
+use cortex_core::node::NodeState;
+use std::collections::HashMap;
+use tokio::sync::RwLock;
+
+/// Shared fleet state, protected by a RwLock for concurrent reader access.
+pub struct CortexState {
+    /// Live per-node state, keyed by node name.
+    pub nodes: RwLock<HashMap<String, NodeState>>,
+    /// The node list exactly as configured.
+    pub node_configs: Vec<NodeConfig>,
+    pub eviction: EvictionSettings,
+    /// Shared HTTP client used for every backend call.
+    pub http_client: reqwest::Client,
+}
+
+impl CortexState {
+    /// Build the initial state from configuration.
+    ///
+    /// Every configured node starts out unhealthy with an empty model table.
+    /// The shared HTTP client is built with a 300-second request timeout.
+    ///
+    /// # Panics
+    /// Panics if the HTTP client cannot be constructed.
+    pub fn from_config(config: &GatewayConfig) -> Self {
+        let nodes: HashMap<String, NodeState> = config
+            .nodes
+            .iter()
+            .map(|nc| {
+                let state = NodeState {
+                    name: nc.name.clone(),
+                    endpoint: nc.endpoint.clone(),
+                    vram_mb: nc.vram_mb,
+                    pinned: nc.pinned.clone(),
+                    healthy: false, // will be set by first poll
+                    models: HashMap::new(),
+                    lifecycle_cycles: 0,
+                    last_poll: None,
+                };
+                (nc.name.clone(), state)
+            })
+            .collect();
+
+        let http_client = reqwest::Client::builder()
+            .timeout(std::time::Duration::from_secs(300))
+            .build()
+            .expect("failed to build HTTP client");
+
+        Self {
+            nodes: RwLock::new(nodes),
+            node_configs: config.nodes.clone(),
+            eviction: config.eviction.clone(),
+            http_client,
+        }
+    }
+}