feat: scaffold cortex workspace

Rust reverse-proxy for multi-node mistral.rs inference clusters. Includes crate structure (cortex-core, cortex-gateway, cortex-agent, cortex-cli), config loading, OpenAI/Anthropic translation stubs, model routing, eviction, polling, and streaming proxy scaffolding. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 18:13:30 +03:00
commit 0da68833af
28 changed files with 4659 additions and 0 deletions
--- a/crates/cortex-gateway/src/handlers.rs
+++ b/crates/cortex-gateway/src/handlers.rs
@@ -0,0 +1,207 @@
+//! Axum HTTP handlers for the gateway API surface.
+
+use crate::proxy;
+use crate::router;
+use crate::state::CortexState;
+use axum::body::Bytes;
+use axum::extract::State;
+use axum::http::HeaderMap;
+use axum::response::{IntoResponse, Json, Response};
+use axum::routing::{get, post};
+use axum::Router;
+use cortex_core::node::{CortexModelEntry, ModelLocation};
+use cortex_core::openai::ChatCompletionRequest;
+use serde_json::{json, Value};
+use std::sync::Arc;
+
+/// Builds the gateway's public route table.
+///
+/// The returned router still expects its `Arc<CortexState>` state to be
+/// supplied by the caller.
+pub fn api_routes() -> Router<Arc<CortexState>> {
+    let routes: Router<Arc<CortexState>> = Router::new();
+
+    // OpenAI-compatible surface.
+    let routes = routes.route("/v1/chat/completions", post(chat_completions));
+    let routes = routes.route("/v1/completions", post(completions));
+    let routes = routes.route("/v1/models", get(list_models));
+
+    // Anthropic-compatible surface.
+    let routes = routes.route("/v1/messages", post(anthropic_messages));
+
+    // Health summary, also served from the root path.
+    let routes = routes.route("/health", get(health));
+    routes.route("/", get(health))
+}
+
+/// `POST /v1/chat/completions` — proxy to the appropriate backend node.
+///
+/// The request body and headers are forwarded unchanged; see
+/// `proxy_by_model` for the error mapping.
+async fn chat_completions(
+    State(fleet): State<Arc<CortexState>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    proxy_by_model(fleet, "/v1/chat/completions", headers, body).await
+}
+
+/// `POST /v1/completions` — proxy completions endpoint.
+///
+/// The request body and headers are forwarded unchanged; see
+/// `proxy_by_model` for the error mapping.
+async fn completions(
+    State(fleet): State<Arc<CortexState>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    proxy_by_model(fleet, "/v1/completions", headers, body).await
+}
+
+/// Shared body of the OpenAI-style proxy handlers.
+///
+/// Reads the `model` string from the JSON `body`, asks `router::resolve` for a
+/// route to that model, and forwards the untouched request to `path` on it.
+///
+/// Responses produced here:
+/// - `400` when `extract_model` yields nothing (the body is not JSON, or it
+///   has no string `model` field),
+/// - `404` carrying the resolver's error text when no route is found,
+/// - otherwise the upstream response, or the proxy error converted with
+///   `into_response`.
+async fn proxy_by_model(
+    fleet: Arc<CortexState>,
+    path: &'static str,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    let Some(model_id) = extract_model(&body) else {
+        return error_response(400, "missing 'model' field in request body");
+    };
+
+    let route = match router::resolve(&fleet, &model_id).await {
+        Ok(route) => route,
+        Err(e) => return error_response(404, &e.to_string()),
+    };
+
+    match proxy::forward_request(&fleet.http_client, &route, path, headers, body).await {
+        Ok(resp) => resp,
+        Err(e) => e.into_response(),
+    }
+}
+
+/// `POST /v1/messages` — accept Anthropic format, translate, proxy.
+///
+/// The body is parsed as an Anthropic `MessagesRequest`, converted with
+/// `anthropic_to_openai`, re-serialized, and forwarded to
+/// `/v1/chat/completions` on the node chosen by `router::resolve`.
+///
+/// Translating the response back to the Anthropic shape is not implemented
+/// yet: both streaming and non-streaming callers currently receive the
+/// upstream OpenAI response as-is.
+///
+/// Responses produced here:
+/// - `400` when the body does not deserialize as a `MessagesRequest`,
+/// - `500` when the translated request cannot be serialized,
+/// - `404` carrying the resolver's error text when no route is found,
+/// - otherwise the upstream response, or the proxy error converted with
+///   `into_response`.
+async fn anthropic_messages(
+    State(fleet): State<Arc<CortexState>>,
+    mut headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    // Parse as Anthropic request.
+    let anth_req: cortex_core::anthropic::MessagesRequest = match serde_json::from_slice(&body) {
+        Ok(req) => req,
+        Err(e) => return error_response(400, &format!("invalid Anthropic request: {e}")),
+    };
+
+    // Capture what is needed later; the translation below consumes the request.
+    let model_id = anth_req.model.clone();
+    let is_streaming = anth_req.stream.unwrap_or(false);
+
+    // Translate to OpenAI format.
+    let openai_req = cortex_core::translate::anthropic_to_openai(anth_req);
+    let openai_body = match serde_json::to_vec(&openai_req) {
+        Ok(encoded) => Bytes::from(encoded),
+        Err(e) => return error_response(500, &format!("translation error: {e}")),
+    };
+
+    // The inbound Content-Length describes the Anthropic payload, not the
+    // re-encoded OpenAI body sent upstream. Drop it so a stale length is never
+    // forwarded alongside the new body.
+    headers.remove(axum::http::header::CONTENT_LENGTH);
+
+    let route = match router::resolve(&fleet, &model_id).await {
+        Ok(route) => route,
+        Err(e) => return error_response(404, &e.to_string()),
+    };
+
+    let forwarded = proxy::forward_request(
+        &fleet.http_client,
+        &route,
+        "/v1/chat/completions",
+        headers,
+        openai_body,
+    )
+    .await;
+
+    match forwarded {
+        // TODO: streaming Anthropic translation requires converting SSE format.
+        // For now, proxy the OpenAI SSE stream directly (clients that can handle
+        // OpenAI SSE will work; full Anthropic SSE translation is a follow-up).
+        Ok(resp) if is_streaming => resp,
+        // TODO: buffer response, parse as OpenAI ChatCompletionResponse,
+        // translate to Anthropic MessagesResponse.
+        // For now, return the OpenAI response as-is.
+        Ok(resp) => resp,
+        Err(e) => e.into_response(),
+    }
+}
+
+/// `GET /v1/models` — aggregate models from all nodes.
+///
+/// Every model id reported by any node becomes one entry in `data`, and each
+/// node that reports the id contributes one item to that entry's `locations`
+/// (node name, status, VRAM estimate). Entries are sorted by model id, so
+/// repeated calls against an unchanged fleet list models in the same order.
+async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
+    let nodes = fleet.nodes.read().await;
+
+    // A BTreeMap keeps the aggregated entries ordered by id; a HashMap would
+    // emit them in an arbitrary order that can differ from call to call.
+    let mut model_map: std::collections::BTreeMap<String, CortexModelEntry> =
+        std::collections::BTreeMap::new();
+
+    for node in nodes.values() {
+        for (model_id, entry) in &node.models {
+            model_map
+                .entry(model_id.clone())
+                .or_insert_with(|| CortexModelEntry {
+                    id: model_id.clone(),
+                    object: "model".into(),
+                    locations: Vec::new(),
+                })
+                .locations
+                .push(ModelLocation {
+                    node: node.name.clone(),
+                    status: entry.status,
+                    vram_estimate_mb: entry.vram_estimate_mb,
+                });
+        }
+    }
+
+    // Everything needed has been copied out; release the read guard before
+    // serializing.
+    drop(nodes);
+
+    let data: Vec<Value> = model_map.values().map(|e| json!(e)).collect();
+
+    Json(json!({
+        "object": "list",
+        "data": data,
+    }))
+}
+
+/// `GET /health` — summarize node health.
+///
+/// Reports `"ok"` when at least one node is marked healthy and `"degraded"`
+/// otherwise (including when no nodes are registered), together with the
+/// healthy and total node counts.
+async fn health(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
+    let nodes = fleet.nodes.read().await;
+
+    let total = nodes.len();
+    let healthy = nodes.values().filter(|node| node.healthy).count();
+    let status = if healthy == 0 { "degraded" } else { "ok" };
+
+    let summary = json!({
+        "status": status,
+        "nodes": {
+            "healthy": healthy,
+            "total": total,
+        }
+    });
+    Json(summary)
+}
+
+// ── Helpers ──────────────────────────────────────────────────────────
+
+/// Returns the top-level `model` string from a JSON request body.
+///
+/// Yields `None` when `body` is not valid JSON, has no `model` key, or the
+/// value under `model` is not a string.
+fn extract_model(body: &[u8]) -> Option<String> {
+    let parsed = serde_json::from_slice::<Value>(body).ok()?;
+    let model = parsed.get("model").and_then(Value::as_str)?;
+    Some(model.to_string())
+}
+
+/// Builds a JSON error response of the form
+/// `{"error": {"message": ..., "type": "gateway_error"}}`.
+///
+/// `status` is used as the HTTP status code; a value that is not a valid
+/// status code falls back to 500 Internal Server Error.
+fn error_response(status: u16, message: &str) -> Response {
+    use axum::http::StatusCode;
+
+    let status_code = match StatusCode::from_u16(status) {
+        Ok(valid) => valid,
+        Err(_) => StatusCode::INTERNAL_SERVER_ERROR,
+    };
+
+    let payload = json!({
+        "error": {
+            "message": message,
+            "type": "gateway_error",
+        }
+    });
+
+    (status_code, Json(payload)).into_response()
+}