ci: add Gitea CI, RPM spec, license, and repo hygiene
- Add .gitea/workflows/ci.yml with fmt/clippy/test on all branches and SRPM build + COPR publish on version tags
- Add cortex.spec for Fedora RPM packaging
- Add GPL-3.0-or-later LICENSE file
- Add cortex.example.toml with generic hostnames; gitignore cortex.toml
- Scrub infrastructure-specific hostnames from README.md, CLAUDE.md, and doc comments
- Fix unused imports and clippy warnings to pass -D warnings
- Fix missing deps (bytes, reqwest, serde_json) exposed during build
- Run cargo fmt across workspace
- Update SPDX license identifier to GPL-3.0-or-later

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,15 +3,14 @@
|
||||
use crate::proxy;
|
||||
use crate::router;
|
||||
use crate::state::CortexState;
|
||||
use axum::Router;
|
||||
use axum::body::Bytes;
|
||||
use axum::extract::State;
|
||||
use axum::http::HeaderMap;
|
||||
use axum::response::{IntoResponse, Json, Response};
|
||||
use axum::routing::{get, post};
|
||||
use axum::Router;
|
||||
use cortex_core::node::{CortexModelEntry, ModelLocation};
|
||||
use cortex_core::openai::ChatCompletionRequest;
|
||||
use serde_json::{json, Value};
|
||||
use serde_json::{Value, json};
|
||||
use std::sync::Arc;
|
||||
|
||||
pub fn api_routes() -> Router<Arc<CortexState>> {
|
||||
@@ -40,8 +39,14 @@ async fn chat_completions(
|
||||
Err(e) => return error_response(404, &e.to_string()),
|
||||
};
|
||||
|
||||
match proxy::forward_request(&fleet.http_client, &route, "/v1/chat/completions", headers, body)
|
||||
.await
|
||||
match proxy::forward_request(
|
||||
&fleet.http_client,
|
||||
&route,
|
||||
"/v1/chat/completions",
|
||||
headers,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(e) => e.into_response(),
|
||||
@@ -64,8 +69,7 @@ async fn completions(
|
||||
Err(e) => return error_response(404, &e.to_string()),
|
||||
};
|
||||
|
||||
match proxy::forward_request(&fleet.http_client, &route, "/v1/completions", headers, body)
|
||||
.await
|
||||
match proxy::forward_request(&fleet.http_client, &route, "/v1/completions", headers, body).await
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(e) => e.into_response(),
|
||||
@@ -161,10 +165,7 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
|
||||
}
|
||||
}
|
||||
|
||||
let data: Vec<Value> = model_map
|
||||
.values()
|
||||
.map(|e| json!(e))
|
||||
.collect();
|
||||
let data: Vec<Value> = model_map.values().map(|e| json!(e)).collect();
|
||||
|
||||
Json(json!({
|
||||
"object": "list",
|
||||
|
||||
@@ -34,18 +34,12 @@ pub fn install(listen: &str) -> Result<()> {
|
||||
"cortex_tokens_per_second",
|
||||
"Generation throughput in tokens per second"
|
||||
);
|
||||
metrics::describe_counter!(
|
||||
"cortex_requests_total",
|
||||
"Total number of proxied requests"
|
||||
);
|
||||
metrics::describe_counter!("cortex_requests_total", "Total number of proxied requests");
|
||||
metrics::describe_counter!(
|
||||
"cortex_request_errors_total",
|
||||
"Total number of failed proxy requests"
|
||||
);
|
||||
metrics::describe_counter!(
|
||||
"cortex_evictions_total",
|
||||
"Total number of model evictions"
|
||||
);
|
||||
metrics::describe_counter!("cortex_evictions_total", "Total number of model evictions");
|
||||
metrics::describe_counter!(
|
||||
"cortex_cold_starts_total",
|
||||
"Total number of cold-start model loads"
|
||||
|
||||
@@ -64,11 +64,7 @@ async fn poll_node(fleet: &CortexState, name: &str, endpoint: &str) {
|
||||
|
||||
node.healthy = true;
|
||||
node.last_poll = Some(Utc::now());
|
||||
tracing::debug!(
|
||||
node = name,
|
||||
models = models_resp.data.len(),
|
||||
"poll ok"
|
||||
);
|
||||
tracing::debug!(node = name, models = models_resp.data.len(), "poll ok");
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(node = name, error = %e, "failed to parse /v1/models response");
|
||||
|
||||
@@ -39,8 +39,8 @@ pub async fn forward_request(
|
||||
|
||||
let upstream_resp = req_builder.send().await.map_err(ProxyError::Upstream)?;
|
||||
|
||||
let status = StatusCode::from_u16(upstream_resp.status().as_u16())
|
||||
.unwrap_or(StatusCode::BAD_GATEWAY);
|
||||
let status =
|
||||
StatusCode::from_u16(upstream_resp.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY);
|
||||
|
||||
let resp_headers = upstream_resp.headers().clone();
|
||||
let stream = upstream_resp.bytes_stream();
|
||||
|
||||
@@ -28,7 +28,10 @@ pub enum RouteError {
|
||||
}
|
||||
|
||||
/// Resolve which node should serve a request for the given model.
|
||||
pub async fn resolve(fleet: &Arc<CortexState>, model_id: &str) -> Result<RouteDecision, RouteError> {
|
||||
pub async fn resolve(
|
||||
fleet: &Arc<CortexState>,
|
||||
model_id: &str,
|
||||
) -> Result<RouteDecision, RouteError> {
|
||||
let nodes = fleet.nodes.read().await;
|
||||
|
||||
// Pass 1: find a node where the model is already loaded.
|
||||
@@ -62,13 +65,11 @@ pub async fn resolve(fleet: &Arc<CortexState>, model_id: &str) -> Result<RouteDe
|
||||
}
|
||||
}
|
||||
|
||||
loaded_candidate
|
||||
.or(unloaded_candidate)
|
||||
.ok_or_else(|| {
|
||||
if nodes.values().any(|n| n.healthy) {
|
||||
RouteError::ModelNotFound(model_id.to_string())
|
||||
} else {
|
||||
RouteError::NoHealthyNodes
|
||||
}
|
||||
})
|
||||
loaded_candidate.or(unloaded_candidate).ok_or_else(|| {
|
||||
if nodes.values().any(|n| n.healthy) {
|
||||
RouteError::ModelNotFound(model_id.to_string())
|
||||
} else {
|
||||
RouteError::NoHealthyNodes
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user