ci: add Gitea CI, RPM spec, license, and repo hygiene
All checks were successful
CI / Format, lint, build, test (push) Successful in 2m15s
CI / Build SRPM (push) Has been skipped
CI / Publish to COPR (push) Has been skipped

- Add .gitea/workflows/ci.yml with fmt/clippy/test on all branches
  and SRPM build + COPR publish on version tags
- Add cortex.spec for Fedora RPM packaging
- Add GPL-3.0-or-later LICENSE file
- Add cortex.example.toml with generic hostnames; gitignore cortex.toml
- Scrub infrastructure-specific hostnames from README.md, CLAUDE.md,
  and doc comments
- Fix unused imports and clippy warnings to pass -D warnings
- Fix missing deps (bytes, reqwest, serde_json) exposed during build
- Run cargo fmt across workspace
- Update SPDX license identifier to GPL-3.0-or-later

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 18:24:04 +03:00
parent 0da68833af
commit 6bb3004cfc
12 changed files with 860 additions and 49 deletions

View File

@@ -42,9 +42,8 @@ async fn main() -> Result<()> {
match cli.command {
Commands::Serve { config } => {
let cfg = GatewayConfig::load(&config).map_err(|e| {
anyhow::anyhow!("failed to load config from '{config}': {e}")
})?;
let cfg = GatewayConfig::load(&config)
.map_err(|e| anyhow::anyhow!("failed to load config from '{config}': {e}"))?;
tracing::info!(
nodes = cfg.nodes.len(),

View File

@@ -54,11 +54,12 @@ impl GatewayConfig {
/// Load configuration from a TOML file, with environment variable overrides.
/// Environment variables are prefixed with `CORTEX_` and use `__` to separate
/// nested keys (e.g. `CORTEX_GATEWAY__LISTEN=0.0.0.0:9000` overrides `gateway.listen`).
pub fn load(path: impl AsRef<Path>) -> Result<Self, figment::Error> {
pub fn load(path: impl AsRef<Path>) -> Result<Self, Box<figment::Error>> {
Figment::new()
.merge(Toml::file(path))
.merge(Env::prefixed("CORTEX_").split("__"))
.extract()
.map_err(Box::new)
}
}

View File

@@ -3,14 +3,12 @@
//! This is a stateless transformation — no context is carried between requests.
use crate::anthropic::{
AnthropicContent, AnthropicMessage, AnthropicUsage, ContentBlock, MessagesRequest,
MessagesResponse, SystemPrompt,
AnthropicContent, AnthropicUsage, ContentBlock, MessagesRequest, MessagesResponse, SystemPrompt,
};
use crate::openai::{
ChatCompletionChoice, ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Usage,
MessageContent,
ChatCompletionRequest, ChatCompletionResponse, ChatMessage, MessageContent, Usage,
};
use serde_json::{json, Value};
use serde_json::{Value, json};
/// Convert an Anthropic Messages request into an OpenAI ChatCompletion request.
pub fn anthropic_to_openai(req: MessagesRequest) -> ChatCompletionRequest {
@@ -45,9 +43,7 @@ pub fn anthropic_to_openai(req: MessagesRequest) -> ChatCompletionRequest {
.to_string();
MessageContent::Text(text)
} else {
MessageContent::Parts(
blocks.into_iter().map(|b| json!(b)).collect(),
)
MessageContent::Parts(blocks.into_iter().map(|b| json!(b)).collect())
}
}
};

View File

@@ -3,15 +3,14 @@
use crate::proxy;
use crate::router;
use crate::state::CortexState;
use axum::Router;
use axum::body::Bytes;
use axum::extract::State;
use axum::http::HeaderMap;
use axum::response::{IntoResponse, Json, Response};
use axum::routing::{get, post};
use axum::Router;
use cortex_core::node::{CortexModelEntry, ModelLocation};
use cortex_core::openai::ChatCompletionRequest;
use serde_json::{json, Value};
use serde_json::{Value, json};
use std::sync::Arc;
pub fn api_routes() -> Router<Arc<CortexState>> {
@@ -40,8 +39,14 @@ async fn chat_completions(
Err(e) => return error_response(404, &e.to_string()),
};
match proxy::forward_request(&fleet.http_client, &route, "/v1/chat/completions", headers, body)
.await
match proxy::forward_request(
&fleet.http_client,
&route,
"/v1/chat/completions",
headers,
body,
)
.await
{
Ok(resp) => resp,
Err(e) => e.into_response(),
@@ -64,8 +69,7 @@ async fn completions(
Err(e) => return error_response(404, &e.to_string()),
};
match proxy::forward_request(&fleet.http_client, &route, "/v1/completions", headers, body)
.await
match proxy::forward_request(&fleet.http_client, &route, "/v1/completions", headers, body).await
{
Ok(resp) => resp,
Err(e) => e.into_response(),
@@ -161,10 +165,7 @@ async fn list_models(State(fleet): State<Arc<CortexState>>) -> Json<Value> {
}
}
let data: Vec<Value> = model_map
.values()
.map(|e| json!(e))
.collect();
let data: Vec<Value> = model_map.values().map(|e| json!(e)).collect();
Json(json!({
"object": "list",

View File

@@ -34,18 +34,12 @@ pub fn install(listen: &str) -> Result<()> {
"cortex_tokens_per_second",
"Generation throughput in tokens per second"
);
metrics::describe_counter!(
"cortex_requests_total",
"Total number of proxied requests"
);
metrics::describe_counter!("cortex_requests_total", "Total number of proxied requests");
metrics::describe_counter!(
"cortex_request_errors_total",
"Total number of failed proxy requests"
);
metrics::describe_counter!(
"cortex_evictions_total",
"Total number of model evictions"
);
metrics::describe_counter!("cortex_evictions_total", "Total number of model evictions");
metrics::describe_counter!(
"cortex_cold_starts_total",
"Total number of cold-start model loads"

View File

@@ -64,11 +64,7 @@ async fn poll_node(fleet: &CortexState, name: &str, endpoint: &str) {
node.healthy = true;
node.last_poll = Some(Utc::now());
tracing::debug!(
node = name,
models = models_resp.data.len(),
"poll ok"
);
tracing::debug!(node = name, models = models_resp.data.len(), "poll ok");
}
Err(e) => {
tracing::warn!(node = name, error = %e, "failed to parse /v1/models response");

View File

@@ -39,8 +39,8 @@ pub async fn forward_request(
let upstream_resp = req_builder.send().await.map_err(ProxyError::Upstream)?;
let status = StatusCode::from_u16(upstream_resp.status().as_u16())
.unwrap_or(StatusCode::BAD_GATEWAY);
let status =
StatusCode::from_u16(upstream_resp.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY);
let resp_headers = upstream_resp.headers().clone();
let stream = upstream_resp.bytes_stream();

View File

@@ -28,7 +28,10 @@ pub enum RouteError {
}
/// Resolve which node should serve a request for the given model.
pub async fn resolve(fleet: &Arc<CortexState>, model_id: &str) -> Result<RouteDecision, RouteError> {
pub async fn resolve(
fleet: &Arc<CortexState>,
model_id: &str,
) -> Result<RouteDecision, RouteError> {
let nodes = fleet.nodes.read().await;
// Pass 1: find a node where the model is already loaded.
@@ -62,13 +65,11 @@ pub async fn resolve(fleet: &Arc<CortexState>, model_id: &str) -> Result<RouteDe
}
}
loaded_candidate
.or(unloaded_candidate)
.ok_or_else(|| {
if nodes.values().any(|n| n.healthy) {
RouteError::ModelNotFound(model_id.to_string())
} else {
RouteError::NoHealthyNodes
}
})
loaded_candidate.or(unloaded_candidate).ok_or_else(|| {
if nodes.values().any(|n| n.healthy) {
RouteError::ModelNotFound(model_id.to_string())
} else {
RouteError::NoHealthyNodes
}
})
}