Stage 1 of the candle-native pivot. Replaces the external-process harness model (mistralrs over HTTP, llamacpp placeholder) with an in-process Harness trait whose sole implementation is candle. The trait keeps its shape so future engines slot in additively, but start/stop default to no-ops and HarnessConfig drops endpoint and systemd_unit since no harness needs external supervision. Behaviour is unchanged on the wire: load_model returns a "not implemented yet (Stage 2)" error and list_models is empty. The gateway-side proxy, poller, and router are untouched. CLAUDE.md Phase 11 (llama.cpp) and Phase 12 (mistral.rs COPR) are marked superseded; the staged plan lives in ~/.claude/plans/create-a-more-aggressive-calm-naur.md. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
123 lines
4.0 KiB
Rust
123 lines
4.0 KiB
Rust
//! OpenAI-compatible request and response types.
|
|
//!
|
|
//! These are a subset sufficient for chat completions (streaming + non-streaming).
|
|
//! Fields not relevant to proxying are captured as `serde_json::Value` via
|
|
//! `#[serde(flatten)]` so we forward them without needing to enumerate every
|
|
//! extension field a backend might support.
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::Value;
|
|
|
|
// ── Chat completion request ──────────────────────────────────────────
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChatCompletionRequest {
|
|
pub model: String,
|
|
pub messages: Vec<ChatMessage>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub temperature: Option<f64>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub top_p: Option<f64>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub max_tokens: Option<u64>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub stream: Option<bool>,
|
|
/// All other fields (tools, response_format, backend extensions, etc.)
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChatMessage {
|
|
pub role: String,
|
|
pub content: MessageContent,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
/// Content can be a simple string or an array of content parts (for vision).
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(untagged)]
|
|
pub enum MessageContent {
|
|
Text(String),
|
|
Parts(Vec<Value>),
|
|
}
|
|
|
|
// ── Chat completion response (non-streaming) ─────────────────────────
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChatCompletionResponse {
|
|
pub id: String,
|
|
pub object: String,
|
|
pub created: u64,
|
|
pub model: String,
|
|
pub choices: Vec<ChatCompletionChoice>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub usage: Option<Usage>,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChatCompletionChoice {
|
|
pub index: u32,
|
|
pub message: ChatMessage,
|
|
pub finish_reason: Option<String>,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
// ── Streaming chunk ──────────────────────────────────────────────────
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChatCompletionChunk {
|
|
pub id: String,
|
|
pub object: String,
|
|
pub created: u64,
|
|
pub model: String,
|
|
pub choices: Vec<ChunkChoice>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub usage: Option<Usage>,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ChunkChoice {
|
|
pub index: u32,
|
|
pub delta: Value,
|
|
pub finish_reason: Option<String>,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|
|
|
|
// ── Usage ────────────────────────────────────────────────────────────
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct Usage {
|
|
pub prompt_tokens: u64,
|
|
pub completion_tokens: u64,
|
|
pub total_tokens: u64,
|
|
}
|
|
|
|
// ── Models list response ─────────────────────────────────────────────
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ModelsResponse {
|
|
pub object: String,
|
|
pub data: Vec<ModelObject>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ModelObject {
|
|
pub id: String,
|
|
pub object: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub owned_by: Option<String>,
|
|
/// Gateway extensions: which node(s) host this model.
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub locations: Option<Vec<super::node::ModelLocation>>,
|
|
#[serde(flatten)]
|
|
pub extra: Value,
|
|
}
|