diff --git a/Cargo.lock b/Cargo.lock index 77ddcc7..e6cd5a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2508,6 +2508,16 @@ dependencies = [ "serde_json", ] +[[package]] +name = "minijinja-contrib" +version = "2.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99df5123c54391e2a228014c1dbbd85a3dab08a25e776c810526f2f47542b3de" +dependencies = [ + "minijinja", + "serde", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2605,6 +2615,7 @@ dependencies = [ "hf-hub", "image", "minijinja", + "minijinja-contrib", "reqwest", "safetensors 0.7.0", "serde", diff --git a/crates/neuron/Cargo.toml b/crates/neuron/Cargo.toml index 1aa576d..b7d3fd6 100644 --- a/crates/neuron/Cargo.toml +++ b/crates/neuron/Cargo.toml @@ -76,15 +76,19 @@ cudarc = { version = "0.19", optional = true, default-features = false, features half = { version = "2.5", optional = true } tokenizers = { version = "0.22", default-features = false, features = ["onig"] } hf-hub = { version = "0.4", features = ["tokio"] } -# Jinja-compatible template renderer for the model's -# `tokenizer_config.json::chat_template`. Hugging Face's chat -# templates use a strict subset of Jinja2 that minijinja supports -# out of the box. ~80KB compiled; pure Rust, no async surface. -# Features: `builtins` for the `is defined` / `default` filters HF -# templates use; `json` for `tojson` (some Qwen3 templates emit -# tool definitions via tojson); `serde` so we can hand it a -# serde_json::Value as the context. +# Jinja-compatible template renderer for the model's chat template +# (standalone `chat_template.jinja` or `tokenizer_config.json::chat_template`). +# Hugging Face's chat templates lean on Python string semantics; we +# bridge them with `minijinja-contrib`'s `pycompat` callback (str +# methods like `startswith`/`split`/`strip`) plus a `raise_exception` +# global. 
Features: `builtins` for `is defined` / `default`; `json` +# for `tojson`; `serde` so we can hand it a serde_json::Value context. minijinja = { version = "2", features = ["builtins", "json", "serde"] } +# Python-compatibility shim: the Qwen3-VL / Qwen3.6 template uses +# `content.startswith(...)`, `.endswith(...)`, `.split(...)`, +# `.rstrip(...)`, `.lstrip(...)` — Python str methods minijinja doesn't +# implement natively. `pycompat::unknown_method_callback` supplies them. +minijinja-contrib = { version = "2", features = ["pycompat"] } # Direct dep on `safetensors` (re-exported by candle but its `TensorView` # / `slice::IndexOp` types are public-but-not-re-exported). Used by the # tp `fused_load` module to read per-rank slices of fused QKV tensors diff --git a/crates/neuron/src/harness/chat_template.rs b/crates/neuron/src/harness/chat_template.rs index 9a93b38..8e451c1 100644 --- a/crates/neuron/src/harness/chat_template.rs +++ b/crates/neuron/src/harness/chat_template.rs @@ -43,7 +43,7 @@ use anyhow::{Context, Result}; use cortex_core::openai::{ChatMessage, MessageContent}; -use minijinja::Environment; +use minijinja::{Environment, Error as MjError, ErrorKind as MjErrorKind, Value as MjValue}; use serde_json::Value; use std::path::Path; @@ -191,6 +191,25 @@ pub fn render_chat_template( kwargs: &Value, ) -> Result { let mut env = Environment::new(); + + // HF chat templates are authored against Python's Jinja2 with its + // string semantics. Bridge the two so real model templates render: + // + // - `pycompat::unknown_method_callback` supplies Python str/list/dict + // methods minijinja lacks natively (`startswith`, `endswith`, + // `split`, `rstrip`, `lstrip`, …) — the Qwen3.6 template uses + // several in its think-block and tool-response handling. + // - `raise_exception` is the global HF templates call to reject + // malformed inputs (e.g. an image in a system message). Map it to + // a render error so the caller falls back / surfaces it. 
+ env.set_unknown_method_callback(minijinja_contrib::pycompat::unknown_method_callback); + env.add_function( + "raise_exception", + |msg: String| -> Result<MjValue, MjError> { + Err(MjError::new(MjErrorKind::InvalidOperation, msg)) + }, + ); + // Compile the template against a fixed name so error messages // surface "chat_template" rather than `