fix(neuron): load chat_template.jinja (transformers precedence)

The chat-template loader only read the `chat_template` field from
tokenizer_config.json. Qwen3.6-27B ships its vision-aware template
*only* in a standalone `chat_template.jinja` (and has no
tokenizer_config.json at all), so the loader returned None and image
requests fell back to the text-only format_qwen3_prompt — rendering
zero `<|image_pad|>` tokens and tripping
"expand_image_pad_tokens: prompt has 0 image_token_id occurrences".

load_chat_template_alongside now follows HF transformers precedence:
standalone chat_template.jinja → chat_template.json → the
chat_template field in tokenizer_config.json. Tests cover the
precedence, the text-only fallback, and that an OpenAI image_url
content part renders `<|image_pad|>` through the real template
condition (`'image_url' in item`).

Refs #16 / TP-vision.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-04 16:25:30 +03:00
parent 1b0e36c119
commit 7ad56c6a86

View File

@@ -65,12 +65,55 @@ pub fn chat_templates_enabled() -> bool {
} }
} }
/// Convenience: probe for `tokenizer_config.json` in the same /// Probe for the model's chat template in the same directory the
/// directory the tokenizer was loaded from. Both files come from /// tokenizer was loaded from, following HuggingFace `transformers`
/// the same HuggingFace snapshot in the hf-hub cache, so the /// precedence: a standalone `chat_template.jinja` (then
/// sibling path is reliable. /// `chat_template.json`) wins over the `chat_template` field in
/// `tokenizer_config.json`.
///
/// This matters for multimodal models: Qwen3-VL / Qwen3.6 ship their
/// vision-aware template (the one that emits
/// `<|vision_start|><|image_pad|><|vision_end|>` per image) **only** in
/// `chat_template.jinja`, and may not ship a `tokenizer_config.json` at
/// all. Reading `tokenizer_config.json` alone returned `None`, which
/// dropped image content into the text-only `format_qwen3_prompt`
/// fallback — so image requests rendered zero `<|image_pad|>` tokens
/// and the vision path bailed on the count mismatch.
pub fn load_chat_template_alongside(tokenizer_json_path: &Path) -> Option<String> { pub fn load_chat_template_alongside(tokenizer_json_path: &Path) -> Option<String> {
let parent = tokenizer_json_path.parent()?; let parent = tokenizer_json_path.parent()?;
// 1. Standalone Jinja file — raw template text, highest priority.
let jinja_path = parent.join("chat_template.jinja");
match std::fs::read_to_string(&jinja_path) {
Ok(text) if !text.trim().is_empty() => {
tracing::info!(
path = %jinja_path.display(),
"chat_template: loaded standalone chat_template.jinja"
);
return Some(text);
}
Ok(_) => {
tracing::warn!(
path = %jinja_path.display(),
"chat_template: chat_template.jinja present but empty; trying other sources"
);
}
Err(_) => {} // absent — fall through, common case
}
// 2. Standalone JSON file — `{"chat_template": "..."}` form.
let json_path = parent.join("chat_template.json");
if json_path.exists()
&& let Some(t) = load_chat_template_from(&json_path)
{
tracing::info!(
path = %json_path.display(),
"chat_template: loaded standalone chat_template.json"
);
return Some(t);
}
// 3. The `chat_template` field inside tokenizer_config.json.
let config_path = parent.join("tokenizer_config.json"); let config_path = parent.join("tokenizer_config.json");
load_chat_template_from(&config_path) load_chat_template_from(&config_path)
} }
@@ -210,6 +253,87 @@ mod tests {
use super::*; use super::*;
use serde_json::json; use serde_json::json;
/// Reproduces the Qwen3.6 vision template's image-insertion
/// condition against the OpenAI `image_url` content-part shape our
/// renderer forwards. Confirms minijinja's `'image_url' in item`
/// matches a serde_json object that carries that key — i.e. the
/// template *can* emit `<|image_pad|>` for our parts.
#[test]
fn image_url_part_renders_image_pad() {
// Condition copied from doc/vision-qwen3_6-spec.md (lines 8-18
// of the real chat_template.jinja).
let template = "{%- for message in messages -%}\
{%- if message.content is string -%}\
{{ message.content }}\
{%- else -%}\
{%- for item in message.content -%}\
{%- if 'image' in item or 'image_url' in item or item.type == 'image' -%}\
<|vision_start|><|image_pad|><|vision_end|>\
{%- elif item.type == 'text' -%}\
{{ item.text }}\
{%- endif -%}\
{%- endfor -%}\
{%- endif -%}\
{%- endfor -%}";
let messages = vec![ChatMessage {
role: "user".into(),
content: MessageContent::Parts(vec![
json!({"type": "text", "text": "what is this?"}),
json!({"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA="}}),
]),
extra: Value::Object(Default::default()),
}];
let out = render_chat_template(template, &messages, &Value::Null, &Value::Null)
.expect("render should succeed");
assert!(
out.contains("<|image_pad|>"),
"expected the image_url part to emit <|image_pad|>; rendered: {out:?}"
);
}
/// `chat_template.jinja` must win over `tokenizer_config.json`'s
/// `chat_template` field — the transformers precedence Qwen3.6
/// relies on (its vision template ships only in the `.jinja` file).
#[test]
fn standalone_jinja_template_takes_precedence() {
let dir = std::env::temp_dir().join(format!(
"neuron_ct_precedence_{}_{}",
std::process::id(),
line!()
));
std::fs::create_dir_all(&dir).unwrap();
std::fs::write(dir.join("chat_template.jinja"), "FROM_JINJA").unwrap();
std::fs::write(
dir.join("tokenizer_config.json"),
r#"{"chat_template": "FROM_CONFIG"}"#,
)
.unwrap();
// tokenizer_json_path is the sibling the loader takes a parent of.
let got = load_chat_template_alongside(&dir.join("tokenizer.json"));
std::fs::remove_dir_all(&dir).ok();
assert_eq!(got.as_deref(), Some("FROM_JINJA"));
}
/// With no standalone file, fall back to the tokenizer_config.json
/// field — the text-only path stays unchanged.
#[test]
fn falls_back_to_tokenizer_config_when_no_standalone() {
let dir = std::env::temp_dir().join(format!(
"neuron_ct_fallback_{}_{}",
std::process::id(),
line!()
));
std::fs::create_dir_all(&dir).unwrap();
std::fs::write(
dir.join("tokenizer_config.json"),
r#"{"chat_template": "FROM_CONFIG"}"#,
)
.unwrap();
let got = load_chat_template_alongside(&dir.join("tokenizer.json"));
std::fs::remove_dir_all(&dir).ok();
assert_eq!(got.as_deref(), Some("FROM_CONFIG"));
}
fn user_msg(text: &str) -> ChatMessage { fn user_msg(text: &str) -> ChatMessage {
ChatMessage { ChatMessage {
role: "user".into(), role: "user".into(),