fix(neuron): load chat_template.jinja (transformers precedence)

The chat-template loader only read the `chat_template` field from tokenizer_config.json. Qwen3.6-27B ships its vision-aware template *only* in a standalone `chat_template.jinja` (and has no tokenizer_config.json at all), so the loader returned None and image requests fell back to the text-only format_qwen3_prompt — rendering zero `<|image_pad|>` tokens and tripping "expand_image_pad_tokens: prompt has 0 image_token_id occurrences". load_chat_template_alongside now follows HF transformers precedence: standalone chat_template.jinja → chat_template.json → the chat_template field in tokenizer_config.json. Tests cover the precedence, the text-only fallback, and that an OpenAI image_url content part renders `<|image_pad|>` through the real template condition (`'image_url' in item`). Refs #16 / TP-vision. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 16:25:30 +03:00
parent 1b0e36c119
commit 7ad56c6a86
1 changed files with 128 additions and 4 deletions
--- a/crates/neuron/src/harness/chat_template.rs
+++ b/crates/neuron/src/harness/chat_template.rs
@@ -65,12 +65,55 @@ pub fn chat_templates_enabled() -> bool {
    }
 }
-/// Convenience: probe for `tokenizer_config.json` in the same
+/// Probe for the model's chat template in the same directory the
-/// directory the tokenizer was loaded from. Both files come from
+/// tokenizer was loaded from, following HuggingFace `transformers`
-/// the same HuggingFace snapshot in the hf-hub cache, so the
+/// precedence: a standalone `chat_template.jinja` (then
-/// sibling path is reliable.
+/// `chat_template.json`) wins over the `chat_template` field in
 /// `tokenizer_config.json`.
 ///
 /// This matters for multimodal models: Qwen3-VL / Qwen3.6 ship their
 /// vision-aware template (the one that emits
 /// `<|vision_start|><|image_pad|><|vision_end|>` per image) **only** in
 /// `chat_template.jinja`, and may not ship a `tokenizer_config.json` at
 /// all. Reading `tokenizer_config.json` alone returned `None`, which
 /// dropped image content into the text-only `format_qwen3_prompt`
 /// fallback — so image requests rendered zero `<|image_pad|>` tokens
 /// and the vision path bailed on the count mismatch.
 pub fn load_chat_template_alongside(tokenizer_json_path: &Path) -> Option<String> {
    // Every candidate file is looked up as a sibling of the tokenizer file;
    // without a parent component there is nowhere to probe.
    let parent = tokenizer_json_path.parent()?;

    // 1. Standalone Jinja file — raw template text, highest priority.
    let jinja_path = parent.join("chat_template.jinja");
    match std::fs::read_to_string(&jinja_path) {
        Ok(text) if !text.trim().is_empty() => {
            tracing::info!(
                path = %jinja_path.display(),
                "chat_template: loaded standalone chat_template.jinja"
            );
            return Some(text);
        }
        Ok(_) => {
            tracing::warn!(
                path = %jinja_path.display(),
                "chat_template: chat_template.jinja present but empty; trying other sources"
            );
        }
        // Absent — the common case; fall through quietly.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        // Any other failure (permissions, invalid UTF-8, ...) means the
        // highest-priority source could not be used. Still fall through so
        // loading stays best-effort, but say so: otherwise a lower-priority
        // template is picked up with no hint as to why.
        Err(err) => {
            tracing::warn!(
                path = %jinja_path.display(),
                error = %err,
                "chat_template: chat_template.jinja could not be read; trying other sources"
            );
        }
    }

    // 2. Standalone JSON file — `{"chat_template": "..."}` form.
    //    The existence check keeps the loader from being invoked on a file
    //    that is simply not there.
    let json_path = parent.join("chat_template.json");
    if json_path.exists()
        && let Some(t) = load_chat_template_from(&json_path)
    {
        tracing::info!(
            path = %json_path.display(),
            "chat_template: loaded standalone chat_template.json"
        );
        return Some(t);
    }

    // 3. The `chat_template` field inside tokenizer_config.json — lowest
    //    priority; its result (including `None`) is the final answer.
    let config_path = parent.join("tokenizer_config.json");
    load_chat_template_from(&config_path)
 }
@@ -210,6 +253,87 @@ mod tests {
    use super::*;
    use serde_json::json;
    /// Renders an OpenAI-style `image_url` content part through the
    /// image-insertion condition of the Qwen3.6 vision template and
    /// checks that `<|image_pad|>` shows up in the output — i.e. that
    /// `'image_url' in item` matches the serde_json object our renderer
    /// forwards for such a part.
    #[test]
    fn image_url_part_renders_image_pad() {
        // Condition copied from doc/vision-qwen3_6-spec.md (lines 8-18
        // of the real chat_template.jinja).
        let template = "{%- for message in messages -%}\
 {%- if message.content is string -%}\
 {{ message.content }}\
 {%- else -%}\
 {%- for item in message.content -%}\
 {%- if 'image' in item or 'image_url' in item or item.type == 'image' -%}\
 <|vision_start|><|image_pad|><|vision_end|>\
 {%- elif item.type == 'text' -%}\
 {{ item.text }}\
 {%- endif -%}\
 {%- endfor -%}\
 {%- endif -%}\
 {%- endfor -%}";

        // One user turn carrying a text part followed by an image part.
        let text_part = json!({"type": "text", "text": "what is this?"});
        let image_part =
            json!({"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA="}});
        let user_turn = ChatMessage {
            role: "user".into(),
            content: MessageContent::Parts(vec![text_part, image_part]),
            extra: json!({}),
        };
        let messages = vec![user_turn];

        let out = render_chat_template(template, &messages, &Value::Null, &Value::Null)
            .expect("render should succeed");
        assert!(
            out.contains("<|image_pad|>"),
            "expected the image_url part to emit <|image_pad|>; rendered: {out:?}"
        );
    }
    /// When both a standalone `chat_template.jinja` and a
    /// `tokenizer_config.json` with a `chat_template` field sit next to
    /// the tokenizer, the loader must return the `.jinja` file's text.
    #[test]
    fn standalone_jinja_template_takes_precedence() {
        let scratch = std::env::temp_dir().join(format!(
            "neuron_ct_precedence_{}_{}",
            std::process::id(),
            line!()
        ));
        std::fs::create_dir_all(&scratch).unwrap();
        // Populate the directory with both competing template sources.
        for (name, body) in [
            ("chat_template.jinja", "FROM_JINJA"),
            ("tokenizer_config.json", r#"{"chat_template": "FROM_CONFIG"}"#),
        ] {
            std::fs::write(scratch.join(name), body).unwrap();
        }
        // The loader only uses this path to find its parent directory;
        // the tokenizer file itself is never created.
        let tokenizer_json = scratch.join("tokenizer.json");
        let loaded = load_chat_template_alongside(&tokenizer_json);
        // Clean up before asserting so a failure does not leave the dir behind.
        let _ = std::fs::remove_dir_all(&scratch);
        assert_eq!(loaded.as_deref(), Some("FROM_JINJA"));
    }
    /// If only `tokenizer_config.json` is present, the loader must
    /// return its `chat_template` field.
    #[test]
    fn falls_back_to_tokenizer_config_when_no_standalone() {
        let scratch = std::env::temp_dir().join(format!(
            "neuron_ct_fallback_{}_{}",
            std::process::id(),
            line!()
        ));
        std::fs::create_dir_all(&scratch).unwrap();
        // Only the config file exists — no standalone template files.
        let config_file = scratch.join("tokenizer_config.json");
        std::fs::write(config_file, r#"{"chat_template": "FROM_CONFIG"}"#).unwrap();
        let tokenizer_json = scratch.join("tokenizer.json");
        let loaded = load_chat_template_alongside(&tokenizer_json);
        // Clean up before asserting so a failure does not leave the dir behind.
        let _ = std::fs::remove_dir_all(&scratch);
        assert_eq!(loaded.as_deref(), Some("FROM_CONFIG"));
    }
    fn user_msg(text: &str) -> ChatMessage {
        ChatMessage {
            role: "user".into(),