From 602e8e1471152e6a9703829dbeec0115cf6dbc9a Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Tue, 19 May 2026 13:16:39 +0300 Subject: [PATCH] fix(neuron/candle): source tokenizer.json from base repo when GGUF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GGUF-only HF repos (unsloth/Qwen3-*-GGUF, Qwen/Qwen3-*-GGUF) ship the .gguf file but not tokenizer.json — the tokenizer data is embedded in the GGUF metadata itself, and the standalone tokenizer.json lives in the base non-GGUF repo (unsloth/Qwen3-0.6B, Qwen/Qwen3-0.6B, etc.). Live validation against quadbrat hit: HTTP 400 fetch tokenizer.json from unsloth/Qwen3-0.6B-GGUF: HTTP status client error (404 Not Found) resolve_files now derives the tokenizer repo by stripping a `-GGUF` or `-gguf` suffix from the model_id; non-GGUF ids fall through to fetching from the same repo. The error message includes the attempted tokenizer repo id so the next failure (e.g. base repo doesn't exist) is unambiguous. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/neuron/src/harness/candle.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/crates/neuron/src/harness/candle.rs b/crates/neuron/src/harness/candle.rs index 6482f4e..b945fbf 100644 --- a/crates/neuron/src/harness/candle.rs +++ b/crates/neuron/src/harness/candle.rs @@ -121,10 +121,33 @@ impl CandleHarness { .get(&gguf_filename) .await .with_context(|| format!("fetch GGUF {gguf_filename}"))?; - let tokenizer_path = repo + + // GGUF-only HF repos (unsloth/Qwen3-*-GGUF, Qwen/Qwen3-*-GGUF, + // etc.) ship the .gguf file but not tokenizer.json — the + // tokenizer.json lives in the base non-GGUF repo. Derive the + // base repo id by stripping a `-GGUF` / `-gguf` suffix; if + // there's no such suffix the same repo is used (works for + // non-GGUF model_ids). + let tokenizer_repo_id = spec + .model_id + .strip_suffix("-GGUF") + .or_else(|| spec.model_id.strip_suffix("-gguf")) + .unwrap_or(spec.model_id.as_str()) + .to_string(); + let tokenizer_repo = if tokenizer_repo_id == spec.model_id { + repo + } else { + tracing::debug!( + from = %spec.model_id, + to = %tokenizer_repo_id, + "tokenizer.json sourced from base repo (GGUF suffix stripped)" + ); + api.model(tokenizer_repo_id.clone()) + }; + let tokenizer_path = tokenizer_repo .get("tokenizer.json") .await - .context("fetch tokenizer.json")?; + .with_context(|| format!("fetch tokenizer.json from {tokenizer_repo_id}"))?; Ok((gguf_path, tokenizer_path)) }