fix(neuron/candle): dense Qwen3 returns rank-3 logits, double-squeeze
All checks were successful
build-prerelease / Resolve version stamps (push) Successful in 33s
CI / Format (push) Successful in 38s
CI / Clippy (push) Successful in 2m19s
build-prerelease / Build neuron-blackwell (push) Successful in 3m32s
CI / Test (push) Successful in 4m34s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m16s
build-prerelease / Package cortex RPM (push) Successful in 1m18s
build-prerelease / Build neuron-ampere (push) Successful in 4m55s
build-prerelease / Build neuron-ada (push) Successful in 5m11s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 2m50s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 2m52s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m35s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m0s
All checks were successful
build-prerelease / Resolve version stamps (push) Successful in 33s
CI / Format (push) Successful in 38s
CI / Clippy (push) Successful in 2m19s
build-prerelease / Build neuron-blackwell (push) Successful in 3m32s
CI / Test (push) Successful in 4m34s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m16s
build-prerelease / Package cortex RPM (push) Successful in 1m18s
build-prerelease / Build neuron-ampere (push) Successful in 4m55s
build-prerelease / Build neuron-ada (push) Successful in 5m11s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 2m50s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 2m52s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m35s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m0s
Caught by live validation against Qwen/Qwen3-1.7B on beast:
HTTP 500 "unexpected rank, expected: 1, got: 2 ([1, 151936])"
Candle's qwen3::ModelForCausalLM::forward returns shape [B, 1, V]
(no final squeeze) while quantized_qwen3::ModelWeights::forward
returns [B, V] (with squeeze(1) at the end). My match arms applied
a single squeeze(0) uniformly, which is correct for the quantized
path ([1, V] → [V]) but only reduces the dense output from
[1, 1, V] to [1, V], which then trips the to_vec1() call inside
apply_repeat_penalty, which expects a rank-1 tensor.
Dense match arms now strip both batch and seq dims:
model.forward(&input, offset)?.squeeze(0)?.squeeze(0)?
Also fixes validate-neuron.sh's `${3:-Q4_K_M}` → `${3-Q4_K_M}`
(no colon) so passing an explicit empty third arg now drives the
dense path instead of falling back to Q4_K_M.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -714,8 +714,11 @@ fn run_inference(
|
||||
ModelArch::Qwen3Dense(model) => {
|
||||
model.clear_kv_cache();
|
||||
let input = Tensor::new(prompt_tokens, device)?.unsqueeze(0)?;
|
||||
let logits = model.forward(&input, 0)?;
|
||||
let logits = logits.squeeze(0)?;
|
||||
// qwen3::ModelForCausalLM::forward returns [B, 1, V] —
|
||||
// no final squeeze on the dense path, unlike the quantized
|
||||
// variant which returns [B, V]. Strip both batch and seq
|
||||
// dims to get the rank-1 logits LogitsProcessor expects.
|
||||
let logits = model.forward(&input, 0)?.squeeze(0)?.squeeze(0)?;
|
||||
sample_with_penalty(&logits, &generated, &mut logits_processor)?
|
||||
}
|
||||
};
|
||||
@@ -735,8 +738,11 @@ fn run_inference(
|
||||
}
|
||||
ModelArch::Qwen3Dense(model) => {
|
||||
let input = Tensor::new(&[next_token], device)?.unsqueeze(0)?;
|
||||
let logits = model.forward(&input, prompt_tokens.len() + index)?;
|
||||
let logits = logits.squeeze(0)?;
|
||||
// Dense returns [B, 1, V]; strip both leading dims.
|
||||
let logits = model
|
||||
.forward(&input, prompt_tokens.len() + index)?
|
||||
.squeeze(0)?
|
||||
.squeeze(0)?;
|
||||
sample_with_penalty(&logits, &generated, &mut logits_processor)?
|
||||
}
|
||||
};
|
||||
@@ -852,8 +858,11 @@ fn run_inference_streaming(
|
||||
}
|
||||
ModelArch::Qwen3Dense(model) => {
|
||||
let input = Tensor::new(&[next_token], device)?.unsqueeze(0)?;
|
||||
let logits = model.forward(&input, prompt_tokens.len() + index)?;
|
||||
let logits = logits.squeeze(0)?;
|
||||
// Dense returns [B, 1, V]; strip both leading dims.
|
||||
let logits = model
|
||||
.forward(&input, prompt_tokens.len() + index)?
|
||||
.squeeze(0)?
|
||||
.squeeze(0)?;
|
||||
sample_with_penalty(&logits, &all_tokens, &mut logits_processor)?
|
||||
}
|
||||
};
|
||||
|
||||
@@ -22,7 +22,9 @@ set -euo pipefail
|
||||
|
||||
HOST="${1:-beast.hanzalova.internal}"
|
||||
MODEL_ID="${2:-unsloth/Qwen3-0.6B-GGUF}"
|
||||
QUANT="${3:-Q4_K_M}"
|
||||
# `${3-Q4_K_M}` (no colon) only uses the default when the arg is
|
||||
# UNSET — passing an explicit empty string drives the dense path.
|
||||
QUANT="${3-Q4_K_M}"
|
||||
PORT="${NEURON_PORT:-13131}"
|
||||
BASE="http://${HOST}:${PORT}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user