feat(neuron): operator pixel-budget env override + doc cleanup (#14 C5)
Some checks failed
CI / CUDA type-check (push) Successful in 32s
build-prerelease / Resolve version stamps (push) Successful in 38s
CI / Format (push) Successful in 45s
CI / Test (push) Failing after 58s
CI / Clippy (push) Successful in 2m41s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Build neuron-blackwell (push) Successful in 6m20s
build-prerelease / Build neuron-ampere (push) Successful in 7m18s
build-prerelease / Build neuron-ada (push) Successful in 5m10s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 3m6s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 3m7s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m45s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m5s

- PreprocessProfile::qwen3_6() reads NEURON_VISION_MIN_PIXELS /
  NEURON_VISION_MAX_PIXELS (clamped to factor² ≤ min ≤ max), matching the
  NEURON_VISION_LEGACY_* / NEURON_MROPE knob convention. Defaults remain
  256²…1024² (64…1024 LM tokens/image).
- Test: a max-resolution source caps within the token budget (can't blow
  NEURON_MAX_PROMPT_TOKENS).
- Strip stale fixed-resolution / "MRoPE gap (#15)" / 14×14 language from
  the preprocess, mod, and rope doc-comments now that resolution is
  dynamic and M-RoPE is implemented.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-04 22:50:03 +03:00
parent c97a8654f5
commit d311c8ca7a
2 changed files with 45 additions and 18 deletions

View File

@@ -422,15 +422,10 @@ impl Qwen3_5Model {
/// ///
/// The splice replaces the LM's text-side embedding at each /// The splice replaces the LM's text-side embedding at each
/// `image_token_id` position with the corresponding row from /// `image_token_id` position with the corresponding row from
/// `image_embeds`. After the splice the decoder runs unchanged. /// `image_embeds`. After the splice the decoder runs the interleaved
/// /// M-RoPE path: `grids` carries each image's post-merge LM grid
/// **MRoPE gap.** Qwen3.6's `rope_parameters` declares MRoPE /// `(lm_gh, lm_gw)` so `get_rope_index` assigns image tokens their 2D
/// (interleaved text/height/width axes); Stage B applies plain /// coordinates (dynamic resolution, #14).
/// text-position RoPE to image tokens. The model still attends
/// to image content but loses spatial structure that MRoPE-aware
/// position encoding would preserve. Tracked under issue #15
/// (numerical validation) — quality benchmark from Stage D should
/// surface the impact, and the fix lives in `rope::RotaryEmbedding`.
pub fn forward_with_vision( pub fn forward_with_vision(
&mut self, &mut self,
input_ids: &Tensor, input_ids: &Tensor,
@@ -461,7 +456,7 @@ impl Qwen3_5Model {
// Vision path: splice image embeddings at `image_token_id` // Vision path: splice image embeddings at `image_token_id`
// positions and build interleaved M-RoPE cos/sin so image tokens // positions and build interleaved M-RoPE cos/sin so image tokens
// carry their 14×14 grid coordinates. Text / decode skip the // carry their 2D (lm_gh × lm_gw) grid coordinates. Text / decode skip the
// device→host id copy entirely and take the plain-RoPE fast path // device→host id copy entirely and take the plain-RoPE fast path
// — bit-for-bit the pre-M-RoPE behaviour when `rope_delta == 0`. // — bit-for-bit the pre-M-RoPE behaviour when `rope_delta == 0`.
let (cos, sin) = if let (Some(img), Some(tok_id)) = (image_embeds, image_token_id) { let (cos, sin) = if let (Some(img), Some(tok_id)) = (image_embeds, image_token_id) {

View File

@@ -55,18 +55,36 @@ pub struct PreprocessProfile {
pub image_std: [f32; 3], pub image_std: [f32; 3],
} }
/// Lower bound of the default Qwen3.6 pixel budget: `256²` pixels, which
/// corresponds to 64 LM tokens per image. Operators can tune it with the
/// `NEURON_VISION_MIN_PIXELS` environment variable (same naming scheme as
/// the other `NEURON_VISION_*` knobs).
const QWEN3_6_MIN_PIXELS: u32 = 256 * 256;

/// Upper bound of the default Qwen3.6 pixel budget: `1024²` pixels, which
/// corresponds to 1024 LM tokens per image — generous for documents/OCR,
/// bounded for serving on 2×RTX5090. Operators can tune it with the
/// `NEURON_VISION_MAX_PIXELS` environment variable.
const QWEN3_6_MAX_PIXELS: u32 = 1024 * 1024;
/// Read a pixel-count override from the environment variable `name`.
///
/// Returns the parsed value when the variable is set and its
/// whitespace-trimmed contents parse as a `u32`. In every other case
/// (variable unset, not valid Unicode, empty, negative, non-numeric, or out
/// of `u32` range) the supplied `default` is returned unchanged.
fn env_pixels(name: &str, default: u32) -> u32 {
    match std::env::var(name) {
        // A value that fails to parse is treated the same as no override.
        Ok(raw) => raw.trim().parse::<u32>().unwrap_or(default),
        Err(_) => default,
    }
}
impl PreprocessProfile { impl PreprocessProfile {
/// Profile for Qwen3.6. Native-aspect `smart_resize` (factor 32), /// Profile for Qwen3.6. Native-aspect `smart_resize` (factor 32),
/// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults: /// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults to
/// `min = 256² = 65536` (→ 8×8 = 64 LM tokens) and /// [`QWEN3_6_MIN_PIXELS`]…[`QWEN3_6_MAX_PIXELS`], overridable via the
/// `max = 1024² = 1048576` (→ 32×32 = 1024 LM tokens) — generous for /// `NEURON_VISION_MIN_PIXELS` / `NEURON_VISION_MAX_PIXELS` env vars.
/// documents/OCR, bounded for serving on 2×RTX5090. (Operator /// The budget is clamped sane: `min ≥ factor²` (at least one LM token)
/// override lands with the `[harness.candle.vision]` config in #14 C5.) /// and `max ≥ min`.
pub fn qwen3_6() -> Self { pub fn qwen3_6() -> Self {
let factor = 32u32;
let f2 = factor * factor;
let min_pixels = env_pixels("NEURON_VISION_MIN_PIXELS", QWEN3_6_MIN_PIXELS).max(f2);
let max_pixels = env_pixels("NEURON_VISION_MAX_PIXELS", QWEN3_6_MAX_PIXELS).max(min_pixels);
Self { Self {
factor: 32, factor,
min_pixels: 65_536, min_pixels,
max_pixels: 1_048_576, max_pixels,
image_mean: [0.5, 0.5, 0.5], image_mean: [0.5, 0.5, 0.5],
image_std: [0.5, 0.5, 0.5], image_std: [0.5, 0.5, 0.5],
} }
@@ -369,4 +387,18 @@ mod tests {
let err = smart_resize(1, 500, 32, 65_536, 1_048_576).unwrap_err(); let err = smart_resize(1, 500, 32, 65_536, 1_048_576).unwrap_err();
assert!(format!("{err:#}").contains("200:1")); assert!(format!("{err:#}").contains("200:1"));
} }
#[test]
fn qwen3_6_default_budget_bounds_lm_tokens() {
    // Resize a source far larger than the pixel ceiling and check that the
    // resulting per-image LM token count does not exceed what `max_pixels`
    // allows (so one image can't blow NEURON_MAX_PROMPT_TOKENS).
    let profile = PreprocessProfile::qwen3_6();
    let factor = profile.factor;

    // Largest number of `factor × factor` cells that fit in the budget.
    let budget = profile.max_pixels / (factor * factor);

    let (height, width) = profile.resized_dims(8000, 6000).unwrap();
    let lm_tokens = (height / factor) * (width / factor);

    assert!(
        lm_tokens <= budget,
        "max-res image LM tokens {lm_tokens} must stay within budget {budget}"
    );
}
} }