feat(neuron): operator pixel-budget env override + doc cleanup (#14 C5)
Some checks failed
CI / CUDA type-check (push) Successful in 32s
build-prerelease / Resolve version stamps (push) Successful in 38s
CI / Format (push) Successful in 45s
CI / Test (push) Failing after 58s
CI / Clippy (push) Successful in 2m41s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Build neuron-blackwell (push) Successful in 6m20s
build-prerelease / Build neuron-ampere (push) Successful in 7m18s
build-prerelease / Build neuron-ada (push) Successful in 5m10s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 3m6s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 3m7s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m45s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m5s
Some checks failed
CI / CUDA type-check (push) Successful in 32s
build-prerelease / Resolve version stamps (push) Successful in 38s
CI / Format (push) Successful in 45s
CI / Test (push) Failing after 58s
CI / Clippy (push) Successful in 2m41s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Build neuron-blackwell (push) Successful in 6m20s
build-prerelease / Build neuron-ampere (push) Successful in 7m18s
build-prerelease / Build neuron-ada (push) Successful in 5m10s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 3m6s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 3m7s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m45s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m5s
- PreprocessProfile::qwen3_6() reads NEURON_VISION_MIN_PIXELS / NEURON_VISION_MAX_PIXELS (clamped to factor² ≤ min ≤ max), matching the NEURON_VISION_LEGACY_* / NEURON_MROPE knob convention. Defaults remain 256²…1024² (64…1024 LM tokens/image). - Test: a max-resolution source caps within the token budget (can't blow NEURON_MAX_PROMPT_TOKENS). - Strip stale fixed-resolution / "MRoPE gap (#15)" / 14×14 language from the preprocess, mod, and rope doc-comments now that resolution is dynamic and M-RoPE is implemented. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -422,15 +422,10 @@ impl Qwen3_5Model {
|
|||||||
///
|
///
|
||||||
/// The splice replaces the LM's text-side embedding at each
|
/// The splice replaces the LM's text-side embedding at each
|
||||||
/// `image_token_id` position with the corresponding row from
|
/// `image_token_id` position with the corresponding row from
|
||||||
/// `image_embeds`. After the splice the decoder runs unchanged.
|
/// `image_embeds`. After the splice the decoder runs the interleaved
|
||||||
///
|
/// M-RoPE path: `grids` carries each image's post-merge LM grid
|
||||||
/// **MRoPE gap.** Qwen3.6's `rope_parameters` declares MRoPE
|
/// `(lm_gh, lm_gw)` so `get_rope_index` assigns image tokens their 2D
|
||||||
/// (interleaved text/height/width axes); Stage B applies plain
|
/// coordinates (dynamic resolution, #14).
|
||||||
/// text-position RoPE to image tokens. The model still attends
|
|
||||||
/// to image content but loses spatial structure that MRoPE-aware
|
|
||||||
/// position encoding would preserve. Tracked under issue #15
|
|
||||||
/// (numerical validation) — quality benchmark from Stage D should
|
|
||||||
/// surface the impact, and the fix lives in `rope::RotaryEmbedding`.
|
|
||||||
pub fn forward_with_vision(
|
pub fn forward_with_vision(
|
||||||
&mut self,
|
&mut self,
|
||||||
input_ids: &Tensor,
|
input_ids: &Tensor,
|
||||||
@@ -461,7 +456,7 @@ impl Qwen3_5Model {
|
|||||||
|
|
||||||
// Vision path: splice image embeddings at `image_token_id`
|
// Vision path: splice image embeddings at `image_token_id`
|
||||||
// positions and build interleaved M-RoPE cos/sin so image tokens
|
// positions and build interleaved M-RoPE cos/sin so image tokens
|
||||||
// carry their 14×14 grid coordinates. Text / decode skip the
|
// carry their 2D (lm_gh × lm_gw) grid coordinates. Text / decode skip the
|
||||||
// device→host id copy entirely and take the plain-RoPE fast path
|
// device→host id copy entirely and take the plain-RoPE fast path
|
||||||
// — bit-for-bit the pre-M-RoPE behaviour when `rope_delta == 0`.
|
// — bit-for-bit the pre-M-RoPE behaviour when `rope_delta == 0`.
|
||||||
let (cos, sin) = if let (Some(img), Some(tok_id)) = (image_embeds, image_token_id) {
|
let (cos, sin) = if let (Some(img), Some(tok_id)) = (image_embeds, image_token_id) {
|
||||||
|
|||||||
@@ -55,18 +55,36 @@ pub struct PreprocessProfile {
|
|||||||
pub image_std: [f32; 3],
|
pub image_std: [f32; 3],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Default pixel budget for Qwen3.6 (`256² … 1024²` → 64 … 1024 LM
|
||||||
|
/// tokens/image). Generous for documents/OCR, bounded for serving on
|
||||||
|
/// 2×RTX5090. Operators tune with `NEURON_VISION_MIN_PIXELS` /
|
||||||
|
/// `NEURON_VISION_MAX_PIXELS` (matching the other `NEURON_VISION_*` knobs).
|
||||||
|
const QWEN3_6_MIN_PIXELS: u32 = 65_536;
|
||||||
|
const QWEN3_6_MAX_PIXELS: u32 = 1_048_576;
|
||||||
|
|
||||||
|
/// Reads a pixel-count override from the environment variable `name`.
///
/// Leading and trailing whitespace in the value is ignored. Falls back to
/// `default` when the variable cannot be read (unset or not valid Unicode)
/// or when its trimmed value does not parse as a `u32`.
fn env_pixels(name: &str, default: u32) -> u32 {
    match std::env::var(name) {
        // A readable value that fails to parse is treated the same as an
        // absent one: the caller's default wins.
        Ok(raw) => raw.trim().parse::<u32>().unwrap_or(default),
        Err(_) => default,
    }
}
|
||||||
|
|
||||||
impl PreprocessProfile {
|
impl PreprocessProfile {
|
||||||
/// Profile for Qwen3.6. Native-aspect `smart_resize` (factor 32),
|
/// Profile for Qwen3.6. Native-aspect `smart_resize` (factor 32),
|
||||||
/// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults:
|
/// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults to
|
||||||
/// `min = 256² = 65536` (→ 8×8 = 64 LM tokens) and
|
/// [`QWEN3_6_MIN_PIXELS`]…[`QWEN3_6_MAX_PIXELS`], overridable via the
|
||||||
/// `max = 1024² = 1048576` (→ 32×32 = 1024 LM tokens) — generous for
|
/// `NEURON_VISION_MIN_PIXELS` / `NEURON_VISION_MAX_PIXELS` env vars.
|
||||||
/// documents/OCR, bounded for serving on 2×RTX5090. (Operator
|
/// The budget is clamped to sane values: `min ≥ factor²` (at least one LM token)
|
||||||
/// override lands with the `[harness.candle.vision]` config in #14 C5.)
|
/// and `max ≥ min`.
|
||||||
pub fn qwen3_6() -> Self {
|
pub fn qwen3_6() -> Self {
|
||||||
|
let factor = 32u32;
|
||||||
|
let f2 = factor * factor;
|
||||||
|
let min_pixels = env_pixels("NEURON_VISION_MIN_PIXELS", QWEN3_6_MIN_PIXELS).max(f2);
|
||||||
|
let max_pixels = env_pixels("NEURON_VISION_MAX_PIXELS", QWEN3_6_MAX_PIXELS).max(min_pixels);
|
||||||
Self {
|
Self {
|
||||||
factor: 32,
|
factor,
|
||||||
min_pixels: 65_536,
|
min_pixels,
|
||||||
max_pixels: 1_048_576,
|
max_pixels,
|
||||||
image_mean: [0.5, 0.5, 0.5],
|
image_mean: [0.5, 0.5, 0.5],
|
||||||
image_std: [0.5, 0.5, 0.5],
|
image_std: [0.5, 0.5, 0.5],
|
||||||
}
|
}
|
||||||
@@ -369,4 +387,18 @@ mod tests {
|
|||||||
let err = smart_resize(1, 500, 32, 65_536, 1_048_576).unwrap_err();
|
let err = smart_resize(1, 500, 32, 65_536, 1_048_576).unwrap_err();
|
||||||
assert!(format!("{err:#}").contains("200:1"));
|
assert!(format!("{err:#}").contains("200:1"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn qwen3_6_default_budget_bounds_lm_tokens() {
    // An oversized source image must be scaled down to fit `max_pixels`, so
    // the per-image LM token count stays within the budget derived from it
    // (and therefore can't blow NEURON_MAX_PROMPT_TOKENS).
    let profile = PreprocessProfile::qwen3_6();

    // One LM token per `factor × factor` pixel tile.
    let tile_area = profile.factor * profile.factor;
    let budget = profile.max_pixels / tile_area;

    let (h, w) = profile.resized_dims(8000, 6000).unwrap();
    let rows = h / profile.factor;
    let cols = w / profile.factor;
    let lm_tokens = rows * cols;

    assert!(
        lm_tokens <= budget,
        "max-res image LM tokens {lm_tokens} must stay within budget {budget}"
    );
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user