feat(neuron): operator pixel-budget env override + doc cleanup (#14 C5)
Some checks failed
CI / CUDA type-check (push) Successful in 32s
build-prerelease / Resolve version stamps (push) Successful in 38s
CI / Format (push) Successful in 45s
CI / Test (push) Failing after 58s
CI / Clippy (push) Successful in 2m41s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Build neuron-blackwell (push) Successful in 6m20s
build-prerelease / Build neuron-ampere (push) Successful in 7m18s
build-prerelease / Build neuron-ada (push) Successful in 5m10s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 3m6s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 3m7s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m45s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m5s
Some checks failed
CI / CUDA type-check (push) Successful in 32s
build-prerelease / Resolve version stamps (push) Successful in 38s
CI / Format (push) Successful in 45s
CI / Test (push) Failing after 58s
CI / Clippy (push) Successful in 2m41s
CI / Build cortex SRPM (push) Has been skipped
CI / Build neuron SRPM (push) Has been skipped
CI / Publish cortex to COPR (push) Has been skipped
CI / Publish neuron to COPR (push) Has been skipped
CI / Bump version in source (push) Has been skipped
build-prerelease / Build cortex binary (push) Successful in 4m14s
build-prerelease / Package cortex RPM (push) Successful in 1m23s
build-prerelease / Build neuron-blackwell (push) Successful in 6m20s
build-prerelease / Build neuron-ampere (push) Successful in 7m18s
build-prerelease / Build neuron-ada (push) Successful in 5m10s
build-prerelease / Package helexa-neuron-ada RPM (push) Successful in 3m6s
build-prerelease / Package helexa-neuron-ampere RPM (push) Successful in 3m7s
build-prerelease / Package helexa-neuron-blackwell RPM (push) Successful in 3m45s
build-prerelease / Publish to rpm.lair.cafe (unstable) (push) Successful in 1m5s
- PreprocessProfile::qwen3_6() reads NEURON_VISION_MIN_PIXELS / NEURON_VISION_MAX_PIXELS (clamped to factor² ≤ min ≤ max), matching the NEURON_VISION_LEGACY_* / NEURON_MROPE knob convention. Defaults remain 256²…1024² (64…1024 LM tokens/image).
- Test: a max-resolution source caps within the token budget (can't blow NEURON_MAX_PROMPT_TOKENS).
- Strip stale fixed-resolution / "MRoPE gap (#15)" / 14×14 language from the preprocess, mod, and rope doc-comments now that resolution is dynamic and M-RoPE is implemented.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -422,15 +422,10 @@ impl Qwen3_5Model {
|
||||
///
|
||||
/// The splice replaces the LM's text-side embedding at each
|
||||
/// `image_token_id` position with the corresponding row from
|
||||
/// `image_embeds`. After the splice the decoder runs unchanged.
|
||||
///
|
||||
/// **MRoPE gap.** Qwen3.6's `rope_parameters` declares MRoPE
|
||||
/// (interleaved text/height/width axes); Stage B applies plain
|
||||
/// text-position RoPE to image tokens. The model still attends
|
||||
/// to image content but loses spatial structure that MRoPE-aware
|
||||
/// position encoding would preserve. Tracked under issue #15
|
||||
/// (numerical validation) — quality benchmark from Stage D should
|
||||
/// surface the impact, and the fix lives in `rope::RotaryEmbedding`.
|
||||
/// `image_embeds`. After the splice the decoder runs the interleaved
|
||||
/// M-RoPE path: `grids` carries each image's post-merge LM grid
|
||||
/// `(lm_gh, lm_gw)` so `get_rope_index` assigns image tokens their 2D
|
||||
/// coordinates (dynamic resolution, #14).
|
||||
pub fn forward_with_vision(
|
||||
&mut self,
|
||||
input_ids: &Tensor,
|
||||
@@ -461,7 +456,7 @@ impl Qwen3_5Model {
|
||||
|
||||
// Vision path: splice image embeddings at `image_token_id`
|
||||
// positions and build interleaved M-RoPE cos/sin so image tokens
|
||||
// carry their 14×14 grid coordinates. Text / decode skip the
|
||||
// carry their 2D (lm_gh × lm_gw) grid coordinates. Text / decode skip the
|
||||
// device→host id copy entirely and take the plain-RoPE fast path
|
||||
// — bit-for-bit the pre-M-RoPE behaviour when `rope_delta == 0`.
|
||||
let (cos, sin) = if let (Some(img), Some(tok_id)) = (image_embeds, image_token_id) {
|
||||
|
||||
@@ -55,18 +55,36 @@ pub struct PreprocessProfile {
|
||||
pub image_std: [f32; 3],
|
||||
}
|
||||
|
||||
/// Default lower bound of the Qwen3.6 pixel budget: `256² = 65536` pixels
/// (→ 64 LM tokens/image). Operators tune it with
/// `NEURON_VISION_MIN_PIXELS` (matching the other `NEURON_VISION_*` knobs).
const QWEN3_6_MIN_PIXELS: u32 = 65_536;

/// Default upper bound of the Qwen3.6 pixel budget: `1024² = 1048576` pixels
/// (→ 1024 LM tokens/image). Generous for documents/OCR, bounded for serving
/// on 2×RTX5090. Operators tune it with `NEURON_VISION_MAX_PIXELS`.
const QWEN3_6_MAX_PIXELS: u32 = 1_048_576;
|
||||
|
||||
/// Reads a pixel-count override from the environment variable `name`.
///
/// Surrounding whitespace is trimmed before parsing. Returns `default` when
/// the variable is unset, is not valid Unicode, or does not parse as a `u32`
/// (negative, non-numeric, or out of range). A malformed override is ignored
/// silently rather than reported, so callers always get a usable value.
fn env_pixels(name: &str, default: u32) -> u32 {
    std::env::var(name)
        .ok()
        .and_then(|v| v.trim().parse::<u32>().ok())
        .unwrap_or(default)
}
|
||||
|
||||
impl PreprocessProfile {
|
||||
/// Profile for Qwen3.6. Native-aspect `smart_resize` (factor 32),
|
||||
/// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults:
|
||||
/// `min = 256² = 65536` (→ 8×8 = 64 LM tokens) and
|
||||
/// `max = 1024² = 1048576` (→ 32×32 = 1024 LM tokens) — generous for
|
||||
/// documents/OCR, bounded for serving on 2×RTX5090. (Operator
|
||||
/// override lands with the `[harness.candle.vision]` config in #14 C5.)
|
||||
/// normalise to `[-1, 1]` via mean=std=0.5. Pixel budget defaults to
|
||||
/// [`QWEN3_6_MIN_PIXELS`]…[`QWEN3_6_MAX_PIXELS`], overridable via the
|
||||
/// `NEURON_VISION_MIN_PIXELS` / `NEURON_VISION_MAX_PIXELS` env vars.
|
||||
/// The budget is clamped sane: `min ≥ factor²` (at least one LM token)
|
||||
/// and `max ≥ min`.
|
||||
pub fn qwen3_6() -> Self {
|
||||
let factor = 32u32;
|
||||
let f2 = factor * factor;
|
||||
let min_pixels = env_pixels("NEURON_VISION_MIN_PIXELS", QWEN3_6_MIN_PIXELS).max(f2);
|
||||
let max_pixels = env_pixels("NEURON_VISION_MAX_PIXELS", QWEN3_6_MAX_PIXELS).max(min_pixels);
|
||||
Self {
|
||||
factor: 32,
|
||||
min_pixels: 65_536,
|
||||
max_pixels: 1_048_576,
|
||||
factor,
|
||||
min_pixels,
|
||||
max_pixels,
|
||||
image_mean: [0.5, 0.5, 0.5],
|
||||
image_std: [0.5, 0.5, 0.5],
|
||||
}
|
||||
@@ -369,4 +387,18 @@ mod tests {
|
||||
let err = smart_resize(1, 500, 32, 65_536, 1_048_576).unwrap_err();
|
||||
assert!(format!("{err:#}").contains("200:1"));
|
||||
}
|
||||
|
||||
#[test]
fn qwen3_6_default_budget_bounds_lm_tokens() {
    // A huge source image caps at max_pixels → the per-image LM token
    // count stays within budget (so it can't blow NEURON_MAX_PROMPT_TOKENS).
    let p = PreprocessProfile::qwen3_6();
    let (h, w) = p.resized_dims(8000, 6000).unwrap();
    // One LM token per factor×factor pixel tile of the resized image.
    let lm_tokens = (h / p.factor) * (w / p.factor);
    // Largest token count the profile's max_pixels allows.
    let budget = p.max_pixels / (p.factor * p.factor);
    assert!(
        lm_tokens <= budget,
        "max-res image LM tokens {lm_tokens} must stay within budget {budget}"
    );
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user