From 1866b99a89e91c80cd2709a9675df71065404188 Mon Sep 17 00:00:00 2001
From: rob thijssen <grenade@rob.tn>
Date: Tue, 19 May 2026 13:43:02 +0300
Subject: [PATCH] =?UTF-8?q?fix(validate-neuron):=20jq=20for=20JSON,=20say?=
 =?UTF-8?q?=E2=86=92stderr,=20sane=20max=5Ftokens?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four real issues caught while exercising the script end-to-end against
the live quadbrat node:

1. say() printed status to stdout. Inside run_probe(), the
   "POST /v1/chat/completions (probe: ...)" line was being captured
   by `raw=$(run_probe)` along with the JSON body, so jq saw
   "[host] POST..." as the first line and choked at column 29 with
   "Invalid numeric literal" (it tried to parse the `[` as the start
   of a JSON array). Redirect say() to stderr so command
   substitutions capture only the intended return value.

2. The pretty-print step `echo "${raw}" | yq -r '.'` re-emitted the
   JSON as YAML, which fails on response content that looks like YAML
   markers (chatcmpl ids that parse as aliases, escaped quotes inside
   <think>...</think> blocks). Drop the pretty-print; just echo the
   raw JSON.

3. JSON response parsing now uses jq (always JSON) instead of yq
   (parses input as YAML by default). yq remains in use only for the
   genuinely-YAML asset/manifest.yml elsewhere.

4. max_tokens bumped 32 → 256. Qwen3 prepends a <think>...</think>
   reasoning block before its final answer when the chat template
   enables thinking mode, and that eats most of a small budget — the
   "Paris" answer was being truncated mid-thought. 256 leaves enough
   room for both.

Verified pipeline end-to-end on quadbrat (RTX 3060, helexa-neuron-ampere
git602e8e1): /health OK → /models/load (unsloth/Qwen3-0.6B-GGUF Q4_K_M)
→ /v1/chat/completions → response content contains "Paris".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 script/validate-neuron.sh | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/script/validate-neuron.sh b/script/validate-neuron.sh
index 1252443..47a892b 100755
--- a/script/validate-neuron.sh
+++ b/script/validate-neuron.sh
@@ -31,7 +31,10 @@ BASE="http://${HOST}:${PORT}"
 # beyond gibberish.
 PROBE_PROMPT='What is the capital of France? Respond with the city name only, no punctuation.'
 EXPECT_SUBSTR='Paris'
-MAX_TOKENS=32
+# Qwen3 prepends <think>...</think> reasoning before the answer when the
+# chat template enables thinking mode, which eats most of a small token
+# budget. 256 leaves enough room for thinking + final answer.
+MAX_TOKENS=256
 
 # /models/load is synchronous — neuron blocks the response until the
 # hf-hub download + GGUF parse + tensor materialisation is done. A
@@ -40,7 +43,10 @@ MAX_TOKENS=32
 LOAD_TIMEOUT=600
 INFER_TIMEOUT=120
 
-say() { printf '[%s] %s\n' "${HOST}" "$*"; }
+# Status messages go to stderr so command substitutions like
+# `raw=$(run_probe)` capture only the function's intended return value
+# (an HTTP body), not the progress chatter.
+say() { printf '[%s] %s\n' "${HOST}" "$*" >&2; }
 die() { say "FAIL: $*"; exit 1; }
 
 probe_health() {
@@ -49,7 +55,11 @@ probe_health() {
 }
 
 list_loaded_ids() {
-    curl --silent --fail "${BASE}/models" | yq -r '.[].id'
+    # The manifest is YAML and uses yq; HTTP responses are JSON and use
+    # jq directly. pip-yq parses input as YAML by default, which trips
+    # on JSON content that happens to look like YAML aliases (chatcmpl
+    # ids, escaped quotes inside `<think>...</think>` blocks, etc.).
+    curl --silent --fail "${BASE}/models" | jq -r '.[].id'
 }
 
 is_loaded() {
@@ -88,7 +98,7 @@ EOF
 run_probe() {
     say "POST /v1/chat/completions (probe: ${PROBE_PROMPT})"
     local payload
-    payload=$(yq -n -c \
+    payload=$(jq -n -c \
         --arg model "${MODEL_ID}" \
         --arg content "${PROBE_PROMPT}" \
         --argjson tokens "${MAX_TOKENS}" \
@@ -124,10 +134,15 @@ fi
 
 raw=$(run_probe)
 echo "---"
-echo "${raw}" | yq -r '.'
+# Dump the raw JSON. Don't pipe through `yq -r '.'` — yq's default
+# YAML output mode chokes on JSON strings that contain `<` (and the
+# `<think>` markers Qwen3 emits during reasoning are a perfect
+# example). The targeted field lookup below uses `jq -r '.path'`
+# instead, since jq always treats its input as JSON.
+echo "${raw}"
 echo "---"
 
-content=$(echo "${raw}" | yq -r '.choices[0].message.content // empty')
+content=$(echo "${raw}" | jq -r '.choices[0].message.content // empty')
 if [[ -z "${content}" ]]; then
     die "no content in chat completion response"
 fi