From becf61b9c14de709ed4def2887d21e97a0cba594 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Tue, 26 May 2026 15:26:21 +0300 Subject: [PATCH] feat(script): validate-neuron.sh waits for /health activation=ready Adds wait_for_ready() that polls /health until activation.state flips to "ready" (or the NEURON_LOAD_TIMEOUT deadline). Inserted between probe_health and the is_loaded/trigger_load step. Before this, running validate-neuron.sh right after deploy.sh raced the background pre-warm and failed in ~9 ms with "neuron not reachable" (the pre-2026-05-26 build) or with a partial-load error (the new build, where the listener binds before default_models finishes). The poll prints the in_progress model on each tick so an operator watching the log can see which model is delaying readiness. Backs off from 2s to 10s after the first few iterations so a long TP load doesn't spam. Co-Authored-By: Claude Opus 4.7 (1M context) --- script/validate-neuron.sh | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/script/validate-neuron.sh b/script/validate-neuron.sh index 3d39d81..7bd1bbc 100755 --- a/script/validate-neuron.sh +++ b/script/validate-neuron.sh @@ -66,6 +66,58 @@ probe_health() { || die "neuron not reachable at ${BASE}/health" } +# Block until the neuron reports `activation.state == "ready"` on +# `/health`. Without this, validate-neuron.sh used to race the +# background pre-warm (the listener binds immediately but big TP +# loads run for minutes after) and either fail with ECONNREFUSED +# (pre-2026-05-26 build, where load was synchronous before bind) or +# get a 404 from /models/load against a partially-loaded model. +# +# The poll cap is `NEURON_LOAD_TIMEOUT` since pre-warm and an +# on-demand load are the same operation under different triggers. +# Short interval at the start (catches a quick-loading host without +# extra latency) backs off after the first few iterations to keep +# log spam down on a slow load. 
+wait_for_ready() {
+  local deadline=$(( $(date +%s) + LOAD_TIMEOUT ))
+  local state='' attempt=0 body='' in_progress='' pause=0 left=0
+  while (( $(date +%s) < deadline )); do
+    attempt=$(( attempt + 1 ))
+    # One fetch per tick: state and in_progress share a snapshot, and
+    # curl's own status decides "unreachable" even without pipefail.
+    if body=$(curl --silent --max-time 5 "${BASE}/health"); then
+      state=$(jq -r '.activation.state // "unknown"' <<<"${body}") \
+        || state=unreachable
+    else
+      state=unreachable
+    fi
+    case "${state}" in
+      ready)
+        say "/health activation.state=ready (after ${attempt} probe(s))"
+        return 0
+        ;;
+      pre_warming)
+        in_progress=$(jq -r '.activation.in_progress // ""' <<<"${body}") \
+          || in_progress=''
+        say "/health pre_warming (in_progress=${in_progress}); waiting"
+        ;;
+      unreachable)
+        say "/health unreachable; waiting"
+        ;;
+      *)
+        say "/health unexpected activation.state=${state}; waiting"
+        ;;
+    esac
+    # 2s early to catch quick loads, then 10s to limit log spam on a
+    # multi-minute TP load; clamp so we never sleep past the deadline.
+    pause=$(( attempt < 5 ? 2 : 10 ))
+    left=$(( deadline - $(date +%s) ))
+    (( pause <= left )) || pause=${left}
+    (( pause <= 0 )) || sleep "${pause}"
+  done
+  die "neuron not ready within ${LOAD_TIMEOUT}s (last state: ${state})"
+}
+
 list_loaded_ids() {
   # The manifest is YAML and uses yq; HTTP responses are JSON and use
   # jq directly. pip-yq parses input as YAML by default, which trips
@@ -157,6 +209,11 @@ run_probe() {
 say "validating neuron at ${BASE}"
 probe_health
 say "/health OK"
+# Background pre-warm from default_models means /health is reachable
+# but `activation.state` can still be `pre_warming` for minutes after
+# service start. Block here so the subsequent is_loaded / trigger_load
+# steps don't race a partially-materialised model.
+wait_for_ready
 
 if is_loaded; then
   say "${MODEL_ID} already loaded"