fix(neuron): surface full anyhow chain + ensure $HOME exists at start

Two fixes uncovered by the live validation against beast/benjy/quadbrat: 1. api.rs swallowed everything beyond the outermost anyhow context. The validation script reported '{"error":"fetch GGUF ...gguf"}' but the actual underlying hf-hub failure (cache dir creation, network, auth, etc.) was hidden. Switching every error response to format!("{e:#}") expands the full cause chain via anyhow's alternate Display format. 2. The neuron systemd unit declared the service user but never ensured /var/lib/neuron (its $HOME) existed. hf-hub defaults its cache to ~/.cache/huggingface/hub — when $HOME is absent the cache dir creation fails and the download aborts. Adding `StateDirectory=neuron` makes systemd create + chown that directory at activation; no spec change needed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
fix(validate-neuron): default to unsloth GGUF + capture curl errors
2026-05-19 08:17:37 +03:00 · 2026-05-19 08:14:31 +03:00 · 2026-05-19 07:58:05 +03:00
3 changed files with 156 additions and 5 deletions
--- a/crates/neuron/src/api.rs
+++ b/crates/neuron/src/api.rs
@@ -56,7 +56,7 @@ async fn list_models(State(state): State<Arc<NeuronState>>) -> impl IntoResponse
        Ok(models) => Json(json!(models)).into_response(),
        Err(e) => (
            StatusCode::INTERNAL_SERVER_ERROR,
-            Json(json!({"error": e.to_string()})),
+            Json(json!({"error": format!("{e:#}")})),
        )
            .into_response(),
    }
@@ -71,7 +71,7 @@ async fn load_model(
        Ok(()) => Json(json!({"status": "loaded"})).into_response(),
        Err(e) => (
            StatusCode::BAD_REQUEST,
-            Json(json!({"error": e.to_string()})),
+            Json(json!({"error": format!("{e:#}")})),
        )
            .into_response(),
    }
@@ -95,7 +95,11 @@ async fn unload_model(
    let registry = state.registry.read().await;
    match registry.unload_model(&model_id).await {
        Ok(()) => Json(json!({"status": "unloaded"})).into_response(),
-        Err(e) => (StatusCode::NOT_FOUND, Json(json!({"error": e.to_string()}))).into_response(),
+        Err(e) => (
            StatusCode::NOT_FOUND,
            Json(json!({"error": format!("{e:#}")})),
        )
            .into_response(),
    }
 }
@@ -151,7 +155,7 @@ async fn chat_completions(
                .into_response(),
            Err(InferenceError::Other(e)) => (
                StatusCode::INTERNAL_SERVER_ERROR,
-                Json(json!({"error": e.to_string()})),
+                Json(json!({"error": format!("{e:#}")})),
            )
                .into_response(),
        }
@@ -165,7 +169,7 @@ async fn chat_completions(
                .into_response(),
            Err(InferenceError::Other(e)) => (
                StatusCode::INTERNAL_SERVER_ERROR,
-                Json(json!({"error": e.to_string()})),
+                Json(json!({"error": format!("{e:#}")})),
            )
                .into_response(),
        }
--- a/data/neuron.service
+++ b/data/neuron.service
@@ -10,6 +10,12 @@ Restart=on-failure
 RestartSec=5
 User=neuron
 Group=neuron
 # /var/lib/neuron is the neuron user's $HOME — hf-hub writes its
 # default cache there (~/.cache/huggingface/hub). Without this directive
 # systemd doesn't create the directory and hf-hub downloads fail with
 # "fetch GGUF <file>: failed to create cache dir".
 StateDirectory=neuron
 StateDirectoryMode=0755
 # Loading default_models from neuron.toml happens before the HTTP
 # listener binds; large models can take many minutes to download and
 # materialise on first activation. systemd's default TimeoutStartSec
--- a/script/validate-neuron.sh
+++ b/script/validate-neuron.sh
@@ -0,0 +1,141 @@
 #!/usr/bin/env bash
 #
 # End-to-end smoke test for a deployed neuron.
 #
 # Confirms the daemon is reachable, loads a small public Qwen3 GGUF,
 # fires a reasoning probe at /v1/chat/completions, and prints the
 # answer. Used to validate the candle harness on a real GPU host
 # before trusting it for production traffic, and as a regression test
 # after pushing new neuron builds.
 #
 # Usage:
 #   script/validate-neuron.sh [host] [model_id] [quant]
 #
 # Defaults:
 #   host     = beast.hanzalova.internal
 #   model_id = unsloth/Qwen3-0.6B-GGUF  (official Qwen3-*-GGUF repos
 #              ship Q8_0 only; unsloth's mirror ships the full Q-spectrum
 #              including Q4_K_M)
 #   quant    = Q4_K_M
 # Strict mode: stop on any failing command, on use of an unset
 # variable, and on a failure anywhere inside a pipeline.
 set -o errexit -o nounset -o pipefail
 # Positional arguments, each falling back to its default when missing
 # or empty.
 HOST=${1:-beast.hanzalova.internal}
 MODEL_ID=${2:-unsloth/Qwen3-0.6B-GGUF}
 QUANT=${3:-Q4_K_M}
 # Port comes from NEURON_PORT when set; BASE is the URL prefix every
 # request below is built on.
 PORT=${NEURON_PORT:-13131}
 BASE="http://${HOST}:${PORT}"
 # Timeouts, in seconds, handed to curl --max-time. /models/load is
 # synchronous — neuron holds the response until the hf-hub download,
 # GGUF parse and tensor materialisation have finished. A fresh
 # 0.6B-Q4_K_M is ~400 MB, which on a slow link or cold cache is easily
 # a minute, hence the generous load ceiling.
 LOAD_TIMEOUT=600
 INFER_TIMEOUT=120
 # Reasoning probe: a concrete question that small models can still
 # answer correctly at low temperature. Seeing "Paris" in the reply is
 # a strong signal of basic competence rather than gibberish.
 PROBE_PROMPT='What is the capital of France? Respond with the city name only, no punctuation.'
 EXPECT_SUBSTR=Paris
 MAX_TOKENS=32
 # Print one progress line on stdout, prefixed with the target host in
 # square brackets. All arguments are joined into a single message.
 say() {
    local message="$*"
    printf '[%s] %s\n' "${HOST}" "${message}"
 }
 # Report a fatal error and terminate with exit status 1.
 #
 # The message goes to stderr rather than stdout: die is also reached
 # from inside command substitutions (e.g. raw=$(run_probe)), where
 # anything written to stdout is captured by the caller and the
 # diagnostic would never reach the operator.
 die() { say "FAIL: $*" >&2; exit 1; }
 # Check that GET ${BASE}/health succeeds within 5 seconds; the response
 # body is discarded. Any curl failure (including an HTTP error status,
 # because of -f) ends the script through die.
 probe_health() {
    local health_url="${BASE}/health"
    if ! curl -s -f -m 5 "${health_url}" >/dev/null; then
        die "neuron not reachable at ${health_url}"
    fi
 }
 # Fetch GET ${BASE}/models and print the .id field of every element of
 # the returned array, one per line. Fails when curl fails (HTTP errors
 # included, because of -f) or when yq cannot process the response.
 list_loaded_ids() {
    local models_url="${BASE}/models"
    curl -s -f "${models_url}" | yq -r '.[].id'
 }
 # Succeed when MODEL_ID exactly matches one of the ids printed by
 # list_loaded_ids; fail otherwise, including when the listing itself
 # fails (its stderr is suppressed, so a failed listing reads as
 # "not loaded").
 is_loaded() {
    local ids
    # Capture the listing before matching. Piping straight into
    # `grep -q` under `set -o pipefail` is racy: grep exits on the first
    # match, the producer can then die of SIGPIPE, and the pipeline would
    # report failure even though the model was found.
    ids=$(list_loaded_ids 2>/dev/null) || return 1
    # An empty listing means nothing is loaded; skip grep so the empty
    # line a here-string would feed it can never match.
    [[ -n "${ids}" ]] || return 1
    # `--` keeps a MODEL_ID starting with '-' from being read as an option.
    grep -Fxq -- "${MODEL_ID}" <<<"${ids}"
 }
 # POST a load request for MODEL_ID at QUANT (candle harness, device 0)
 # to ${BASE}/models/load and wait up to LOAD_TIMEOUT seconds for the
 # reply. Ends the script through die when curl itself fails or when the
 # daemon answers with anything other than HTTP 200; otherwise logs the
 # status code and response body.
 trigger_load() {
    say "POST /models/load ${MODEL_ID} (quant=${QUANT}, device=[0])"
    say "  (synchronous; may take a minute on first run while HF downloads)"
    # Build the body with yq, the same way run_probe does, so quotes or
    # backslashes in MODEL_ID / QUANT are JSON-escaped instead of being
    # pasted verbatim into a hand-written template.
    local payload
    payload=$(yq -n -c \
        --arg model_id "${MODEL_ID}" \
        --arg quant "${QUANT}" \
        '{
            model_id: $model_id,
            harness: "candle",
            quant: $quant,
            devices: [0]
        }')
    # --write-out appends the status code after a marker on its own line
    # so we can surface a real diagnostic (status + body) instead of
    # relying on --fail, which would discard the error body.
    local resp http_code body
    resp=$(curl --silent --show-error --max-time "${LOAD_TIMEOUT}" \
        --write-out '\n__HTTP__%{http_code}' \
        -X POST "${BASE}/models/load" \
        -H 'content-type: application/json' \
        --data "${payload}") || die "curl /models/load failed: $?"
    # Split on the last marker with parameter expansion: no dependency on
    # GNU grep -P, and no pipeline that could abort the script silently
    # under `set -e` if the marker were ever missing.
    http_code=${resp##*__HTTP__}
    body=${resp%$'\n'__HTTP__*}
    if [[ "${http_code}" != "200" ]]; then
        die "load returned HTTP ${http_code}: ${body}"
    fi
    say "load returned ${http_code}: ${body}"
 }
 # POST the reasoning probe to ${BASE}/v1/chat/completions and print the
 # raw response body on stdout.
 #
 # The caller captures this function's stdout with command substitution,
 # so stdout must carry the response body and nothing else: progress and
 # failure messages are sent to stderr. Ends (the subshell) through die
 # when curl fails or the daemon answers with anything other than
 # HTTP 200.
 run_probe() {
    # Log on stderr; on stdout this line would be prepended to the JSON
    # the caller goes on to parse.
    say "POST /v1/chat/completions (probe: ${PROBE_PROMPT})" >&2
    local payload
    payload=$(yq -n -c \
        --arg model "${MODEL_ID}" \
        --arg content "${PROBE_PROMPT}" \
        --argjson tokens "${MAX_TOKENS}" \
        '{
            model: $model,
            messages: [{role: "user", content: $content}],
            temperature: 0.1,
            max_tokens: $tokens
        }')
    # --write-out appends the status code after a marker on its own line
    # so a non-200 reply can be reported together with its body.
    local resp http_code body
    resp=$(curl --silent --show-error --max-time "${INFER_TIMEOUT}" \
        --write-out '\n__HTTP__%{http_code}' \
        -X POST "${BASE}/v1/chat/completions" \
        -H 'content-type: application/json' \
        --data "${payload}") || die "curl /v1/chat/completions failed: $?" >&2
    # Split on the last marker with parameter expansion: no dependency on
    # GNU grep -P, and no pipeline that could abort silently under
    # `set -e` if the marker were ever missing.
    http_code=${resp##*__HTTP__}
    body=${resp%$'\n'__HTTP__*}
    if [[ "${http_code}" != "200" ]]; then
        die "inference returned HTTP ${http_code}: ${body}" >&2
    fi
    # printf instead of echo so a body is never mistaken for echo options.
    printf '%s\n' "${body}"
 }
 # Main flow: health check, make sure the model is loaded, run the
 # probe, then judge the answer.
 say "validating neuron at ${BASE}"
 probe_health
 say "/health OK"
 # Request a load only when the model is not already listed.
 if ! is_loaded; then
    trigger_load
 else
    say "${MODEL_ID} already loaded"
 fi
 raw=$(run_probe)
 # Show the full response between separator lines.
 echo "---"
 yq -r '.' <<<"${raw}"
 echo "---"
 # Pull out the first choice's message text; `// empty` yields an empty
 # string when the field is absent or null.
 content=$(yq -r '.choices[0].message.content // empty' <<<"${raw}")
 [[ -n "${content}" ]] || die "no content in chat completion response"
 say "assistant said: ${content}"
 # Verdict: case-insensitive, fixed-string search for the expected word.
 if ! grep -qiF "${EXPECT_SUBSTR}" <<<"${content}"; then
    die "response did not contain '${EXPECT_SUBSTR}'"
 fi
 say "PASS — response contains expected substring '${EXPECT_SUBSTR}'"
 exit 0