From f9f5fa41b6a9684c1ccd0652e468c7c9cf4b6850 Mon Sep 17 00:00:00 2001
From: rob thijssen
Date: Tue, 19 May 2026 08:17:37 +0300
Subject: [PATCH] fix(neuron): surface full anyhow chain + ensure $HOME exists at start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes uncovered by the live validation against beast/benjy/quadbrat:

1. api.rs swallowed everything beyond the outermost anyhow context.
   The validation script reported '{"error":"fetch GGUF ...gguf"}' but
   the actual underlying hf-hub failure (cache dir creation, network,
   auth, etc.) was hidden. Switching every error response to
   format!("{e:#}") expands the full cause chain via anyhow's alternate
   Display format.

2. The neuron systemd unit declared the service user but never ensured
   /var/lib/neuron (its $HOME) existed. hf-hub defaults its cache to
   ~/.cache/huggingface/hub — when $HOME is absent the cache dir
   creation fails and the download aborts. Adding
   `StateDirectory=neuron` makes systemd create + chown that directory
   at activation; no spec change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/neuron/src/api.rs | 14 +++++++++----- data/neuron.service | 6 ++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crates/neuron/src/api.rs b/crates/neuron/src/api.rs index 30399d2..19d108f 100644 --- a/crates/neuron/src/api.rs +++ b/crates/neuron/src/api.rs @@ -56,7 +56,7 @@ async fn list_models(State(state): State>) -> impl IntoResponse Ok(models) => Json(json!(models)).into_response(), Err(e) => ( StatusCode::INTERNAL_SERVER_ERROR, - Json(json!({"error": e.to_string()})), + Json(json!({"error": format!("{e:#}")})), ) .into_response(), } @@ -71,7 +71,7 @@ async fn load_model( Ok(()) => Json(json!({"status": "loaded"})).into_response(), Err(e) => ( StatusCode::BAD_REQUEST, - Json(json!({"error": e.to_string()})), + Json(json!({"error": format!("{e:#}")})), ) .into_response(), } @@ -95,7 +95,11 @@ async fn unload_model( let registry = state.registry.read().await; match registry.unload_model(&model_id).await { Ok(()) => Json(json!({"status": "unloaded"})).into_response(), - Err(e) => (StatusCode::NOT_FOUND, Json(json!({"error": e.to_string()}))).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(json!({"error": format!("{e:#}")})), + ) + .into_response(), } } @@ -151,7 +155,7 @@ async fn chat_completions( .into_response(), Err(InferenceError::Other(e)) => ( StatusCode::INTERNAL_SERVER_ERROR, - Json(json!({"error": e.to_string()})), + Json(json!({"error": format!("{e:#}")})), ) .into_response(), } @@ -165,7 +169,7 @@ async fn chat_completions( .into_response(), Err(InferenceError::Other(e)) => ( StatusCode::INTERNAL_SERVER_ERROR, - Json(json!({"error": e.to_string()})), + Json(json!({"error": format!("{e:#}")})), ) .into_response(), } diff --git a/data/neuron.service b/data/neuron.service index c844da7..5da4066 100644 --- a/data/neuron.service +++ b/data/neuron.service @@ -10,6 +10,12 @@ Restart=on-failure RestartSec=5 User=neuron Group=neuron +# /var/lib/neuron is the neuron user's 
$HOME — hf-hub writes its +# default cache there (~/.cache/huggingface/hub). Without this directive +# systemd doesn't create the directory and hf-hub downloads fail with +# "fetch GGUF : failed to create cache dir". +StateDirectory=neuron +StateDirectoryMode=0755 # Loading default_models from neuron.toml happens before the HTTP # listener binds; large models can take many minutes to download and # materialise on first activation. systemd's default TimeoutStartSec