From d3f2d5074909624b757269a063a3627fde1f9d27 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Tue, 26 May 2026 14:05:54 +0300 Subject: [PATCH] feat(deploy): per-host neuron config + pre-warm headline models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds asset/neuron/{beast,benjy,quadbrat}.toml — per-host neuron.toml files keyed by the first dot-component of the host. deploy.sh now rsyncs the matching file to /etc/neuron/neuron.toml on each neuron and stops+starts the service so default_models is re-read. Headline model per host (drives /v1/models output immediately after a clean deploy): beast Qwen/Qwen3.6-27B (q5k, tp=2, devices=[0,1]) benjy Qwen/Qwen3-8B (bf16, devices=[0]) quadbrat Qwen/Qwen3-1.7B (bf16, devices=[0]) Removes the need to follow deploy.sh with `validate-neuron.sh beast Qwen/Qwen3.6-27B q5k 2` to surface the 27B in the catalogue — the neuron loads it itself on activation. The neuron loop now mirrors the cortex flow (stop → install/upgrade → sync config → start) so config-only changes pick up on subsequent deploys; previously a no-package-change deploy would silently leave the host on the old default_models. Co-Authored-By: Claude Opus 4.7 (1M context) --- asset/neuron/beast.toml | 24 ++++++++++++ asset/neuron/benjy.toml | 19 +++++++++ asset/neuron/quadbrat.toml | 19 +++++++++ script/deploy.sh | 80 +++++++++++++++++++++++++------------- 4 files changed, 116 insertions(+), 26 deletions(-) create mode 100644 asset/neuron/beast.toml create mode 100644 asset/neuron/benjy.toml create mode 100644 asset/neuron/quadbrat.toml diff --git a/asset/neuron/beast.toml b/asset/neuron/beast.toml new file mode 100644 index 0000000..432a660 --- /dev/null +++ b/asset/neuron/beast.toml @@ -0,0 +1,24 @@ +# neuron.toml for beast.hanzalova.internal +# +# 2x RTX 5090 (32 GB each) — TP-2 capable. 
Pre-warms Qwen3.6-27B with +# q5k ISQ across both GPUs at activation, matching the validate-neuron +# invocation: `validate-neuron.sh beast.hanzalova.internal +# Qwen/Qwen3.6-27B q5k 2`. +# +# Synced by script/deploy.sh from asset/neuron/<short_host>.toml. Edits +# take effect on the next deploy.sh run (which stops + restarts the +# service so default_models is re-read at activation). + +port = 13131 + +[[harnesses]] +name = "candle" + +[harness.candle] + +[[default_models]] +model_id = "Qwen/Qwen3.6-27B" +harness = "candle" +quant = "q5k" +tensor_parallel = 2 +devices = [0, 1] diff --git a/asset/neuron/benjy.toml b/asset/neuron/benjy.toml new file mode 100644 index 0000000..793bd06 --- /dev/null +++ b/asset/neuron/benjy.toml @@ -0,0 +1,19 @@ +# neuron.toml for benjy.hanzalova.internal +# +# 1x RTX 4090 (24 GB) — largest single-GPU host on the fleet. Pre-warms +# Qwen3-8B (bf16, ~18 GB), leaving ~6 GB for KV cache + activations on +# moderate-length contexts. +# +# Synced by script/deploy.sh from asset/neuron/<short_host>.toml. + +port = 13131 + +[[harnesses]] +name = "candle" + +[harness.candle] + +[[default_models]] +model_id = "Qwen/Qwen3-8B" +harness = "candle" +devices = [0] diff --git a/asset/neuron/quadbrat.toml b/asset/neuron/quadbrat.toml new file mode 100644 index 0000000..4135557 --- /dev/null +++ b/asset/neuron/quadbrat.toml @@ -0,0 +1,19 @@ +# neuron.toml for quadbrat.hanzalova.internal +# +# 1x RTX 3060 (12 GB) — small / quantised tier. Pre-warms Qwen3-1.7B +# (bf16, ~4 GB), leaving ~7 GB for KV cache so long contexts on a small +# model still have plenty of room. +# +# Synced by script/deploy.sh from asset/neuron/<short_host>.toml.
+ +port = 13131 + +[[harnesses]] +name = "candle" + +[harness.candle] + +[[default_models]] +model_id = "Qwen/Qwen3-1.7B" +harness = "candle" +devices = [0] diff --git a/script/deploy.sh b/script/deploy.sh index 9ce61da..1f3aa9f 100755 --- a/script/deploy.sh +++ b/script/deploy.sh @@ -233,43 +233,71 @@ fi for entry in "${neuron_entries[@]}"; do IFS=$'\t' read -r neuron_host neuron_flavour <<< "${entry}" package="helexa-neuron-${neuron_flavour}" + # First dot-component of the host keys the per-host config file + # under asset/neuron/<short_host>.toml. A host listed in the manifest + # without a corresponding config still deploys (the package's + # default /etc/neuron/neuron.toml stays in place; no pre-warm). + short_host="${neuron_host%%.*}" + host_config="${REPO_DIR}/asset/neuron/${short_host}.toml" ensure_lair_repo "${neuron_host}" ensure_cudnn_runtime "${neuron_host}" neuron_nvr=$(installed_nvr "${neuron_host}" "${package}") + + # Stop the service unconditionally before any reconfig step. + # `default_models` is read at activation, so a config change without + # a bounce silently leaves the host on the previous pre-warm set. + # Same shape as the cortex flow above. The `[ ! -f … ]` guard skips + # the stop on a fresh install where the unit file isn't there yet. + if ssh "${neuron_host}" "[ ! -f /usr/lib/systemd/system/neuron.service ] || sudo systemctl stop neuron.service"; then + echo "[${neuron_host}] stopped neuron service" + else + echo "[${neuron_host}] failed to stop neuron service (continuing)" + fi + if needs_update "${neuron_host}" "${package}"; then echo "[${neuron_host}] ${package} update available (current: ${neuron_nvr})" - if ssh "${neuron_host}" "[ ! -f /usr/lib/systemd/system/neuron.service ] || sudo systemctl stop neuron.service"; then - echo "[${neuron_host}] stopped neuron service" - # --allowerasing lets dnf swap out a previously-installed - # bare helexa-neuron or a different flavour without manual - # intervention. 
The Conflicts: clauses in the spec ensure - # only one flavour is ever resident. - if install_or_upgrade "${neuron_host}" "${package}"; then - neuron_nvr=$(installed_nvr "${neuron_host}" "${package}") - echo "[${neuron_host}] installed/upgraded ${package} to ${neuron_nvr}" - # Ensure firewalld allows neuron port - ssh "${neuron_host}" "sudo firewall-cmd --query-service=helexa-neuron --quiet 2>/dev/null || sudo firewall-cmd --add-service=helexa-neuron --permanent && sudo firewall-cmd --reload" 2>/dev/null || true - if ssh "${neuron_host}" "sudo systemctl daemon-reload && sudo systemctl start neuron.service"; then - echo "[${neuron_host}] started neuron service" - else - echo "[${neuron_host}] failed to start neuron service" - fi - else - echo "[${neuron_host}] failed to install ${package}:" - echo "${__DNF_OUTPUT__}" | sed "s/^/[${neuron_host}] /" - fi + # --allowerasing lets dnf swap out a previously-installed + # bare helexa-neuron or a different flavour without manual + # intervention. The Conflicts: clauses in the spec ensure + # only one flavour is ever resident. 
+ if install_or_upgrade "${neuron_host}" "${package}"; then + neuron_nvr=$(installed_nvr "${neuron_host}" "${package}") + echo "[${neuron_host}] installed/upgraded ${package} to ${neuron_nvr}" + # Ensure firewalld allows neuron port + ssh "${neuron_host}" "sudo firewall-cmd --query-service=helexa-neuron --quiet 2>/dev/null || sudo firewall-cmd --add-service=helexa-neuron --permanent && sudo firewall-cmd --reload" 2>/dev/null || true else - echo "[${neuron_host}] failed to stop neuron service" + echo "[${neuron_host}] failed to install ${package}:" + echo "${__DNF_OUTPUT__}" | sed "s/^/[${neuron_host}] /" fi else echo "[${neuron_host}] ${package} is up to date (${neuron_nvr})" - if ssh "${neuron_host}" systemctl is-active --quiet neuron.service; then - echo "[${neuron_host}] neuron service is active" - elif ssh "${neuron_host}" sudo systemctl start neuron.service; then - echo "[${neuron_host}] started neuron service" + fi + + # Sync per-host neuron.toml — drives default_models pre-warm so + # `/v1/models` on the gateway exposes the host's headline model + # immediately after the service comes back up. Missing per-host + # config leaves the package's installed neuron.toml untouched. + if [[ -f "${host_config}" ]]; then + if rsync \ + --archive \ + --compress \ + --rsync-path 'sudo rsync' \ + --chown root:root \ + --chmod 644 \ + "${host_config}" \ + "${neuron_host}:/etc/neuron/neuron.toml"; then + echo "[${neuron_host}] sync'd asset/neuron/${short_host}.toml" else - echo "[${neuron_host}] failed to start neuron service" + echo "[${neuron_host}] failed to sync neuron.toml" fi + else + echo "[${neuron_host}] no asset/neuron/${short_host}.toml — leaving /etc/neuron/neuron.toml untouched" + fi + + if ssh "${neuron_host}" "sudo systemctl daemon-reload && sudo systemctl start neuron.service"; then + echo "[${neuron_host}] started neuron service" + else + echo "[${neuron_host}] failed to start neuron service" fi done