First end-to-end run of the deploy workflow succeeded (gitea run #289), so the operator-run rolling-deploy script and its YAML manifest are no longer the source of truth — fleet topology lives in .gitea/workflows/deploy.yml and per-host config in script/infra-setup.sh. Per-host neuron config comments updated to point at the new sync path. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
25 lines
639 B
TOML
25 lines
639 B
TOML
# neuron.toml for beast.hanzalova.internal
#
# 2x RTX 5090 (32 GB each) — TP-2 capable. Pre-warms Qwen3.6-27B with
# q5k ISQ across both GPUs at activation, matching the validate-neuron
# invocation: `validate-neuron.sh beast.hanzalova.internal
# Qwen/Qwen3.6-27B q5k 2`.
#
# Synced to /etc/neuron/neuron.toml by script/infra-setup.sh. Edits
# take effect after the next deploy workflow run restarts the service
# (default_models is read at activation).

port = 13131

# Harnesses declared for this host. Entries in [[default_models]] refer
# to a harness by its `name`.
[[harnesses]]
name = "candle"

# No candle-specific settings are set for this host.
# NOTE(review): this table is `harness` (singular) while the array above
# is `harnesses` (plural) — confirm both key names match the neuron
# config schema.
[harness.candle]

# Models listed here are read at activation (see the header comment).
[[default_models]]
model_id = "Qwen/Qwen3.6-27B"
harness = "candle"
# NOTE(review): the header comment and the validate-neuron invocation it
# quotes both say q5k, but this value is q6k. One of them is stale —
# confirm the intended quantization and bring the other in line.
quant = "q6k"
# One shard per GPU: two-way tensor parallelism across devices 0 and 1.
tensor_parallel = 2
devices = [0, 1]