# neuron.toml for beast.hanzalova.internal
#
# 2x RTX 5090 (32 GB each) — TP-2 capable. Pre-warms Qwen3.6-27B with
# q5k ISQ across both GPUs at activation, matching the validate-neuron
# invocation: `validate-neuron.sh beast.hanzalova.internal
# Qwen/Qwen3.6-27B q5k 2`.
#
# Synced to /etc/neuron/neuron.toml by script/infra-setup.sh. Edits
# take effect after the next deploy workflow run restarts the service
# (default_models is read at activation).

# Port used by the neuron service on this host.
# NOTE(review): presumably the TCP listen port — confirm against the
# service's config loader.
port = 13131

# Harnesses registered on this host; "candle" is the only entry.
[[harnesses]]
name = "candle"

# Per-harness settings table for candle. No keys are set in this file.
# NOTE(review): presumably an empty table means the harness defaults
# apply — confirm. Also verify that the singular `harness.` prefix here
# (vs. the plural `harnesses` array) is what the schema expects.
[harness.candle]

# Models pre-warmed when the service activates (default_models is read at
# activation). Keep this entry in step with the validate-neuron invocation
# for this host:
#   validate-neuron.sh beast.hanzalova.internal Qwen/Qwen3.6-27B q5k 2
# so validation exercises the already-loaded model rather than a different
# quantization.
[[default_models]]
model_id = "Qwen/Qwen3.6-27B"
# Same name as the harness registered under [[harnesses]].
harness = "candle"
# ISQ level. q5k is the level the validate-neuron invocation requests.
quant = "q5k"
# TP-2: one shard per RTX 5090, using devices 0 and 1.
tensor_parallel = 2
devices = [0, 1]