# cortex/asset/neuron/beast.toml

# neuron.toml for beast.hanzalova.internal
#
# 2x RTX 5090 (32 GB each) — TP-2 capable. Pre-warms Qwen3.6-27B with
# q5k ISQ across both GPUs at activation, matching the validate-neuron
# invocation: `validate-neuron.sh beast.hanzalova.internal
# Qwen/Qwen3.6-27B q5k 2`.
#
# Synced by script/deploy.sh from asset/neuron/<short-host>.toml. Edits
# take effect on the next deploy.sh run (which stops + restarts the
# service so default_models is re-read at activation).

# NOTE(review): presumably the TCP port the neuron service listens on —
# confirm against the config loader.
port = 13131

# List of harnesses for this host; "candle" is the only entry.
# NOTE(review): presumably these are the inference backends the service
# enables — confirm against the config schema.
[[harnesses]]
name = "candle"

# Settings table for the candle harness; no keys are set here.
# NOTE(review): this table lives under `harness` (singular) while the list
# above is `harnesses` (plural) — confirm both names match what the loader
# expects, and whether this empty table must be present.
[harness.candle]

# Model pre-warmed at activation: Qwen3.6-27B with q5k ISQ, split across
# both GPUs. Mirrors the model / quant / TP arguments of the validate-neuron
# invocation quoted in the file header.
[[default_models]]
model_id = "Qwen/Qwen3.6-27B"
quant = "q5k"
harness = "candle"

# NOTE(review): presumably GPU indices, one per tensor-parallel rank —
# confirm that len(devices) must equal tensor_parallel.
devices = [0, 1]
tensor_parallel = 2