# neuron.example.toml — example configuration
#
# Copy to /etc/neuron/neuron.toml and adjust for your environment.
#
# Environment variable overrides use the NEURON_ prefix with __ separators:
#   NEURON_PORT=13131

port = 13131

# -- Harnesses ---------------------------------------------------------------
# Each [[harnesses]] entry enables an inference engine. Currently only
# "candle" is supported — it runs in-process and uses huggingface/candle
# for inference on local CUDA devices (or CPU when CUDA is unavailable).

[[harnesses]]
name = "candle"

# -- Candle harness settings -------------------------------------------------
# Optional tuning for the candle harness.
#
# NOTE(review): the array above is named `harnesses` (plural) while this
# table is `harness.candle` (singular) — confirm both names against the
# configuration schema before relying on them.

[harness.candle]
# HuggingFace cache directory for model weights. When unset, hf-hub's
# default (~/.cache/huggingface) is used.
# hf_cache = "/var/lib/neuron/hf-cache"