# systemd unit for the neuron daemon. One directive per line: systemd only
# recognises section headers and Key=value assignments that start a line,
# and comments must occupy a whole line.

[Unit]
Description=Neuron — per-node GPU discovery and harness daemon for cortex
# Start only after the network is reported online; Wants= pulls the target
# in, After= orders this unit behind it.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=/usr/bin/neuron --config /etc/neuron/neuron.toml
# Restart only on unclean exits, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5
User=neuron
Group=neuron

# /var/lib/neuron is the neuron user's $HOME — hf-hub writes its
# default cache there (~/.cache/huggingface/hub). Without this directive
# systemd doesn't create the directory and hf-hub downloads fail with
# "fetch GGUF : failed to create cache dir".
StateDirectory=neuron
StateDirectoryMode=0755

# Loading default_models from neuron.toml happens before the HTTP
# listener binds; large models can take many minutes to download and
# materialise on first activation. systemd's default TimeoutStartSec
# (90s) is far too short; allow 30 minutes.
# NOTE(review): with Type=simple systemd considers the unit started as
# soon as the main process has been forked, so this bound presumably only
# takes effect if Type= is switched to notify (or ExecStartPre= steps are
# added) — confirm against the intended readiness model.
TimeoutStartSec=1800s

# On stop, neuron drains in-flight requests then unloads every model
# to release CUDA contexts cleanly. Allow generous time for big-model
# unloads; systemd will SIGKILL after this bound.
TimeoutStopSec=120s
KillSignal=SIGTERM

[Install]
WantedBy=multi-user.target