# cortex/data/neuron.service

[Unit]
Description=Neuron — per-node GPU discovery and harness daemon for cortex
# Wants= pulls network-online.target into the boot transaction; After=
# orders this unit behind it. Both are needed: Wants= alone gives no
# ordering, and After= alone does not cause the target to be started.
Wants=network-online.target
After=network-online.target

[Service]
# Identity: run as the neuron user and group.
User=neuron
Group=neuron

# Launch: a single foreground process reading /etc/neuron/neuron.toml.
Type=simple
ExecStart=/usr/bin/neuron --config /etc/neuron/neuron.toml

# Recovery: restart only after an unclean exit, waiting 5 seconds
# between attempts.
Restart=on-failure
RestartSec=5

# Start-up budget: neuron loads default_models from neuron.toml before
# its HTTP listener binds, and a large model may need many minutes to
# download and materialise on first activation. The systemd default of
# 90s is far too short for that, so allow 30 minutes.
# NOTE(review): with Type=simple systemd treats the unit as started as
# soon as the main process has been forked, so this timeout may never
# cover the model-loading phase. Making it effective would likely need
# Type=notify plus sd_notify(READY=1) support in neuron — confirm
# against the daemon before changing Type=.
TimeoutStartSec=1800s

# Shutdown budget: on SIGTERM neuron drains in-flight requests and then
# unloads every model so CUDA contexts are released cleanly. Big-model
# unloads get a generous window; once it elapses systemd sends SIGKILL.
TimeoutStopSec=120s
KillSignal=SIGTERM

[Install]
# `systemctl enable` hooks this unit into multi-user.target, so it is
# started as part of a normal (non-graphical) multi-user boot.
WantedBy=multi-user.target