# systemd service unit for neuron, the per-node GPU discovery and harness
# daemon for cortex.

[Unit]
Description=Neuron — per-node GPU discovery and harness daemon for cortex
# Order this unit after network-online.target and pull that target in, so
# the network is expected to be up before the daemon is started.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=/usr/bin/neuron --config /etc/neuron/neuron.toml
# Restart only after an unclean exit (non-zero status, fatal signal,
# timeout), waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5
# Run the daemon as the neuron user and group.
User=neuron
Group=neuron

# Loading default_models from neuron.toml happens before the HTTP
# listener binds; large models can take many minutes to download and
# materialise on first activation. systemd's default start timeout (90s)
# would be far too short for that, so allow 30 minutes.
# NOTE(review): with Type=simple systemd considers the unit started as
# soon as the main process has been forked, so this bound does not
# actually cover model loading as the unit is written. It only becomes
# the effective limit if neuron signals readiness itself (for example
# Type=notify with sd_notify) -- confirm the daemon supports that before
# changing Type=.
TimeoutStartSec=1800s

# On stop, neuron drains in-flight requests and then unloads every model
# to release CUDA contexts cleanly. Allow generous time for big-model
# unloads; systemd sends SIGKILL once this bound expires.
TimeoutStopSec=120s
# SIGTERM is systemd's default stop signal; it is spelled out here to make
# the shutdown contract explicit.
KillSignal=SIGTERM

[Install]
WantedBy=multi-user.target