# models.example.toml — model catalogue
#
# Copy to /etc/cortex/models.toml and adjust for your environment.
# Describes how to serve each model. Cortex matches these profiles
# against discovered neuron topologies for placement decisions.
#
# Each [[models]] header below appends one entry to the `models` array.
# NOTE(review): the *_mb keys are presumably VRAM sizes in megabytes and
# `pinned_on` presumably lists placement targets — confirm against the
# Cortex schema before relying on these comments.

# The only example entry that asks for more than one device and the only
# one that sets min_device_vram_mb.
[[models]]
id = "your-org/large-model"
harness = "candle"
quant = "Q4_K_M"
vram_mb = 19000
min_devices = 2
min_device_vram_mb = 10000
pinned_on = ["gpu-large"]

# Single-device entry with a pin; min_device_vram_mb is left unset.
[[models]]
id = "your-org/medium-model"
harness = "candle"
quant = "Q6_K"
vram_mb = 12000
min_devices = 1
pinned_on = ["gpu-medium"]

# Single-device entry with neither pinned_on nor min_device_vram_mb set.
[[models]]
id = "your-org/embedding-model"
harness = "candle"
quant = "Q8_0"
vram_mb = 8000
min_devices = 1