From 5c2bd1a1dab3922b52e334fc326ee95b5b193107 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Mon, 18 May 2026 16:02:49 +0300 Subject: [PATCH] feat(neuron): wire candle harness load/unload via GGUF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 2 of the candle-native pivot. Fleshes out CandleHarness with a LoadedModel registry keyed by model_id, hf-hub-backed GGUF download, and Qwen3 quantized weight construction via candle-transformers' quantized_qwen3 module. unload_model drops the entry; Drop on the candle ModelWeights frees device memory. Device selection prefers CUDA (gated behind the new `cuda` feature), falling back to CPU when CUDA is unavailable so default builds work on non-GPU hosts. The candle CUDA toolchain isn't pulled in unless `--features cuda` is passed, keeping CI green on CPU runners. Config gains a [harness.candle] block with an optional hf_cache path. HarnessRegistry::from_configs now takes HarnessSettings so per-harness config flows through. A gated tests/candle_lifecycle.rs exercises real load → list → unload → list-empty when run with `--features cuda-integration` against a host with HF network access. The default-feature test in tests/api.rs covers the wrong-harness rejection path without needing the network. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1587 ++++++++++++++++++++++- crates/neuron/Cargo.toml | 21 + crates/neuron/src/config.rs | 22 +- crates/neuron/src/harness/candle.rs | 211 ++- crates/neuron/src/harness/mod.rs | 11 +- crates/neuron/src/main.rs | 2 +- crates/neuron/tests/api.rs | 25 +- crates/neuron/tests/candle_lifecycle.rs | 90 ++ neuron.example.toml | 12 +- 9 files changed, 1934 insertions(+), 47 deletions(-) create mode 100644 crates/neuron/tests/candle_lifecycle.rs diff --git a/Cargo.lock b/Cargo.lock index 2c7ade7..4b1158a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "ahash" version = "0.8.12" @@ -9,7 +15,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", + "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -23,6 +31,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -206,18 +220,48 @@ dependencies = [ "syn", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -229,6 +273,26 @@ name = "bytemuck" version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -236,6 +300,96 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "candle-core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd9895436c1ba5dc1037a19935d084b838db066ff4e15ef7dded020b7c12a4a" +dependencies = [ + "byteorder", + "candle-kernels", + "candle-ug", + "cudarc 0.19.7", + "float8", + "gemm 0.19.0", + "half", + "libm", + "memmap2", + "num-traits", + "num_cpus", + "rand", + "rand_distr", + "rayon", + "safetensors 0.7.0", + "thiserror 2.0.18", + "tokenizers", + "yoke 0.8.2", + "zip", +] + +[[package]] +name = "candle-kernels" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "742e2ac226b777134436e9e692f44e77c278b8a7abb1554dc10e44dc911b349f" +dependencies = [ + "cudaforge", +] + +[[package]] +name = "candle-nn" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9317a09d6530b758990ed7f625ac69ff43653bc9ee28b0464644ad1169ada87" +dependencies = [ + "candle-core", + "half", + "libc", + "num-traits", + "rayon", + "safetensors 0.7.0", + "serde", + "thiserror 2.0.18", +] + +[[package]] +name = "candle-transformers" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f59d08c89e9f4af9c464e2f3a8e16199e7cc601e6f34538c2cfbb42b623b1783" +dependencies = [ + "byteorder", + "candle-core", + "candle-nn", + "fancy-regex", + "num-traits", + "rand", + "rayon", + "serde", + "serde_json", + "serde_plain", + "tracing", +] + +[[package]] +name = "candle-ug" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca0fc3167cbc99c8ec1be618cb620aa21dca95038f118c3579a79370e3dc5f77" +dependencies = [ + "ug", + "ug-cuda", +] + +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.60" @@ -323,6 +477,34 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -404,6 +586,34 @@ dependencies = [ "urlencoding", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -419,6 +629,168 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "cudaforge" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f7a0d45b139b5beeeb1c34188717e12241c44a0120afb498815ce7f5373c691" +dependencies = [ + "anyhow", + "fs2", + "glob", + "num_cpus", + "rayon", + "serde", + "serde_json", + "sha2", + "thiserror 2.0.18", + "walkdir", + "which", +] + +[[package]] +name = "cudarc" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf99ab37ee7072d64d906aa2dada9a3422f1d975cdf8c8055a573bc84897ed8" +dependencies = [ + "half", + "libloading 0.8.9", +] + +[[package]] +name = "cudarc" +version = "0.19.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cea5f10a99e025c1b44ae2354c2d8326b25ddbd0baf76bde8e55cfd4018a2cc" +dependencies = [ + "float8", + "half", + "libloading 0.9.0", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dary_heap" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe" +dependencies = [ + "serde", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.61.2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -436,6 +808,34 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dyn-stack" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8" +dependencies = [ + "bytemuck", + "dyn-stack-macros", +] + +[[package]] +name = "dyn-stack-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -445,6 +845,24 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "equivalent" version = "1.0.2" @@ -461,6 +879,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" + [[package]] name = "eventsource-stream" version = "0.2.3" @@ -472,6 +896,17 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fancy-regex" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "fastrand" version = "2.4.1" @@ -498,6 +933,28 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "float8" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d1f04709a8ac06e8e8042875a3c466cc4832d3c1a18dbcb9dba3c6e83046bc" +dependencies = [ + "half", + "num-traits", + "rand", + "rand_distr", +] + [[package]] name = "fnv" version = "1.0.7" @@ -510,6 +967,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "foreign-types" version = "0.3.2" @@ -534,6 +997,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -628,6 +1101,254 @@ dependencies = [ "slab", ] +[[package]] +name = "gemm" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" +dependencies = [ + "dyn-stack", + "gemm-c32 0.18.2", + "gemm-c64 0.18.2", + "gemm-common 0.18.2", + "gemm-f16 0.18.2", + "gemm-f32 0.18.2", + "gemm-f64 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb" +dependencies = [ + "dyn-stack", + "gemm-c32 0.19.0", + "gemm-c64 0.19.0", + "gemm-common 0.19.0", + "gemm-f16 0.19.0", + "gemm-f32 0.19.0", + "gemm-f64 0.19.0", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" +dependencies = [ + "dyn-stack", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" +dependencies = [ + "dyn-stack", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.21.5", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-common" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.22.2", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-f16" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" +dependencies = [ + "dyn-stack", + "gemm-common 0.18.2", + "gemm-f32 0.18.2", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f16" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "gemm-f32 0.19.0", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" +dependencies = [ + "dyn-stack", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" +dependencies = [ + "dyn-stack", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -664,6 +1385,12 @@ dependencies = [ "wasip3", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "h2" version = "0.4.13" @@ -683,13 +1410,41 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "rand", + "rand_distr", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", + "serde", + "serde_core", ] [[package]] @@ -704,6 +1459,36 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs", + "futures", + "http", + "indicatif", + "libc", + "log", + "native-tls", + "num_cpus", + "rand", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "ureq", + "windows-sys 0.60.2", +] + [[package]] name = "http" version = "1.4.0" @@ -809,7 +1594,7 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-util", @@ -861,7 +1646,7 @@ dependencies = [ "displaydoc", "potential_utf", "utf8_iter", - "yoke", + "yoke 0.8.2", "zerofrom", "zerovec", ] @@ -928,7 +1713,7 @@ dependencies = [ "displaydoc", "icu_locale_core", "writeable", - "yoke", + "yoke 0.8.2", "zerofrom", "zerotrie", "zerovec", @@ -940,6 +1725,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -973,6 +1764,19 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "inlinable_string" version = "0.1.15" @@ -1001,6 +1805,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -1047,6 +1860,41 @@ version = "0.2.185" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" +dependencies = [ + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -1074,6 +1922,22 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + [[package]] name = "matchers" version = "0.2.0" @@ -1095,6 +1959,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", + "stable_deref_trait", +] + [[package]] name = "metrics" version = "0.24.3" @@ -1111,7 +1985,7 @@ version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" dependencies = [ - "base64", + "base64 0.22.1", "http-body-util", "hyper", "hyper-rustls", @@ -1154,6 +2028,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.2.0" @@ -1165,6 +2049,28 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "monostate" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "native-tls" version = "0.2.18" @@ -1189,12 +2095,17 @@ dependencies = [ "anyhow", "async-trait", "axum", + "candle-core", + "candle-nn", + "candle-transformers", "clap", "cortex-core", "figment", + "hf-hub", "reqwest", "serde", "serde_json", + "tokenizers", "tokio", "toml", "tracing", @@ -1220,6 +2131,71 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1227,8 +2203,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.21.4" @@ -1241,6 +2234,28 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "onig" +version = "6.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3cbf698f9438986c11a880c90a6d04b9de27575afd28bbf45b154b6c709e2" +dependencies = [ + "bitflags", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e68317604e77e53b85896388e1a803c1d21b74c899ec9e5e1112db90735edd7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.77" @@ -1285,6 +2300,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "parking_lot" version = "0.12.5" @@ -1308,6 +2329,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pear" version = "0.2.9" @@ -1405,6 +2432,43 @@ dependencies = [ "yansi", ] +[[package]] +name = "pulp" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" +dependencies = [ + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "reborrow", + "version_check", +] + +[[package]] +name = "pulp" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" +dependencies = [ + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "paste", + "pulp-wasm-simd-flag", + "raw-cpuid", + "reborrow", + "version_check", +] + +[[package]] +name = "pulp-wasm-simd-flag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" + [[package]] name = "quanta" version = "0.12.6" @@ -1470,6 +2534,16 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -1488,6 +2562,43 @@ dependencies = [ "bitflags", ] +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies = [ + "either", + "itertools", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1497,6 +2608,29 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -1520,7 +2654,7 @@ version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-core", @@ -1591,7 +2725,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ "aws-lc-rs", + "log", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -1643,6 +2779,36 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safetensors" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "safetensors" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5" +dependencies = [ + "hashbrown 0.16.1", + "serde", + "serde_json", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.29" @@ -1687,6 +2853,12 @@ version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -1741,6 +2913,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_plain" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" +dependencies = [ + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -1762,6 +2943,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1787,6 +2979,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "sketches-ddsketch" version = "0.3.1" @@ -1815,12 +3013,41 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -1864,6 +3091,20 @@ dependencies = [ "syn", ] +[[package]] +name = "sysctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" +dependencies = [ + "bitflags", + "byteorder", + "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "system-configuration" version = "0.7.0" @@ -1957,6 +3198,39 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tokenizers" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" +dependencies = [ + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "getrandom 0.3.4", + "itertools", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.18", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.51.1" @@ -2199,6 +3473,52 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typed-path" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "ug" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76b761acf8af3494640d826a8609e2265e19778fb43306c7f15379c78c9b05b0" +dependencies = [ + "gemm 0.18.2", + "half", + "libloading 0.8.9", + "memmap2", + "num", + "num-traits", + "num_cpus", + "rayon", + "safetensors 0.4.5", + "serde", + "thiserror 1.0.69", + "tracing", + "yoke 0.7.5", +] + +[[package]] +name = "ug-cuda" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0a1fa748f26166778c33b8498255ebb7c6bffb472bcc0a72839e07ebb1d9b5" +dependencies = [ + "cudarc 0.17.8", + "half", + "serde", + "thiserror 1.0.69", + "ug", +] + [[package]] name = "uncased" version = "0.9.10" @@ -2214,18 +3534,65 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "untrusted" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -2274,6 +3641,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2419,6 +3796,46 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "which" +version = "7.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" +dependencies = [ + "either", + "env_home", + "rustix", + "winsafe", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2435,6 +3852,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2517,7 +3943,25 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", ] [[package]] @@ -2535,14 +3979,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -2551,48 +4012,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.15" @@ -2602,6 +4111,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -2702,6 +4217,18 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive 0.7.5", + "zerofrom", +] + [[package]] name = "yoke" version = "0.8.2" @@ -2709,10 +4236,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", - "yoke-derive", + "yoke-derive 0.8.2", "zerofrom", ] +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "yoke-derive" version = "0.8.2" @@ -2779,7 +4318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", - "yoke", + "yoke 0.8.2", "zerofrom", ] @@ -2789,7 +4328,7 @@ version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ - "yoke", + "yoke 0.8.2", "zerofrom", "zerovec-derive", ] @@ -2805,6 +4344,18 @@ dependencies = [ "syn", ] +[[package]] +name = "zip" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0" +dependencies = [ + "crc32fast", + "indexmap", + "memchr", + "typed-path", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/crates/neuron/Cargo.toml b/crates/neuron/Cargo.toml index 284a936..c2c6145 100644 --- a/crates/neuron/Cargo.toml +++ b/crates/neuron/Cargo.toml @@ -12,6 +12,18 @@ path = "src/lib.rs" name = "neuron" path = "src/main.rs" +[features] +default = [] +# Enables CUDA acceleration in candle. Without this feature, candle +# compiles for CPU only and Device::new_cuda calls fall back to CPU. +cuda = [ + "candle-core/cuda", + "candle-nn/cuda", + "candle-transformers/cuda", +] +# Reserved for GPU-only integration tests in later stages. +cuda-integration = ["cuda"] + [dependencies] cortex-core.workspace = true tokio.workspace = true @@ -27,6 +39,15 @@ clap.workspace = true figment.workspace = true toml.workspace = true +# candle for in-process inference. CUDA support is gated behind the +# crate's `cuda` feature (default off) so the workspace builds on +# non-CUDA hosts and CI runners. +candle-core = "0.10.2" +candle-nn = "0.10.2" +candle-transformers = "0.10.2" +tokenizers = { version = "0.22", default-features = false, features = ["onig"] } +hf-hub = { version = "0.4", features = ["tokio"] } + [dev-dependencies] tokio = { workspace = true, features = ["test-util"] } reqwest.workspace = true diff --git a/crates/neuron/src/config.rs b/crates/neuron/src/config.rs index 36177d9..e9b2250 100644 --- a/crates/neuron/src/config.rs +++ b/crates/neuron/src/config.rs @@ -6,7 +6,7 @@ use figment::{ providers::{Env, Format, Toml}, }; use serde::{Deserialize, Serialize}; -use std::path::Path; +use std::path::{Path, PathBuf}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NeuronConfig { @@ -14,6 +14,25 @@ pub struct NeuronConfig { pub port: u16, #[serde(default)] pub harnesses: Vec, + /// Per-harness configuration. Currently only `candle` is recognised. + #[serde(default)] + pub harness: HarnessSettings, +} + +/// Settings for individual harness implementations. Each harness owns +/// its own sub-table so users only configure the harnesses they enable. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct HarnessSettings { + #[serde(default)] + pub candle: CandleHarnessConfig, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct CandleHarnessConfig { + /// HuggingFace cache directory for model weights. + /// When unset, defers to hf-hub's default (~/.cache/huggingface). + #[serde(default)] + pub hf_cache: Option, } fn default_port() -> u16 { @@ -35,6 +54,7 @@ impl Default for NeuronConfig { Self { port: 13131, harnesses: vec![], + harness: HarnessSettings::default(), } } } diff --git a/crates/neuron/src/harness/candle.rs b/crates/neuron/src/harness/candle.rs index c872fdf..72b188a 100644 --- a/crates/neuron/src/harness/candle.rs +++ b/crates/neuron/src/harness/candle.rs @@ -1,24 +1,121 @@ //! Candle harness — in-process inference using huggingface/candle. //! -//! This is the sole `Harness` implementation. Unlike the previous -//! mistralrs/llamacpp harnesses, candle inference runs inside the neuron -//! process itself — no external subprocess, no systemd indirection. -//! -//! Stage 1 ships this as an inert skeleton; Stage 2 wires up actual -//! model load/unload via `candle-transformers`. +//! This is the sole `Harness` implementation. Inference runs inside +//! the neuron process; there is no external subprocess. Stage 2 wires +//! up GGUF (currently Qwen3 only) model load/unload via +//! `candle-transformers::models::quantized_qwen3`. Stage 3 adds the +//! inference endpoint. -use anyhow::Result; +use anyhow::{Context, Result}; use async_trait::async_trait; +use candle_core::Device; +use candle_core::quantized::gguf_file; +use candle_transformers::models::quantized_qwen3::ModelWeights as QuantizedQwen3Weights; use cortex_core::harness::{Harness, HarnessHealth, ModelInfo, ModelSpec}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use tokenizers::Tokenizer; +use tokio::sync::{Mutex, RwLock}; +/// In-process candle harness. Owns the loaded model registry. pub struct CandleHarness { - /// URL where this neuron serves inference (its own bind address). + models: Arc>>>, + hf_cache: Option, bind_url: String, } +/// A loaded model with its tokenizer, device placement, and architecture- +/// specific weights. The `arch` field is mutexed because future inference +/// calls take `&mut self` on the underlying ModelWeights (KV cache state). +pub struct LoadedModel { + pub model_id: String, + pub arch: Mutex, + pub tokenizer: Tokenizer, + pub device: Device, + pub quant: Option, + pub devices: Vec, +} + +/// Architecture-specific weights. Stage 2 supports only Qwen3 quantized; +/// Stage 8 broadens this to additional families and non-quantized variants. +pub enum ModelArch { + Qwen3Quantized(QuantizedQwen3Weights), +} + impl CandleHarness { - pub fn new(bind_url: String) -> Self { - Self { bind_url } + pub fn new(bind_url: String, hf_cache: Option) -> Self { + Self { + models: Arc::new(RwLock::new(HashMap::new())), + hf_cache, + bind_url, + } + } + + /// Pick a candle `Device` for the requested indices. Without the + /// `cuda` feature, or if CUDA initialisation fails, falls back to CPU. + fn pick_device(devices: &[u32]) -> Result { + let _idx = devices.first().copied().unwrap_or(0) as usize; + #[cfg(feature = "cuda")] + { + match Device::new_cuda(_idx) { + Ok(d) => return Ok(d), + Err(e) => tracing::warn!( + device = _idx, + error = %e, + "CUDA device unavailable, falling back to CPU" + ), + } + } + Ok(Device::Cpu) + } + + /// Resolve a model spec to local GGUF and tokenizer file paths via + /// hf-hub. Downloads on first use; subsequent calls are cached. + async fn resolve_files(&self, spec: &ModelSpec) -> Result<(PathBuf, PathBuf)> { + let mut builder = hf_hub::api::tokio::ApiBuilder::new(); + if let Some(cache) = &self.hf_cache { + builder = builder.with_cache_dir(cache.clone()); + } + let api = builder.build().context("build hf-hub API")?; + let repo = api.model(spec.model_id.clone()); + + let info = repo + .info() + .await + .with_context(|| format!("fetch HF repo info for {}", spec.model_id))?; + + let quant = spec.quant.as_deref().unwrap_or(""); + let quant_lc = quant.to_lowercase(); + let gguf_filename = info + .siblings + .iter() + .map(|s| s.rfilename.as_str()) + .filter(|name| name.to_lowercase().ends_with(".gguf")) + .find(|name| quant_lc.is_empty() || name.to_lowercase().contains(&quant_lc)) + .ok_or_else(|| { + anyhow::anyhow!( + "no GGUF file matching quant {:?} in repo {}", + spec.quant, + spec.model_id + ) + })? + .to_string(); + + tracing::info!( + model = %spec.model_id, + file = %gguf_filename, + "resolving GGUF (may be cached)" + ); + let gguf_path = repo + .get(&gguf_filename) + .await + .with_context(|| format!("fetch GGUF {gguf_filename}"))?; + let tokenizer_path = repo + .get("tokenizer.json") + .await + .context("fetch tokenizer.json")?; + Ok((gguf_path, tokenizer_path)) } } @@ -37,18 +134,98 @@ impl Harness for CandleHarness { } async fn list_models(&self) -> Result> { - Ok(Vec::new()) + let models = self.models.read().await; + Ok(models + .values() + .map(|m| ModelInfo { + id: m.model_id.clone(), + harness: "candle".into(), + status: "loaded".into(), + devices: m.devices.clone(), + vram_used_mb: None, + }) + .collect()) } - async fn load_model(&self, _spec: &ModelSpec) -> Result<()> { - anyhow::bail!("candle harness load_model not implemented yet (Stage 2)") + async fn load_model(&self, spec: &ModelSpec) -> Result<()> { + if spec.harness != "candle" { + anyhow::bail!("expected harness=candle, got harness={}", spec.harness); + } + + { + let models = self.models.read().await; + if models.contains_key(&spec.model_id) { + anyhow::bail!("model '{}' already loaded", spec.model_id); + } + } + + let devices = spec.devices.clone().unwrap_or_else(|| vec![0]); + let device = Self::pick_device(&devices)?; + + let (gguf_path, tokenizer_path) = self.resolve_files(spec).await?; + + let tokenizer = Tokenizer::from_file(&tokenizer_path) + .map_err(|e| anyhow::anyhow!("load tokenizer: {e}"))?; + + // File I/O + GGUF parsing + tensor materialisation are CPU-bound, + // so run them on a blocking task to avoid stalling the runtime. + let device_for_load = device.clone(); + let gguf_path_for_load = gguf_path.clone(); + let model_id_for_log = spec.model_id.clone(); + let arch = tokio::task::spawn_blocking(move || -> Result { + tracing::info!(model = %model_id_for_log, path = ?gguf_path_for_load, "loading GGUF"); + let mut file = std::fs::File::open(&gguf_path_for_load).context("open GGUF file")?; + let content = gguf_file::Content::read(&mut file) + .map_err(|e| anyhow::anyhow!("parse GGUF: {e}"))?; + + let architecture = content + .metadata + .get("general.architecture") + .and_then(|v| v.to_string().ok().cloned()) + .unwrap_or_default(); + tracing::info!(architecture = %architecture, "GGUF architecture"); + + match architecture.as_str() { + "qwen3" => { + let weights = + QuantizedQwen3Weights::from_gguf(content, &mut file, &device_for_load) + .map_err(|e| anyhow::anyhow!("from_gguf qwen3: {e}"))?; + Ok(ModelArch::Qwen3Quantized(weights)) + } + other => anyhow::bail!( + "unsupported GGUF architecture '{other}'; Stage 2 only supports qwen3" + ), + } + }) + .await + .context("blocking load task panicked")??; + + let loaded = Arc::new(LoadedModel { + model_id: spec.model_id.clone(), + arch: Mutex::new(arch), + tokenizer, + device, + quant: spec.quant.clone(), + devices, + }); + + let mut models = self.models.write().await; + models.insert(spec.model_id.clone(), loaded); + tracing::info!(model = %spec.model_id, "model loaded"); + Ok(()) } - async fn unload_model(&self, _model_id: &str) -> Result<()> { - anyhow::bail!("candle harness unload_model not implemented yet (Stage 2)") + async fn unload_model(&self, model_id: &str) -> Result<()> { + let mut models = self.models.write().await; + if models.remove(model_id).is_none() { + anyhow::bail!("model '{model_id}' not loaded"); + } + tracing::info!(model = %model_id, "model unloaded"); + Ok(()) } - async fn inference_endpoint(&self, _model_id: &str) -> Option { - Some(self.bind_url.clone()) + async fn inference_endpoint(&self, model_id: &str) -> Option { + let models = self.models.read().await; + models.contains_key(model_id).then(|| self.bind_url.clone()) } } diff --git a/crates/neuron/src/harness/mod.rs b/crates/neuron/src/harness/mod.rs index b8746fb..229d748 100644 --- a/crates/neuron/src/harness/mod.rs +++ b/crates/neuron/src/harness/mod.rs @@ -84,12 +84,19 @@ impl HarnessRegistry { /// `bind_url` is the URL where this neuron serves inference (its own /// listen address). In-process harnesses (currently the only kind) /// return this URL from `inference_endpoint`. - pub fn from_configs(configs: &[HarnessConfig], bind_url: &str) -> Self { + pub fn from_configs( + configs: &[HarnessConfig], + bind_url: &str, + settings: &crate::config::HarnessSettings, + ) -> Self { let mut registry = Self::new(); for config in configs { match config.name.as_str() { "candle" => { - registry.register(Box::new(candle::CandleHarness::new(bind_url.to_string()))); + registry.register(Box::new(candle::CandleHarness::new( + bind_url.to_string(), + settings.candle.hf_cache.clone(), + ))); } other => { tracing::warn!(harness = other, "unknown harness type, skipping"); diff --git a/crates/neuron/src/main.rs b/crates/neuron/src/main.rs index ef1d13e..00b3802 100644 --- a/crates/neuron/src/main.rs +++ b/crates/neuron/src/main.rs @@ -51,7 +51,7 @@ async fn main() -> Result<()> { // Build harness registry from config. In-process harnesses (candle) // need to know neuron's own bind URL so they can return it from // inference_endpoint. - let registry = HarnessRegistry::from_configs(&cfg.harnesses, &bind_url); + let registry = HarnessRegistry::from_configs(&cfg.harnesses, &bind_url, &cfg.harness); discovery_result.harnesses = registry.names(); let health_cache = Arc::new(health::HealthCache::new()); diff --git a/crates/neuron/tests/api.rs b/crates/neuron/tests/api.rs index f6448ae..c7f8e73 100644 --- a/crates/neuron/tests/api.rs +++ b/crates/neuron/tests/api.rs @@ -135,17 +135,21 @@ async fn test_models_empty_registry() { assert!(body.as_array().unwrap().is_empty()); } -/// Verify the candle harness registers and the load endpoint returns a -/// "not implemented" error in Stage 1 (Stage 2 wires up actual loading). +/// Verify the candle harness registers, list is empty by default, and a +/// load attempt for an obviously-bogus model id returns a 4xx error +/// without crashing the daemon. Real load/unload exercising actual GGUF +/// download is covered by `tests/candle_lifecycle.rs` (cuda-integration). #[tokio::test] -async fn test_candle_harness_registers_but_load_unimplemented() { +async fn test_candle_harness_registers_and_rejects_bogus_model() { use cortex_core::harness::HarnessConfig; + use neuron::config::HarnessSettings; let registry = HarnessRegistry::from_configs( &[HarnessConfig { name: "candle".into(), }], "http://localhost:13131", + &HarnessSettings::default(), ); let health_cache = Arc::new(HealthCache::new()); @@ -165,7 +169,6 @@ async fn test_candle_harness_registers_but_load_unimplemented() { let client = reqwest::Client::new(); - // GET /models — candle harness has no models loaded yet. let resp = client .get(format!("{neuron_url}/models")) .send() @@ -175,12 +178,22 @@ async fn test_candle_harness_registers_but_load_unimplemented() { let models: Vec = resp.json().await.unwrap(); assert!(models.is_empty()); - // POST /models/load — Stage 1 skeleton returns an error. + // Sending a wrong-harness spec should be rejected synchronously + // without touching the network or the model registry. let resp = client .post(format!("{neuron_url}/models/load")) - .json(&json!({"model_id": "some-model", "harness": "candle"})) + .json(&json!({"model_id": "definitely/not-real", "harness": "not-candle"})) .send() .await .unwrap(); assert_eq!(resp.status(), 400); + + // Registry still empty. + let resp = client + .get(format!("{neuron_url}/models")) + .send() + .await + .unwrap(); + let models: Vec = resp.json().await.unwrap(); + assert!(models.is_empty()); } diff --git a/crates/neuron/tests/candle_lifecycle.rs b/crates/neuron/tests/candle_lifecycle.rs new file mode 100644 index 0000000..312e45f --- /dev/null +++ b/crates/neuron/tests/candle_lifecycle.rs @@ -0,0 +1,90 @@ +//! Real model load/unload lifecycle through the candle harness. +//! +//! Gated behind the `cuda-integration` feature because it downloads a +//! real (small) GGUF from HuggingFace and materialises tensors on the +//! configured device. Run on a host with network access and either a +//! CUDA GPU (when built with `--features cuda`) or enough CPU RAM to +//! hold the model. +//! +//! Usage: +//! cargo test -p neuron --features cuda-integration --test candle_lifecycle +//! +//! Optional environment variables: +//! NEURON_TEST_MODEL_ID — HuggingFace repo to load (default: a small +//! public Qwen3 GGUF repo). +//! NEURON_TEST_QUANT — quant substring matched against GGUF +//! filenames (default: "Q4_K_M"). +//! HF_HOME — HuggingFace cache directory. + +#![cfg(feature = "cuda-integration")] + +use cortex_core::harness::{HarnessConfig, ModelSpec}; +use neuron::config::HarnessSettings; +use neuron::harness::HarnessRegistry; +use std::path::PathBuf; + +#[tokio::test] +async fn test_candle_qwen3_load_unload_lifecycle() { + let _ = tracing_subscriber::fmt() + .with_test_writer() + .with_env_filter("info,neuron=debug") + .try_init(); + + let model_id = std::env::var("NEURON_TEST_MODEL_ID") + .unwrap_or_else(|_| "Qwen/Qwen3-0.6B-GGUF".to_string()); + let quant = std::env::var("NEURON_TEST_QUANT").unwrap_or_else(|_| "Q4_K_M".to_string()); + + let mut settings = HarnessSettings::default(); + if let Ok(home) = std::env::var("HF_HOME") { + settings.candle.hf_cache = Some(PathBuf::from(home)); + } + + let registry = HarnessRegistry::from_configs( + &[HarnessConfig { + name: "candle".into(), + }], + "http://localhost:13131", + &settings, + ); + + let spec = ModelSpec { + model_id: model_id.clone(), + harness: "candle".into(), + quant: Some(quant), + tensor_parallel: None, + devices: Some(vec![0]), + }; + + registry + .load_model(&spec) + .await + .expect("load_model should succeed"); + + let models = registry + .list_all_models() + .await + .expect("list_all_models"); + assert_eq!(models.len(), 1, "expected exactly one loaded model"); + assert_eq!(models[0].id, model_id); + assert_eq!(models[0].harness, "candle"); + assert_eq!(models[0].status, "loaded"); + + let url = registry.inference_endpoint(&model_id).await; + assert_eq!(url, Some("http://localhost:13131".into())); + + // Re-loading the same model should be rejected. + let again = registry.load_model(&spec).await; + assert!(again.is_err(), "second load should error"); + + registry + .unload_model(&model_id) + .await + .expect("unload_model should succeed"); + + let models = registry.list_all_models().await.expect("list_all_models"); + assert!(models.is_empty(), "registry should be empty after unload"); + + // Unloading a model that isn't loaded should error. + let err = registry.unload_model(&model_id).await; + assert!(err.is_err(), "unload of missing model should error"); +} diff --git a/neuron.example.toml b/neuron.example.toml index b728b3d..8a71372 100644 --- a/neuron.example.toml +++ b/neuron.example.toml @@ -8,9 +8,17 @@ port = 13131 # -- Harnesses --------------------------------------------------------------- -# Each [[harnesses]] entry declares an inference engine. Currently only +# Each [[harnesses]] entry enables an inference engine. Currently only # "candle" is supported — it runs in-process and uses huggingface/candle -# for inference on local CUDA devices. +# for inference on local CUDA devices (or CPU when CUDA is unavailable). [[harnesses]] name = "candle" + +# -- Candle harness settings ------------------------------------------------- +# Optional tuning for the candle harness. + +[harness.candle] +# HuggingFace cache directory for model weights. When unset, hf-hub's +# default (~/.cache/huggingface) is used. +# hf_cache = "/var/lib/neuron/hf-cache"