# Manifest for the `neuron` crate: one library plus one binary of the same
# name. Version, edition and license are inherited from the workspace.
[package]
name = "neuron"
version.workspace = true
edition.workspace = true
license.workspace = true

[lib]
name = "neuron"
path = "src/lib.rs"

[[bin]]
name = "neuron"
path = "src/main.rs"

[features]
# No GPU support is enabled by default.
default = []
# Enables CUDA acceleration in candle. Without this feature candle is
# compiled for CPU only.
# NOTE(review): an earlier version of this comment said `Device::new_cuda`
# falls back to CPU. In candle-core a non-CUDA build makes
# `Device::new_cuda` return an error; `Device::cuda_if_available` is the
# call that resolves to the CPU device. Confirm which one this crate uses.
cuda = [
    "candle-core/cuda",
    "candle-nn/cuda",
    "candle-transformers/cuda",
]
# Use cuDNN for convolution / attention kernels. Requires CUDA, so this
# feature also turns on `cuda`.
cudnn = [
    "cuda",
    "candle-core/cudnn",
    "candle-nn/cudnn",
    "candle-transformers/cudnn",
]
# FlashAttention kernels. Requires CUDA, so this feature also turns on `cuda`.
flash-attn = [
    "cuda",
    "candle-transformers/flash-attn",
]
# Reserved for GPU-only integration tests in later stages.
cuda-integration = ["cuda"]

[dependencies]
anyhow.workspace = true
async-trait.workspace = true
axum.workspace = true
# candle for in-process inference. CUDA support is gated behind this
# crate's `cuda` feature (default off) so the workspace builds on
# non-CUDA hosts and CI runners.
candle-core = "0.10.2"
candle-nn = "0.10.2"
candle-transformers = "0.10.2"
clap.workspace = true
cortex-core.workspace = true
figment.workspace = true
futures.workspace = true
hf-hub = { version = "0.4", features = ["tokio"] }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
tokenizers = { version = "0.22", default-features = false, features = ["onig"] }
tokio.workspace = true
tokio-stream.workspace = true
toml.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true

[dev-dependencies]
# Already a regular dependency above; repeated here without extra features.
reqwest.workspace = true
# Adds tokio's `test-util` feature for test builds only.
tokio = { workspace = true, features = ["test-util"] }