diff --git a/Cargo.lock b/Cargo.lock
index e6cd5a4..4690de1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -905,8 +905,7 @@ dependencies = [
 [[package]]
 name = "cudarc"
 version = "0.19.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cea5f10a99e025c1b44ae2354c2d8326b25ddbd0baf76bde8e55cfd4018a2cc"
+source = "git+https://github.com/grenade/cudarc?rev=4dff0be72d8a685d6691a6a53d4c95e1fe932277#4dff0be72d8a685d6691a6a53d4c95e1fe932277"
 dependencies = [
  "float8",
  "half",
diff --git a/Cargo.toml b/Cargo.toml
index 2ad8471..b69d9d4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,3 +61,12 @@ eventsource-stream = "0.2"
 # workspace crates
 cortex-core = { path = "crates/cortex-core" }
 cortex-gateway = { path = "crates/cortex-gateway" }
+
+# Patched cudarc (affects neuron's 0.19.x only; candle's 0.17.x is
+# untouched since the fork is 0.19.7 and doesn't satisfy a 0.17 req). Adds
+# Comm::abort / get_async_error / raw comm() — needed for #17 Stage 2 TP
+# hang-recovery (abort a wedged collective from another thread, then
+# rebuild the comm). Pinned to a fork revision pending upstream review
+# (grenade/cudarc @ nccl-comm-abort).
+[patch.crates-io]
+cudarc = { git = "https://github.com/grenade/cudarc", rev = "4dff0be72d8a685d6691a6a53d4c95e1fe932277" }