From 8e882c07570ec715c707b196b9aaf7be41b85229 Mon Sep 17 00:00:00 2001
From: rob thijssen
Date: Tue, 19 May 2026 17:24:13 +0300
Subject: [PATCH] fix(neuron/tp): NcclError {e:?} + cudarc 0.19 deprecation cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two cuda-feature-only build errors only the CI runner catches:

1. cudarc::nccl::NcclError doesn't impl Display in 0.19.x, so the
   `format!("...: {e}")` map_err calls fail to compile when the cuda
   feature actually wires them up. Switch every NcclError-typed `{e}`
   in nccl_state.rs to `{e:?}` — surfaces variant + ncclResult code
   in the same diagnostic shape just via Debug instead of Display.

2. cudarc::CudaStream::memcpy_stod / memcpy_dtov are deprecated in
   0.19.7 in favour of clone_htod / clone_dtoh. The replacements
   take/return the same types, so the swap is mechanical.

Dev box can't compile with --features cuda (no nvcc), so these only
surface in the build-prerelease CUDA matrix jobs.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/neuron/src/harness/tp/nccl_state.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/crates/neuron/src/harness/tp/nccl_state.rs b/crates/neuron/src/harness/tp/nccl_state.rs
index 9f57fa9..7638599 100644
--- a/crates/neuron/src/harness/tp/nccl_state.rs
+++ b/crates/neuron/src/harness/tp/nccl_state.rs
@@ -118,7 +118,9 @@ mod cuda_impl {
     /// the leader to mint the shared communicator id which is then
     /// broadcast to every worker via the RPC `Init` message.
     pub fn generate_comm_id_hex() -> Result {
-        let id = Id::new().map_err(|e| format!("Id::new(): {e}"))?;
+        // NcclError lacks a Display impl in cudarc 0.19.x — surface
+        // via Debug throughout this module.
+        let id = Id::new().map_err(|e| format!("Id::new(): {e:?}"))?;
         let bytes_u8: [u8; NCCL_ID_BYTES] = std::array::from_fn(|i| id.internal()[i] as u8);
         Ok(encode_hex(&bytes_u8))
     }
@@ -169,7 +171,7 @@ mod cuda_impl {
         let comm = Comm::from_rank(stream, cfg.rank as usize, cfg.world_size as usize, id)
             .map_err(|e| {
                 format!(
-                    "Comm::from_rank(rank={}, world={}) failed: {e}",
+                    "Comm::from_rank(rank={}, world={}) failed: {e:?}",
                     cfg.rank, cfg.world_size
                 )
             })?;
@@ -182,15 +184,18 @@ mod cuda_impl {
     fn try_sanity_check(comm: &Comm) -> Result {
         let stream = comm.stream().clone();
         let input = stream
-            .memcpy_stod(&[1u32])
+            .clone_htod(&[1u32])
             .map_err(|e| format!("htod sentinel: {e}"))?;
         let mut output = stream
             .alloc_zeros::(1)
             .map_err(|e| format!("alloc output: {e}"))?;
+        // cudarc::nccl::NcclError doesn't impl Display in 0.19.x —
+        // surface via Debug so we still see the variant + ncclResult
+        // code instead of a generic "{e}" failure.
         comm.all_reduce(&input, &mut output, &ReduceOp::Sum)
-            .map_err(|e| format!("all_reduce: {e}"))?;
+            .map_err(|e| format!("all_reduce: {e:?}"))?;
         let result = stream
-            .memcpy_dtov(&output)
+            .clone_dtoh(&output)
             .map_err(|e| format!("dtoh result: {e}"))?;
         Ok(result[0])
     }