diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx
index 1e9f5116c1..c3dc4114db 100644
--- a/cuda_core/cuda/core/_memory/_memory_pool.pyx
+++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx
@@ -348,8 +348,8 @@ cdef int _MP_init_current(_MemPool self, int dev_id, _MemPoolOptions opts) excep
     elif opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
             and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA:
         IF CUDA_CORE_BUILD_MAJOR >= 13:
-            assert dev_id == 0
-            loc.id = 0
+            assert dev_id >= 0
+            loc.id = dev_id
             loc.type = opts._location
             with nogil:
                 HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
diff --git a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
index b2a9db4594..3d1e916c95 100644
--- a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
@@ -76,8 +76,8 @@ def _check_numa_nodes():
     if numa_count is not None and numa_count > 1:
         warnings.warn(
             f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
-            f"uses location ID 0, which may not work correctly with multiple "
-            f"NUMA nodes.",
+            f"targets the host NUMA node closest to the current device; "
+            f"this may not work correctly with multiple NUMA nodes.",
             UserWarning,
             stacklevel=3
         )
@@ -85,6 +85,38 @@
     _numa_warning_shown = True
 
 
+def _host_numa_id_for_current_device() -> int:
+    """Return host NUMA node closest to current device (fallback to 0)."""
+    cdef cydriver.CUdevice dev
+    cdef cydriver.CUresult err
+    cdef int host_numa_id
+
+    with nogil:
+        err = cydriver.cuCtxGetDevice(&dev)
+    if err in (
+        cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT,
+        cydriver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
+    ):
+        return 0
+    HANDLE_RETURN(err)
+
+    with nogil:
+        err = cydriver.cuDeviceGetAttribute(
+            &host_numa_id,
+            cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID,
+            dev
+        )
+    if err in (
+        cydriver.CUresult.CUDA_ERROR_INVALID_VALUE,
+        cydriver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
+    ):
+        return 0
+    HANDLE_RETURN(err)
+    if host_numa_id < 0:
+        return 0
+    return host_numa_id
+
+
 __all__ = ['PinnedMemoryResource', 'PinnedMemoryResourceOptions']
 
 
@@ -134,10 +166,10 @@ cdef class PinnedMemoryResource(_MemPool):
     allocations between processes, specify ``ipc_enabled=True`` in the
     initializer option. When IPC is enabled, the location type is automatically
     set to CU_MEM_LOCATION_TYPE_HOST_NUMA instead of CU_MEM_LOCATION_TYPE_HOST,
-    with location ID 0.
+    using the host NUMA node closest to the current device.
 
-    Note: IPC support for pinned memory requires a single NUMA node. A warning
-    is issued if multiple NUMA nodes are detected.
+    Note: IPC support for pinned memory can be sensitive to NUMA placement. A
+    warning is issued if multiple NUMA nodes are detected.
 
     See :class:`DeviceMemoryResource` for more details on IPC usage patterns.
     """
@@ -150,6 +182,7 @@ cdef class PinnedMemoryResource(_MemPool):
 
         cdef _MemPoolOptions opts_base = _MemPoolOptions()
         cdef bint ipc_enabled = False
+        cdef int location_id = -1
         if opts:
             ipc_enabled = opts.ipc_enabled
             if ipc_enabled and not _ipc.is_supported():
@@ -157,6 +190,7 @@ cdef class PinnedMemoryResource(_MemPool):
             if ipc_enabled:
                 # Check for multiple NUMA nodes on Linux
                 _check_numa_nodes()
+                location_id = _host_numa_id_for_current_device()
             opts_base._max_size = opts.max_size
         opts_base._use_current = False
         opts_base._ipc_enabled = ipc_enabled
@@ -166,7 +200,7 @@ cdef class PinnedMemoryResource(_MemPool):
             opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
         opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
 
-        super().__init__(0 if ipc_enabled else -1, opts_base)
+        super().__init__(location_id if ipc_enabled else -1, opts_base)
 
     def __reduce__(self):
         return PinnedMemoryResource.from_registry, (self.uuid,)
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 9a88f5f483..60bc90bbdd 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -978,7 +978,7 @@ def test_mempool_ipc_errors(mempool_device):
 
 def test_pinned_mempool_ipc_basic():
     """Test basic IPC functionality for PinnedMemoryResource."""
-    device = Device()
+    device = Device(0)
     device.set_current()
     skip_if_pinned_memory_unsupported(device)
 
@@ -995,7 +995,11 @@ def test_pinned_mempool_ipc_basic():
     assert mr.is_ipc_enabled
     assert mr.is_device_accessible
     assert mr.is_host_accessible
-    assert mr.device_id == 0  # IPC-enabled uses location id 0
+    # For IPC-enabled pinned pools, device_id reflects the host NUMA location ID.
+    expected_numa_id = device.properties.host_numa_id
+    if expected_numa_id < 0:
+        expected_numa_id = 0
+    assert mr.device_id == expected_numa_id
 
     # Test allocation handle export
    alloc_handle = mr.get_allocation_handle()