ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
MemoryManagerCUDA.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <cuda.h>
9 #include <cuda_runtime.h>
10 
13 
14 namespace cloudViewer {
15 namespace core {
16 
17 void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
18  CUDAScopedDevice scoped_device(device);
19 
20  void* ptr;
21  if (device.IsCUDA()) {
22 #if CUDART_VERSION >= 11020
23  if (cuda::SupportsMemoryPools(device)) {
24  CLOUDVIEWER_CUDA_CHECK(cudaMallocAsync(
25  static_cast<void**>(&ptr), byte_size, cuda::GetStream()));
26  } else {
28  cudaMalloc(static_cast<void**>(&ptr), byte_size));
29  }
30 #else
32  cudaMalloc(static_cast<void**>(&ptr), byte_size));
33 #endif
34  } else {
35  utility::LogError("Internal error: Unimplemented device {}.",
36  device.ToString());
37  }
38  return ptr;
39 }
40 
41 void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
42  CUDAScopedDevice scoped_device(device);
43 
44  if (device.IsCUDA()) {
45  if (ptr && IsCUDAPointer(ptr, device)) {
46 #if CUDART_VERSION >= 11020
47  if (cuda::SupportsMemoryPools(device)) {
48  CLOUDVIEWER_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
49  } else {
50  CLOUDVIEWER_CUDA_CHECK(cudaFree(ptr));
51  }
52 #else
53  CLOUDVIEWER_CUDA_CHECK(cudaFree(ptr));
54 #endif
55  }
56  } else {
57  utility::LogError("Internal error: Unimplemented device {}.",
58  device.ToString());
59  }
60 }
61 
62 void MemoryManagerCUDA::Memcpy(void* dst_ptr,
63  const Device& dst_device,
64  const void* src_ptr,
65  const Device& src_device,
66  size_t num_bytes) {
67  if (dst_device.IsCUDA() && src_device.IsCPU()) {
68  if (!IsCUDAPointer(dst_ptr, dst_device)) {
69  utility::LogError("dst_ptr is not a CUDA pointer.");
70  }
71  CUDAScopedDevice scoped_device(dst_device);
72  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyAsync(dst_ptr, src_ptr, num_bytes,
73  cudaMemcpyHostToDevice,
74  cuda::GetStream()));
75  } else if (dst_device.IsCPU() && src_device.IsCUDA()) {
76  if (!IsCUDAPointer(src_ptr, src_device)) {
77  utility::LogError("src_ptr is not a CUDA pointer.");
78  }
79  CUDAScopedDevice scoped_device(src_device);
80  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyAsync(dst_ptr, src_ptr, num_bytes,
81  cudaMemcpyDeviceToHost,
82  cuda::GetStream()));
83  } else if (dst_device.IsCUDA() && src_device.IsCUDA()) {
84  if (!IsCUDAPointer(dst_ptr, dst_device)) {
85  utility::LogError("dst_ptr is not a CUDA pointer.");
86  }
87  if (!IsCUDAPointer(src_ptr, src_device)) {
88  utility::LogError("src_ptr is not a CUDA pointer.");
89  }
90 
91  if (dst_device == src_device) {
92  CUDAScopedDevice scoped_device(src_device);
93  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyAsync(dst_ptr, src_ptr, num_bytes,
94  cudaMemcpyDeviceToDevice,
95  cuda::GetStream()));
96  } else if (CUDAState::GetInstance().IsP2PEnabled(src_device.GetID(),
97  dst_device.GetID())) {
98  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyPeerAsync(
99  dst_ptr, dst_device.GetID(), src_ptr, src_device.GetID(),
100  num_bytes, cuda::GetStream()));
101  } else {
102  void* cpu_buf = MemoryManager::Malloc(num_bytes, Device("CPU:0"));
103  {
104  CUDAScopedDevice scoped_device(src_device);
105  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyAsync(
106  cpu_buf, src_ptr, num_bytes, cudaMemcpyDeviceToHost,
107  cuda::GetStream()));
108  }
109  {
110  CUDAScopedDevice scoped_device(dst_device);
111  CLOUDVIEWER_CUDA_CHECK(cudaMemcpyAsync(
112  dst_ptr, cpu_buf, num_bytes, cudaMemcpyHostToDevice,
113  cuda::GetStream()));
114  }
115  MemoryManager::Free(cpu_buf, Device("CPU:0"));
116  }
117  } else {
118  utility::LogError("Wrong cudaMemcpyKind.");
119  }
120 }
121 
122 bool MemoryManagerCUDA::IsCUDAPointer(const void* ptr, const Device& device) {
123  CUDAScopedDevice scoped_device(device);
124 
125  cudaPointerAttributes attributes;
126  cudaPointerGetAttributes(&attributes, ptr);
127  return attributes.devicePointer != nullptr ? true : false;
128 }
129 
130 } // namespace core
131 } // namespace cloudViewer
Common CUDA utilities.
#define CLOUDVIEWER_CUDA_CHECK(err)
Definition: CUDAUtils.h:47
static void * Malloc(size_t byte_size, const Device &device)
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
#define LogError(...)
Definition: Logging.h:60
ccGuiPythonInstance * GetInstance() noexcept
Definition: Runtime.cpp:72
bool SupportsMemoryPools(const Device &device)
Definition: CUDAUtils.cpp:112
void Free(benchmark::State &state, int size, const Device &device, const MemoryManagerBackend &backend)
void Malloc(benchmark::State &state, int size, const Device &device, const MemoryManagerBackend &backend)
Generic file read and write utility for python interface.