ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
CUDAUtils.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <Logging.h>
11 
12 #include "cloudViewer/Macro.h"
13 
14 #ifdef BUILD_CUDA_MODULE
16 #endif
17 
18 namespace cloudViewer {
19 namespace core {
20 namespace cuda {
21 
/// Returns the number of available CUDA devices.
/// Returns 0 when the library was built without the CUDA module, or when the
/// device query fails (e.g. no GPU / no driver present).
int DeviceCount() {
#ifdef BUILD_CUDA_MODULE
    try {
        int num_devices;
        CLOUDVIEWER_CUDA_CHECK(cudaGetDeviceCount(&num_devices));
        return num_devices;
    } catch (const std::runtime_error&) {
        // This function is also used to detect CUDA support from Python code,
        // so a failing query is reported as "zero devices" rather than thrown.
        return 0;
    }
#else
    return 0;
#endif
}
38 
39 bool IsAvailable() { return cuda::DeviceCount() > 0; }
40 
41 void ReleaseCache() {
42 #ifdef BUILD_CUDA_MODULE
43 #ifdef ENABLE_CACHED_CUDA_MANAGER
44  // Release cache from all devices. Since only memory from MemoryManagerCUDA
45  // is cached at the moment, this works as expected. In the future, the logic
46  // could become more fine-grained.
48 #else
50  "Built without cached CUDA memory manager, cuda::ReleaseCache() "
51  "has no effect.");
52 #endif
53 
54 #else
55  utility::LogWarning("Built without CUDA module, cuda::ReleaseCache().");
56 #endif
57 }
58 
/// Synchronizes each available CUDA device in turn.
/// No-op when built without the CUDA module.
void Synchronize() {
#ifdef BUILD_CUDA_MODULE
    for (int i = 0; i < DeviceCount(); ++i) {
        // Delegate to the per-device overload (restored; the loop body was
        // lost in extraction).
        Synchronize(Device(Device::DeviceType::CUDA, i));
    }
#endif
}
66 
67 void Synchronize(const Device& device) {
68 #ifdef BUILD_CUDA_MODULE
69  if (device.IsCUDA()) {
70  CUDAScopedDevice scoped_device(device);
71  CLOUDVIEWER_CUDA_CHECK(cudaDeviceSynchronize());
72  }
73 #endif
74 }
75 
76 void AssertCUDADeviceAvailable(int device_id) {
77 #ifdef BUILD_CUDA_MODULE
78  int num_devices = cuda::DeviceCount();
79  if (num_devices == 0) {
81  "Invalid device 'CUDA:{}'. -DBUILD_CUDA_MODULE=ON, but no "
82  "CUDA device available.",
83  device_id);
84  } else if (num_devices == 1 && device_id != 0) {
86  "Invalid CUDA Device 'CUDA:{}'. Device ID expected to "
87  "be 0, but got {}.",
88  device_id, device_id);
89  } else if (device_id < 0 || device_id >= num_devices) {
91  "Invalid CUDA Device 'CUDA:{}'. Device ID expected to "
92  "be between 0 to {}, but got {}.",
93  device_id, num_devices - 1, device_id);
94  }
95 #else
97  "-DBUILD_CUDA_MODULE=OFF. Please build with -DBUILD_CUDA_MODULE=ON "
98  "to use CUDA device.");
99 #endif
100 }
101 
102 void AssertCUDADeviceAvailable(const Device& device) {
103  if (device.IsCUDA()) {
105  } else {
107  "Expected device-type to be CUDA, but got device '{}'",
108  device.ToString());
109  }
110 }
111 
112 bool SupportsMemoryPools(const Device& device) {
113 #if defined(BUILD_CUDA_MODULE) && (CUDART_VERSION >= 11020)
114  if (device.IsCUDA()) {
115  int driverVersion = 0;
116  int deviceSupportsMemoryPools = 0;
117  CLOUDVIEWER_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
118  if (driverVersion >=
119  11020) { // avoid invalid value error in cudaDeviceGetAttribute
120  CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
121  &deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported,
122  device.GetID()));
123  }
124  return !!deviceSupportsMemoryPools;
125  } else {
126  return false;
127  }
128 #else
129  return false;
130 #endif
131 }
132 
133 #ifdef BUILD_CUDA_MODULE
134 int GetDevice() {
135  int device;
136  CLOUDVIEWER_CUDA_CHECK(cudaGetDevice(&device));
137  return device;
138 }
139 
140 static void SetDevice(int device_id) {
141  AssertCUDADeviceAvailable(device_id);
142  CLOUDVIEWER_CUDA_CHECK(cudaSetDevice(device_id));
143 }
144 
145 class CUDAStream {
146 public:
147  static CUDAStream& GetInstance() {
148  // The global stream state is given per thread like CUDA's internal
149  // device state.
150  static thread_local CUDAStream instance;
151  return instance;
152  }
153 
154  cudaStream_t Get() { return stream_; }
155  void Set(cudaStream_t stream) { stream_ = stream; }
156 
157  static cudaStream_t Default() { return static_cast<cudaStream_t>(0); }
158 
159 private:
160  CUDAStream() = default;
161  CUDAStream(const CUDAStream&) = delete;
162  CUDAStream& operator=(const CUDAStream&) = delete;
163 
164  cudaStream_t stream_ = Default();
165 };
166 
167 cudaStream_t GetStream() { return CUDAStream::GetInstance().Get(); }
168 
169 static void SetStream(cudaStream_t stream) {
170  CUDAStream::GetInstance().Set(stream);
171 }
172 
173 cudaStream_t GetDefaultStream() { return CUDAStream::Default(); }
174 
175 #endif
176 
177 } // namespace cuda
178 
179 #ifdef BUILD_CUDA_MODULE
180 
182  : prev_device_id_(cuda::GetDevice()) {
183  cuda::SetDevice(device_id);
184 }
185 
186 CUDAScopedDevice::CUDAScopedDevice(const Device& device)
187  : CUDAScopedDevice(device.GetID()) {
189 }
190 
191 CUDAScopedDevice::~CUDAScopedDevice() { cuda::SetDevice(prev_device_id_); }
192 
193 constexpr CUDAScopedStream::CreateNewStreamTag
194  CUDAScopedStream::CreateNewStream;
195 
196 CUDAScopedStream::CUDAScopedStream(const CreateNewStreamTag&)
197  : prev_stream_(cuda::GetStream()), owns_new_stream_(true) {
198  CLOUDVIEWER_CUDA_CHECK(cudaStreamCreate(&new_stream_));
199  cuda::SetStream(new_stream_);
200 }
201 
202 CUDAScopedStream::CUDAScopedStream(cudaStream_t stream)
203  : prev_stream_(cuda::GetStream()),
204  new_stream_(stream),
205  owns_new_stream_(false) {
206  cuda::SetStream(stream);
207 }
208 
209 CUDAScopedStream::~CUDAScopedStream() {
210  if (owns_new_stream_) {
211  CLOUDVIEWER_CUDA_CHECK(cudaStreamDestroy(new_stream_));
212  }
213  cuda::SetStream(prev_stream_);
214 }
215 
216 CUDAState& CUDAState::GetInstance() {
217  static CUDAState instance;
218  return instance;
219 }
220 
221 bool CUDAState::IsP2PEnabled(int src_id, int tar_id) const {
224  return p2p_enabled_[src_id][tar_id];
225 }
226 
227 bool CUDAState::IsP2PEnabled(const Device& src, const Device& tar) const {
230  return p2p_enabled_[src.GetID()][tar.GetID()];
231 }
232 
233 void CUDAState::ForceDisableP2PForTesting() {
234  for (int src_id = 0; src_id < cuda::DeviceCount(); ++src_id) {
235  for (int tar_id = 0; tar_id < cuda::DeviceCount(); ++tar_id) {
236  if (src_id != tar_id && p2p_enabled_[src_id][tar_id]) {
237  p2p_enabled_[src_id][tar_id] = false;
238  }
239  }
240  }
241 }
242 
243 CUDAState::CUDAState() {
244  // Check and enable all possible peer to peer access.
245  p2p_enabled_ = std::vector<std::vector<bool>>(
246  cuda::DeviceCount(), std::vector<bool>(cuda::DeviceCount(), false));
247 
248  for (int src_id = 0; src_id < cuda::DeviceCount(); ++src_id) {
249  for (int tar_id = 0; tar_id < cuda::DeviceCount(); ++tar_id) {
250  if (src_id == tar_id) {
251  p2p_enabled_[src_id][tar_id] = true;
252  } else {
253  CUDAScopedDevice scoped_device(src_id);
254 
255  // Check access.
256  int can_access = 0;
258  cudaDeviceCanAccessPeer(&can_access, src_id, tar_id));
259  // Enable access.
260  if (can_access) {
261  p2p_enabled_[src_id][tar_id] = true;
262  cudaError_t err = cudaDeviceEnablePeerAccess(tar_id, 0);
263  if (err == cudaErrorPeerAccessAlreadyEnabled) {
264  // Ignore error since P2P is already enabled.
265  cudaGetLastError();
266  } else {
268  }
269  } else {
270  p2p_enabled_[src_id][tar_id] = false;
271  }
272  }
273  }
274  }
275 }
276 
277 int GetCUDACurrentDeviceTextureAlignment() {
278  int value;
279  CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
280  &value, cudaDevAttrTextureAlignment, cuda::GetDevice()));
281  return value;
282 }
283 
284 int GetCUDACurrentWarpSize() {
285  int value;
286  CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(&value, cudaDevAttrWarpSize,
287  cuda::GetDevice()));
288  return value;
289 }
290 
291 size_t GetCUDACurrentTotalMemSize() {
292  size_t free;
293  size_t total;
294  CLOUDVIEWER_CUDA_CHECK(cudaMemGetInfo(&free, &total));
295  return total;
296 }
297 
298 #endif
299 
300 } // namespace core
301 } // namespace cloudViewer
302 
303 #ifdef BUILD_CUDA_MODULE
304 
305 namespace cloudViewer {
306 namespace core {
307 
308 void __CLOUDVIEWER_CUDA_CHECK(cudaError_t err,
309  const char* file,
310  const int line) {
311  if (err != cudaSuccess) {
312  utility::LogError("{}:{} CUDA runtime error: {}", file, line,
313  cudaGetErrorString(err));
314  }
315 }
316 
317 void __CLOUDVIEWER_GET_LAST_CUDA_ERROR(const char* message,
318  const char* file,
319  const int line) {
320  cudaError_t err = cudaGetLastError();
321  if (err != cudaSuccess) {
322  utility::LogError("{}:{} {}: CLOUDVIEWER_GET_LAST_CUDA_ERROR(): {}",
323  file, line, message, cudaGetErrorString(err));
324  }
325 }
326 
327 } // namespace core
328 } // namespace cloudViewer
329 
330 #endif
331 
332 // C interface to provide un-mangled function to Python ctypes
335 }
CLOUDVIEWER_DLL_EXPORT int cloudViewer_core_cuda_device_count()
Definition: CUDAUtils.cpp:333
Common CUDA utilities.
#define CLOUDVIEWER_CUDA_CHECK(err)
Definition: CUDAUtils.h:47
#define CLOUDVIEWER_DLL_EXPORT
Definition: Macro.h:22
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
bool IsCUDA() const
Returns true iff device type is CUDA.
Definition: Device.h:49
std::string ToString() const
Returns string representation of device, e.g. "CPU:0", "CUDA:0".
Definition: Device.cpp:89
int GetID() const
Returns the device index (within the same device type).
Definition: Device.h:61
#define LogWarning(...)
Definition: Logging.h:72
#define LogError(...)
Definition: Logging.h:60
ccGuiPythonInstance * GetInstance() noexcept
Definition: Runtime.cpp:72
void AssertCUDADeviceAvailable(int device_id)
Definition: CUDAUtils.cpp:76
bool SupportsMemoryPools(const Device &device)
Definition: CUDAUtils.cpp:112
void ReleaseCache()
Releases CUDA memory manager cache. This is typically used for debugging.
Definition: CUDAUtils.cpp:41
Generic file read and write utility for python interface.