#ifdef BUILD_CUDA_MODULE
#include <cuda_runtime.h>  // CUDA-only dependencies; other CUDA headers follow here.
#endif

int DeviceCount() {
#ifdef BUILD_CUDA_MODULE
    try {
        int num_devices;
        CLOUDVIEWER_CUDA_CHECK(cudaGetDeviceCount(&num_devices));
        return num_devices;
    }
    // Also used to detect CUDA support from Python: treat any runtime error
    // (e.g. no GPU present) as "zero devices" instead of throwing.
    catch (const std::runtime_error&) {
        return 0;
    }
#else
    return 0;
#endif
}

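// Example (a hedged sketch, not part of this file): DeviceCount() is the
// usual way to probe for usable GPUs before constructing a CUDA device. The
// cloudViewer::core namespace (abbreviated to core:: in the example comments
// below) and the Device("CUDA:0") string constructor are assumptions taken
// from the rest of the library.
//
//   if (core::cuda::DeviceCount() > 0) {
//       core::Device device("CUDA:0");   // safe: at least one GPU was found
//       // ... allocate tensors / run kernels on `device` ...
//   } else {
//       core::Device device("CPU:0");    // graceful CPU fallback
//   }
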
// Releases the CUDA memory manager cache. Typically used for debugging.
void ReleaseCache() {
#ifdef BUILD_CUDA_MODULE
#ifdef ENABLE_CACHED_CUDA_MANAGER
    // Release cached blocks on all devices.
    CachedMemoryManager::ReleaseCache();
#else
    utility::LogWarning(
            "Built without cached CUDA memory manager, cuda::ReleaseCache() "
            "has no effect.");
#endif
#else
    utility::LogWarning(
            "Built without CUDA module, cuda::ReleaseCache() has no effect.");
#endif
}

void Synchronize() {
#ifdef BUILD_CUDA_MODULE
    for (int i = 0; i < DeviceCount(); ++i) {
        Synchronize(Device(Device::DeviceType::CUDA, i));
    }
#endif
}

void Synchronize(const Device& device) {
#ifdef BUILD_CUDA_MODULE
    if (device.IsCUDA()) {
        CUDAScopedDevice scoped_device(device);
        CLOUDVIEWER_CUDA_CHECK(cudaDeviceSynchronize());
    }
#endif
}

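// Example (hedged sketch): Synchronize(device) blocks until all work queued
// on that device has finished, which is needed before timing kernels or
// reading results produced asynchronously.
//
//   core::Device device("CUDA:0");
//   // ... launch asynchronous work on `device` ...
//   core::cuda::Synchronize(device);  // wait for the GPU to finish
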
void AssertCUDADeviceAvailable(int device_id) {
#ifdef BUILD_CUDA_MODULE
    int num_devices = cuda::DeviceCount();
    if (num_devices == 0) {
        utility::LogError(
                "Invalid device 'CUDA:{}'. -DBUILD_CUDA_MODULE=ON, but no "
                "CUDA device available.",
                device_id);
    } else if (num_devices == 1 && device_id != 0) {
        utility::LogError(
                "Invalid CUDA Device 'CUDA:{}'. Device ID expected to "
                "be 0, but got {}.",
                device_id, device_id);
    } else if (device_id < 0 || device_id >= num_devices) {
        utility::LogError(
                "Invalid CUDA Device 'CUDA:{}'. Device ID expected to "
                "be between 0 to {}, but got {}.",
                device_id, num_devices - 1, device_id);
    }
#else
    utility::LogError(
            "-DBUILD_CUDA_MODULE=OFF. Please build with -DBUILD_CUDA_MODULE=ON "
            "to use CUDA device.");
#endif
}

107 "Expected device-type to be CUDA, but got device '{}'",
bool SupportsMemoryPools(const Device& device) {
#if defined(BUILD_CUDA_MODULE) && (CUDART_VERSION >= 11020)
    if (device.IsCUDA()) {
        int driverVersion = 0;
        int deviceSupportsMemoryPools = 0;
        CLOUDVIEWER_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
        if (driverVersion >= 11020) {
            // Older drivers reject cudaDevAttrMemoryPoolsSupported.
            CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
                    &deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported,
                    device.GetID()));
        }
        return !!deviceSupportsMemoryPools;
    } else {
        return false;
    }
#else
    return false;
#endif
}

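// Example (hedged sketch): choosing an allocation strategy depending on
// whether stream-ordered memory pools (CUDA runtime >= 11.2) are supported.
//
//   core::Device device("CUDA:0");
//   if (core::cuda::SupportsMemoryPools(device)) {
//       // pooled / stream-ordered allocations (cudaMallocAsync) are usable
//   } else {
//       // fall back to plain cudaMalloc-style allocation
//   }
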
#ifdef BUILD_CUDA_MODULE
int GetDevice() {
    int device;
    CLOUDVIEWER_CUDA_CHECK(cudaGetDevice(&device));
    return device;
}

static void SetDevice(int device_id) {
    AssertCUDADeviceAvailable(device_id);
    CLOUDVIEWER_CUDA_CHECK(cudaSetDevice(device_id));
}

// Per-thread holder of the current CUDA stream, mirroring how the CUDA
// runtime tracks the current device per thread.
class CUDAStream {
public:
    static CUDAStream& GetInstance() {
        static thread_local CUDAStream instance;
        return instance;
    }

    cudaStream_t Get() { return stream_; }
    void Set(cudaStream_t stream) { stream_ = stream; }

    static cudaStream_t Default() { return static_cast<cudaStream_t>(0); }

private:
    CUDAStream() = default;
    CUDAStream(const CUDAStream&) = delete;
    CUDAStream& operator=(const CUDAStream&) = delete;

    cudaStream_t stream_ = Default();
};

cudaStream_t GetStream() { return CUDAStream::GetInstance().Get(); }

static void SetStream(cudaStream_t stream) {
    CUDAStream::GetInstance().Set(stream);
}

cudaStream_t GetDefaultStream() { return CUDAStream::Default(); }
#endif

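// Note / example (hedged sketch): SetStream() and SetDevice() above are
// file-local (static) helpers, so external code switches the active stream
// or device through the RAII guards CUDAScopedStream / CUDAScopedDevice
// defined below. Reading the per-thread stream state is public:
//
//   cudaStream_t current = core::cuda::GetStream();
//   bool on_default_stream = (current == core::cuda::GetDefaultStream());
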
#ifdef BUILD_CUDA_MODULE

CUDAScopedDevice::CUDAScopedDevice(int device_id)
    : prev_device_id_(cuda::GetDevice()) {
    cuda::SetDevice(device_id);
}

CUDAScopedDevice::CUDAScopedDevice(const Device& device)
    : CUDAScopedDevice(device.GetID()) {}

CUDAScopedDevice::~CUDAScopedDevice() { cuda::SetDevice(prev_device_id_); }

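// Example (hedged sketch): CUDAScopedDevice is a scope guard (similar in
// spirit to torch.cuda.device); the previously current device is restored
// automatically when the guard goes out of scope.
//
//   {
//       core::CUDAScopedDevice scoped(1);  // make CUDA:1 the current device
//       // ... CUDA calls here run with device 1 current ...
//   }  // destructor restores the previous device
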
constexpr CUDAScopedStream::CreateNewStreamTag
        CUDAScopedStream::CreateNewStream;

CUDAScopedStream::CUDAScopedStream(const CreateNewStreamTag&)
    : prev_stream_(cuda::GetStream()), owns_new_stream_(true) {
    CLOUDVIEWER_CUDA_CHECK(cudaStreamCreate(&new_stream_));
    cuda::SetStream(new_stream_);
}

CUDAScopedStream::CUDAScopedStream(cudaStream_t stream)
    : prev_stream_(cuda::GetStream()),
      new_stream_(stream),
      owns_new_stream_(false) {
    cuda::SetStream(stream);
}

CUDAScopedStream::~CUDAScopedStream() {
    if (owns_new_stream_) {
        CLOUDVIEWER_CUDA_CHECK(cudaStreamDestroy(new_stream_));
    }
    cuda::SetStream(prev_stream_);
}

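// Example (hedged sketch): two ways to use the stream guard. With the
// CreateNewStream tag the guard creates and later destroys the stream; with
// an explicit cudaStream_t the caller keeps ownership.
//
//   {
//       core::CUDAScopedStream guard(core::CUDAScopedStream::CreateNewStream);
//       // ... asynchronous work runs on the newly created stream ...
//   }  // stream destroyed, previous stream restored
//
//   cudaStream_t my_stream;
//   cudaStreamCreate(&my_stream);
//   {
//       core::CUDAScopedStream guard(my_stream);
//       // ... work runs on my_stream ...
//   }  // previous stream restored; my_stream is still owned by the caller
//   cudaStreamDestroy(my_stream);
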
CUDAState& CUDAState::GetInstance() {
    static CUDAState instance;
    return instance;
}

bool CUDAState::IsP2PEnabled(int src_id, int tar_id) const {
    cuda::AssertCUDADeviceAvailable(src_id);
    cuda::AssertCUDADeviceAvailable(tar_id);
    return p2p_enabled_[src_id][tar_id];
}

bool CUDAState::IsP2PEnabled(const Device& src, const Device& tar) const {
    cuda::AssertCUDADeviceAvailable(src);
    cuda::AssertCUDADeviceAvailable(tar);
    return p2p_enabled_[src.GetID()][tar.GetID()];
}

void CUDAState::ForceDisableP2PForTesting() {
    for (int src_id = 0; src_id < cuda::DeviceCount(); ++src_id) {
        for (int tar_id = 0; tar_id < cuda::DeviceCount(); ++tar_id) {
            if (src_id != tar_id && p2p_enabled_[src_id][tar_id]) {
                p2p_enabled_[src_id][tar_id] = false;
            }
        }
    }
}

CUDAState::CUDAState() {
    // Check and enable all possible peer-to-peer access between devices.
    p2p_enabled_ = std::vector<std::vector<bool>>(
            cuda::DeviceCount(),
            std::vector<bool>(cuda::DeviceCount(), false));

    for (int src_id = 0; src_id < cuda::DeviceCount(); ++src_id) {
        for (int tar_id = 0; tar_id < cuda::DeviceCount(); ++tar_id) {
            if (src_id == tar_id) {
                p2p_enabled_[src_id][tar_id] = true;
            } else {
                CUDAScopedDevice scoped_device(src_id);

                // Check whether P2P access is possible.
                int can_access = 0;
                CLOUDVIEWER_CUDA_CHECK(
                        cudaDeviceCanAccessPeer(&can_access, src_id, tar_id));
                if (can_access) {
                    // Enable access; ignore the error if it is already enabled.
                    p2p_enabled_[src_id][tar_id] = true;
                    cudaError_t err = cudaDeviceEnablePeerAccess(tar_id, 0);
                    if (err == cudaErrorPeerAccessAlreadyEnabled) {
                        cudaGetLastError();  // clear the sticky error
                    } else {
                        CLOUDVIEWER_CUDA_CHECK(err);
                    }
                } else {
                    p2p_enabled_[src_id][tar_id] = false;
                }
            }
        }
    }
}

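// Example (hedged sketch): querying the cached peer-to-peer table before
// choosing a device-to-device copy path.
//
//   auto& state = core::CUDAState::GetInstance();
//   if (state.IsP2PEnabled(/*src_id=*/0, /*tar_id=*/1)) {
//       // a direct peer copy (e.g. cudaMemcpyPeer) between 0 and 1 works
//   } else {
//       // stage the copy through host memory instead
//   }
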
int GetCUDACurrentDeviceTextureAlignment() {
    int value;
    CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
            &value, cudaDevAttrTextureAlignment, cuda::GetDevice()));
    return value;
}

int GetCUDACurrentWarpSize() {
    int value;
    CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
            &value, cudaDevAttrWarpSize, cuda::GetDevice()));
    return value;
}

size_t GetCUDACurrentTotalMemSize() {
    size_t free;
    size_t total;
    CLOUDVIEWER_CUDA_CHECK(cudaMemGetInfo(&free, &total));
    return total;
}

#endif

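// Example (hedged sketch): these getters query the *current* device (as set
// by CUDAScopedDevice / cudaSetDevice), e.g. when sizing kernel launches or
// memory budgets. They are assumed to live in cloudViewer::core like the
// rest of this file.
//
//   core::CUDAScopedDevice scoped(0);
//   int warp = core::GetCUDACurrentWarpSize();                    // typically 32
//   int tex_align = core::GetCUDACurrentDeviceTextureAlignment();
//   size_t total_bytes = core::GetCUDACurrentTotalMemSize();
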
#ifdef BUILD_CUDA_MODULE

void __CLOUDVIEWER_CUDA_CHECK(cudaError_t err, const char* file, const int line) {
    if (err != cudaSuccess) {
        utility::LogError("{}:{} CUDA runtime error: {}", file, line,
                          cudaGetErrorString(err));
    }
}

void __CLOUDVIEWER_GET_LAST_CUDA_ERROR(const char* message,
                                       const char* file,
                                       const int line) {
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        utility::LogError("{}:{} {}: {}",
                          file, line, message, cudaGetErrorString(err));
    }
}

#endif

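// Example (hedged sketch): CLOUDVIEWER_CUDA_CHECK(err) (declared in the
// corresponding header) forwards to __CLOUDVIEWER_CUDA_CHECK together with
// __FILE__/__LINE__; a matching CLOUDVIEWER_GET_LAST_CUDA_ERROR(msg) macro is
// assumed to wrap the second helper. Raw CUDA calls are wrapped like this:
//
//   void* ptr = nullptr;
//   CLOUDVIEWER_CUDA_CHECK(cudaMalloc(&ptr, 1024));
//   // ... launch a kernel ...
//   CLOUDVIEWER_GET_LAST_CUDA_ERROR("kernel launch failed");
//   CLOUDVIEWER_CUDA_CHECK(cudaFree(ptr));
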