// ----------------------------------------------------------------------------
// - CloudViewer: www.cloudViewer.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------
namespace cloudViewer {

// Compile-time toolkit requirement: refuse to build with CUDA older than 8.0.
// NOTE(review): CUDA_VERSION is normally provided by <cuda.h>; this block
// assumes that header is included earlier in the file -- confirm. If the macro
// is undefined the preprocessor treats it as 0 and the #error below fires.
#if CUDA_VERSION < 8000
#error "CUDA >= 8.0 is required"
#endif

// Warp width assumed by the device code.
// We validate this against the actual architecture in device initialization.
constexpr int kWarpSize = 32;

// This is a memory barrier for intra-warp writes to shared memory.
//
// CUDA >= 9.0: issues __syncwarp(), which synchronizes the lanes of the
//   calling warp and orders their memory accesses. Implicit warp-synchronous
//   execution can no longer be assumed under independent thread scheduling.
// Older toolkits: compiles to nothing and relies on the lanes of a warp
//   executing in lockstep.
__forceinline__ __device__ void warpFence() {
#if CUDA_VERSION >= 9000
    __syncwarp();
#else
    // For the time being, assume synchronicity.
    // __threadfence_block();
#endif
}

// Upper bound used by the selection code (presumably the largest supported k;
// verify against the users of this macro).
#if CUDA_VERSION > 9000
// The register allocation algorithm is much better in newer toolkits (we
// assume CUDA_VERSION reflects the nvcc/ptxas we were compiled with), so only
// enable the 2048 selection code if we are above 9.0 (9.2 seems to be ok).
#define GPU_MAX_SELECTION_K 2048
#else
#define GPU_MAX_SELECTION_K 1024
#endif

}  // namespace cloudViewer