ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
cuda_utils.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <cmath>
11 
12 namespace cloudViewer {
13 namespace ml {
14 namespace contrib {
15 
/// Upper bound on the thread count that the helpers in this header will
/// request, both per dimension and for a whole block.
#define TOTAL_THREADS 1024

/// Fixed block size constant. Not referenced in this header; presumably used
/// by kernels that include it (TODO confirm against callers).
#define THREADS_PER_BLOCK 256

/// Integer division of m by n, rounded up when there is a positive remainder.
/// Both arguments are expanded twice, so do not pass expressions with side
/// effects.
#define DIVUP(m, n) (((m) / (n)) + ((((m) % (n)) > 0) ? 1 : 0))
19 
20 inline int OptNumThreads(int work_size) {
21  const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
22 
23  return max(min(1 << pow_2, TOTAL_THREADS), 1);
24 }
25 
26 inline dim3 OptBlockConfig(int x, int y) {
27  const int x_threads = OptNumThreads(x);
28  const int y_threads =
29  max(min(OptNumThreads(y), TOTAL_THREADS / x_threads), 1);
30  dim3 block_config(x_threads, y_threads, 1);
31  return block_config;
32 }
33 
34 } // namespace contrib
35 } // namespace ml
36 } // namespace cloudViewer
#define TOTAL_THREADS
Definition: cuda_utils.h:16
int min(int a, int b)
Definition: cutil_math.h:53
int max(int a, int b)
Definition: cutil_math.h:48
dim3 OptBlockConfig(int x, int y)
Definition: cuda_utils.h:26
int OptNumThreads(int work_size)
Definition: cuda_utils.h:20
Generic file read and write utility for python interface.