ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
BlockSelectFloat32.cu
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include "core/nns/kernel/BlockSelectImpl.cuh"
9 
10 namespace cloudViewer {
11 namespace core {
// Explicit BLOCK_SELECT_IMPL instantiations for float keys.
//
// Every (index type, size) combination is emitted twice: once with `true` and
// once with `false` as the third macro argument. The helper below only pairs
// those two invocations; the two trailing numeric arguments are forwarded to
// BLOCK_SELECT_IMPL untouched.
// NOTE(review): the numeric arguments are presumably the warp-queue and
// thread-queue sizes -- confirm against BlockSelectImpl.cuh.
#define BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(TINDEX, WARP_Q, THREAD_Q) \
    BLOCK_SELECT_IMPL(float, TINDEX, true, WARP_Q, THREAD_Q);        \
    BLOCK_SELECT_IMPL(float, TINDEX, false, WARP_Q, THREAD_Q)

// ---- int32_t indices ----
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 1, 1);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 32, 2);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 64, 3);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 128, 3);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 256, 4);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 512, 8);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 1024, 8);
// The 2048 variants exist only when the build allows selections that large.
#if GPU_MAX_SELECTION_K >= 2048
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int32_t, 2048, 8);
#endif

// ---- int64_t indices ----
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 1, 1);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 32, 2);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 64, 3);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 128, 3);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 256, 4);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 512, 8);
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 1024, 8);
// The 2048 variants exist only when the build allows selections that large.
#if GPU_MAX_SELECTION_K >= 2048
BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS(int64_t, 2048, 8);
#endif

// Keep the helper local to this run of instantiations.
#undef BLOCK_SELECT_IMPL_FLOAT_BOTH_DIRS
63 
// Dispatches a float-key / int32_t-value block selection to the
// BLOCK_SELECT_PAIR_CALL variant matching `dir` and `k`.
//
// `k` is mapped to the smallest listed size that is >= k
// (1, 32, 64, 128, 256, 512, 1024, and 2048 when GPU_MAX_SELECTION_K permits);
// `dir` picks between the `true` and `false` variants. If no size matches,
// nothing is called.
//
// NOTE(review): BLOCK_SELECT_PAIR_CALL receives no runtime arguments here, so
// it presumably refers to this function's parameters by name -- do not rename
// them without checking BlockSelectImpl.cuh.
void runBlockSelectPair(cudaStream_t stream,
                        float* inK,
                        int32_t* inV,
                        float* outK,
                        int32_t* outV,
                        bool dir,
                        int k,
                        int dim,
                        int num_points) {
    CLOUDVIEWER_ASSERT(k <= GPU_MAX_SELECTION_K);

    // Guard-clause dispatch: the first size bucket that fits k wins.
    if (k == 1) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 1);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 1);
        }
        return;
    }
    if (k <= 32) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 32);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 32);
        }
        return;
    }
    if (k <= 64) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 64);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 64);
        }
        return;
    }
    if (k <= 128) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 128);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 128);
        }
        return;
    }
    if (k <= 256) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 256);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 256);
        }
        return;
    }
    if (k <= 512) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 512);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 512);
        }
        return;
    }
    if (k <= 1024) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 1024);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 1024);
        }
        return;
    }
#if GPU_MAX_SELECTION_K >= 2048
    if (k <= 2048) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, true, 2048);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int32_t, false, 2048);
        }
        return;
    }
#endif
}
117 
// Dispatches a float-key / int64_t-value block selection to the
// BLOCK_SELECT_PAIR_CALL variant matching `dir` and `k`.
//
// `k` is mapped to the smallest listed size that is >= k
// (1, 32, 64, 128, 256, 512, 1024, and 2048 when GPU_MAX_SELECTION_K permits);
// `dir` picks between the `true` and `false` variants. If no size matches,
// nothing is called.
//
// NOTE(review): BLOCK_SELECT_PAIR_CALL receives no runtime arguments here, so
// it presumably refers to this function's parameters by name -- do not rename
// them without checking BlockSelectImpl.cuh.
void runBlockSelectPair(cudaStream_t stream,
                        float* inK,
                        int64_t* inV,
                        float* outK,
                        int64_t* outV,
                        bool dir,
                        int k,
                        int dim,
                        int num_points) {
    CLOUDVIEWER_ASSERT(k <= GPU_MAX_SELECTION_K);

    // Guard-clause dispatch: the first size bucket that fits k wins.
    if (k == 1) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 1);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 1);
        }
        return;
    }
    if (k <= 32) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 32);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 32);
        }
        return;
    }
    if (k <= 64) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 64);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 64);
        }
        return;
    }
    if (k <= 128) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 128);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 128);
        }
        return;
    }
    if (k <= 256) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 256);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 256);
        }
        return;
    }
    if (k <= 512) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 512);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 512);
        }
        return;
    }
    if (k <= 1024) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 1024);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 1024);
        }
        return;
    }
#if GPU_MAX_SELECTION_K >= 2048
    if (k <= 2048) {
        if (dir) {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, true, 2048);
        } else {
            BLOCK_SELECT_PAIR_CALL(float, int64_t, false, 2048);
        }
        return;
    }
#endif
}
171 
172 } // namespace core
173 } // namespace cloudViewer