1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
8 #include "core/nns/kernel/BlockSelectImpl.cuh"
10 namespace cloudViewer {
// Expansions of BLOCK_SELECT_IMPL for the type pair (double, int32_t).
// Every size is expanded twice: once with `true` and once with `false` as
// the third argument. The fourth argument takes the values 1, 32, 64, 128,
// 256, 512, 1024 and 2048, which are the same values passed as the last
// argument of BLOCK_SELECT_PAIR_CALL(double, int32_t, ...) further down in
// this file. The fifth argument is 1, 2, 3, 3, 4, 8, 8, 8 for those sizes.
// NOTE(review): the macro is presumably defined in the included
// BlockSelectImpl.cuh; the fourth/fifth arguments look like per-warp and
// per-thread queue sizes -- confirm against the macro definition.
12 BLOCK_SELECT_IMPL(double, int32_t, true, 1, 1);
13 BLOCK_SELECT_IMPL(double, int32_t, false, 1, 1);
15 BLOCK_SELECT_IMPL(double, int32_t, true, 32, 2);
16 BLOCK_SELECT_IMPL(double, int32_t, false, 32, 2);
18 BLOCK_SELECT_IMPL(double, int32_t, true, 64, 3);
19 BLOCK_SELECT_IMPL(double, int32_t, false, 64, 3);
21 BLOCK_SELECT_IMPL(double, int32_t, true, 128, 3);
22 BLOCK_SELECT_IMPL(double, int32_t, false, 128, 3);
24 BLOCK_SELECT_IMPL(double, int32_t, true, 256, 4);
25 BLOCK_SELECT_IMPL(double, int32_t, false, 256, 4);
27 BLOCK_SELECT_IMPL(double, int32_t, true, 512, 8);
28 BLOCK_SELECT_IMPL(double, int32_t, false, 512, 8);
30 BLOCK_SELECT_IMPL(double, int32_t, true, 1024, 8);
31 BLOCK_SELECT_IMPL(double, int32_t, false, 1024, 8);
// The 2048 variants are compiled only when GPU_MAX_SELECTION_K >= 2048.
33 #if GPU_MAX_SELECTION_K >= 2048
34 BLOCK_SELECT_IMPL(double, int32_t, true, 2048, 8);
35 BLOCK_SELECT_IMPL(double, int32_t, false, 2048, 8);
// Expansions of BLOCK_SELECT_IMPL for the type pair (double, int64_t).
// The (third, fourth, fifth) argument combinations are exactly those used
// for the (double, int32_t) expansions earlier in this file: `true` and
// `false` for each of the sizes 1, 32, 64, 128, 256, 512, 1024 and 2048.
// NOTE(review): argument meanings are not visible in this file; confirm
// against the BLOCK_SELECT_IMPL macro definition.
38 BLOCK_SELECT_IMPL(double, int64_t, true, 1, 1);
39 BLOCK_SELECT_IMPL(double, int64_t, false, 1, 1);
41 BLOCK_SELECT_IMPL(double, int64_t, true, 32, 2);
42 BLOCK_SELECT_IMPL(double, int64_t, false, 32, 2);
44 BLOCK_SELECT_IMPL(double, int64_t, true, 64, 3);
45 BLOCK_SELECT_IMPL(double, int64_t, false, 64, 3);
47 BLOCK_SELECT_IMPL(double, int64_t, true, 128, 3);
48 BLOCK_SELECT_IMPL(double, int64_t, false, 128, 3);
50 BLOCK_SELECT_IMPL(double, int64_t, true, 256, 4);
51 BLOCK_SELECT_IMPL(double, int64_t, false, 256, 4);
53 BLOCK_SELECT_IMPL(double, int64_t, true, 512, 8);
54 BLOCK_SELECT_IMPL(double, int64_t, false, 512, 8);
56 BLOCK_SELECT_IMPL(double, int64_t, true, 1024, 8);
57 BLOCK_SELECT_IMPL(double, int64_t, false, 1024, 8);
// The 2048 variants are compiled only when GPU_MAX_SELECTION_K >= 2048.
59 #if GPU_MAX_SELECTION_K >= 2048
60 BLOCK_SELECT_IMPL(double, int64_t, true, 2048, 8);
61 BLOCK_SELECT_IMPL(double, int64_t, false, 2048, 8);
// runBlockSelectPair overload whose calls all use the (double, int32_t)
// type pair. It takes a cudaStream_t as its first parameter, asserts that
// k does not exceed GPU_MAX_SELECTION_K, and then expands
// BLOCK_SELECT_PAIR_CALL with a size chosen from an ascending chain of
// `k <= N` tests, so the first size that is >= k is used.
// NOTE(review): BLOCK_SELECT_PAIR_CALL is not defined in this file; it
// presumably launches the matching BLOCK_SELECT_IMPL expansion on `stream`
// -- confirm against BlockSelectImpl.cuh.
64 void runBlockSelectPair(cudaStream_t stream,
// Guard: sizes above GPU_MAX_SELECTION_K have no expansion to dispatch to.
73 CLOUDVIEWER_ASSERT(k <= GPU_MAX_SELECTION_K);
// Chain that passes `true` as the third macro argument.
// NOTE(review): what selects this chain over the `false` chain below is
// presumably a direction flag -- confirm against the full signature.
77 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 1);
79 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 32);
81 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 64);
82 } else if (k <= 128) {
83 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 128);
84 } else if (k <= 256) {
85 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 256);
86 } else if (k <= 512) {
87 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 512);
88 } else if (k <= 1024) {
89 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 1024);
// The 2048 branch exists only when the 2048 expansions are compiled.
90 #if GPU_MAX_SELECTION_K >= 2048
91 } else if (k <= 2048) {
92 BLOCK_SELECT_PAIR_CALL(double, int32_t, true, 2048);
// Chain that passes `false` as the third macro argument; it uses the same
// sizes as the `true` chain above.
97 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 1);
99 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 32);
100 } else if (k <= 64) {
101 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 64);
102 } else if (k <= 128) {
103 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 128);
104 } else if (k <= 256) {
105 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 256);
106 } else if (k <= 512) {
107 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 512);
108 } else if (k <= 1024) {
109 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 1024);
// The 2048 branch exists only when the 2048 expansions are compiled.
110 #if GPU_MAX_SELECTION_K >= 2048
111 } else if (k <= 2048) {
112 BLOCK_SELECT_PAIR_CALL(double, int32_t, false, 2048);
// runBlockSelectPair overload whose calls all use the (double, int64_t)
// type pair. It takes a cudaStream_t as its first parameter, asserts that
// k does not exceed GPU_MAX_SELECTION_K, and then expands
// BLOCK_SELECT_PAIR_CALL with a size chosen from an ascending chain of
// `k <= N` tests (32, 64, 128, 256, 512, 1024, and 2048 when enabled), so
// the first size that is >= k is used.
// NOTE(review): BLOCK_SELECT_PAIR_CALL is not defined in this file; it
// presumably launches the matching BLOCK_SELECT_IMPL expansion on `stream`
// -- confirm against BlockSelectImpl.cuh.
118 void runBlockSelectPair(cudaStream_t stream,
// Guard: sizes above GPU_MAX_SELECTION_K have no expansion to dispatch to.
127 CLOUDVIEWER_ASSERT(k <= GPU_MAX_SELECTION_K);
// Chain that passes `true` as the third macro argument.
// NOTE(review): what selects this chain over the `false` chain below is
// presumably a direction flag -- confirm against the full signature.
131 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 1);
132 } else if (k <= 32) {
133 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 32);
134 } else if (k <= 64) {
135 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 64);
136 } else if (k <= 128) {
137 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 128);
138 } else if (k <= 256) {
139 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 256);
140 } else if (k <= 512) {
141 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 512);
142 } else if (k <= 1024) {
143 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 1024);
// The 2048 branch exists only when the 2048 expansions are compiled.
144 #if GPU_MAX_SELECTION_K >= 2048
145 } else if (k <= 2048) {
146 BLOCK_SELECT_PAIR_CALL(double, int64_t, true, 2048);
// Chain that passes `false` as the third macro argument; it uses the same
// thresholds and sizes as the `true` chain above.
151 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 1);
152 } else if (k <= 32) {
153 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 32);
154 } else if (k <= 64) {
155 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 64);
156 } else if (k <= 128) {
157 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 128);
158 } else if (k <= 256) {
159 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 256);
160 } else if (k <= 512) {
161 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 512);
162 } else if (k <= 1024) {
163 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 1024);
// The 2048 branch exists only when the 2048 expansions are compiled.
164 #if GPU_MAX_SELECTION_K >= 2048
165 } else if (k <= 2048) {
166 BLOCK_SELECT_PAIR_CALL(double, int64_t, false, 2048);
173 } // namespace cloudViewer