HashMap.cpp
// ----------------------------------------------------------------------------
// -                        CloudViewer: www.cloudViewer.org                  -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

// Several include lines are elided in the original listing. The headers below
// are inferred from the identifiers used in this file (HashMap, std::shuffle,
// cuda::Synchronize, Device, Tensor, utility::LogError); the exact paths are
// assumptions based on the "cloudViewer/core/..." convention visible here.
#include "cloudViewer/core/hashmap/HashMap.h"

#include <benchmark/benchmark.h>

#include <algorithm>
#include <numeric>
#include <random>

#include "cloudViewer/core/CUDAUtils.h"
#include "cloudViewer/core/Device.h"
#include "cloudViewer/core/Dtype.h"
#include "cloudViewer/core/Tensor.h"
#include "cloudViewer/utility/Logging.h"

namespace cloudViewer {
namespace core {

template <typename K, typename V>
class HashData {
public:
    HashData(int count, int slots) {
        keys_.resize(count);
        vals_.resize(count);

        std::vector<int> indices(count);
        std::iota(indices.begin(), indices.end(), 0);
        std::shuffle(indices.begin(), indices.end(),
                     std::default_random_engine(0));

        // Ensure enough duplicates for harder tests.
        for (int i = 0; i < count; ++i) {
            int v = indices[i] % slots;
            keys_[i] = K(v * k_factor_);
            vals_[i] = V(v);
        }
    }

public:
    const int k_factor_ = 101;
    std::vector<K> keys_;
    std::vector<V> vals_;
};
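
// Illustrative note (not in the original source): with count = 8 and slots = 4
// the constructor above yields keys {0, 101, 202, 303}, each occurring twice in
// shuffled order, so inserting all `count` entries can create at most `slots`
// unique keys. The size checks in the benchmarks below rely on this.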

void HashInsertInt(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}
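
// Timing pattern used by the mutating benchmarks (editorial comment, not part
// of the original source): per-iteration setup (constructing a fresh HashMap)
// and the correctness check on Size() are excluded from the measurement via
// state.PauseTiming()/ResumeTiming(), while cuda::Synchronize(device) brackets
// the measured call so that asynchronous GPU work is flushed before the timer
// stops; on CPU devices the synchronize is effectively a no-op.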

void HashEraseInt(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;
        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Erase(keys, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashFindInt(benchmark::State& state,
                 int capacity,
                 int duplicate_factor,
                 const Device& device,
                 const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                    backend);
    Tensor buf_indices, masks;
    // Insert as warm-up.
    hashmap.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        hashmap.Find(keys, buf_indices, masks);
        cuda::Synchronize(device);
    }
}
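
// Find() does not modify the map, so the loop above simply re-queries the same
// pre-filled map every iteration; no PauseTiming()/ResumeTiming() bookkeeping
// is needed. (Editorial comment, not part of the original source.)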

void HashClearInt(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Clear();

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashReserveInt(benchmark::State& state,
                    int capacity,
                    int duplicate_factor,
                    const Device& device,
                    const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Reserve(2 * capacity);

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

class Int3 {
public:
    Int3() : x_(0), y_(0), z_(0) {};
    Int3(int k) : x_(k), y_(k * 2), z_(k * 4) {};
    bool operator==(const Int3& other) const {
        return x_ == other.x_ && y_ == other.y_ && z_ == other.z_;
    }
    int x_;
    int y_;
    int z_;
};
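
// Editorial note (not in the original source): the Int3 benchmarks below view
// the HashData<Int3, int> key array as a flat int buffer of length
// 3 * capacity via reinterpret_cast. This assumes Int3 is laid out as three
// contiguous ints with no padding, which holds for the trivially copyable
// definition above on common compilers.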

void HashInsertInt3(benchmark::State& state,
                    int capacity,
                    int duplicate_factor,
                    const Device& device,
                    const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

void HashEraseInt3(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;
        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Erase(keys, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashFindInt3(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                    backend);
    Tensor buf_indices, masks;
    hashmap.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        hashmap.Find(keys, buf_indices, masks);
        cuda::Synchronize(device);
    }
}

void HashClearInt3(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Clear();

        // Synchronize before pausing so that the device-side Clear() is fully
        // included in the measurement, consistent with the other benchmarks.
        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashReserveInt3(benchmark::State& state,
                     int capacity,
                     int duplicate_factor,
                     const Device& device,
                     const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Reserve(2 * capacity);

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

// Note: to enable large-scale insertion (> 1M entries), change
// default_max_load_factor() in stdgpu from 1.0 to 1.2~1.4.
#define ENUM_BM_CAPACITY(FN, FACTOR, DEVICE, BACKEND)                         \
    BENCHMARK_CAPTURE(FN, BACKEND##_100_##FACTOR, 100, FACTOR, DEVICE,        \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_1000_##FACTOR, 1000, FACTOR, DEVICE,      \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_10000_##FACTOR, 10000, FACTOR, DEVICE,    \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_100000_##FACTOR, 100000, FACTOR, DEVICE,  \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_1000000_##FACTOR, 1000000, FACTOR,        \
                      DEVICE, BACKEND)                                        \
            ->Unit(benchmark::kMillisecond);
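
// Editorial example (not in the original source): an invocation such as
//   ENUM_BM_CAPACITY(HashInsertInt, 4, Device("CPU:0"), HashBackendType::TBB)
// registers five benchmarks for HashInsertInt, one per capacity in
// {100, 1000, 10000, 100000, 1000000}, each run with duplicate_factor = 4 on
// the CPU TBB backend and reported in milliseconds.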

#define ENUM_BM_FACTOR(FN, DEVICE, BACKEND)   \
    ENUM_BM_CAPACITY(FN, 1, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 2, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 4, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 8, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 16, DEVICE, BACKEND) \
    ENUM_BM_CAPACITY(FN, 32, DEVICE, BACKEND)

#ifdef BUILD_CUDA_MODULE
#define ENUM_BM_BACKEND(FN)                                        \
    ENUM_BM_FACTOR(FN, Device("CPU:0"), HashBackendType::TBB)      \
    ENUM_BM_FACTOR(FN, Device("CUDA:0"), HashBackendType::Slab)    \
    ENUM_BM_FACTOR(FN, Device("CUDA:0"), HashBackendType::StdGPU)
#else
#define ENUM_BM_BACKEND(FN) \
    ENUM_BM_FACTOR(FN, Device("CPU:0"), HashBackendType::TBB)
#endif
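
// The benchmark registrations themselves are elided from this listing. A
// sketch of what they would look like (hypothetical, inferred from the
// functions and macros defined above) is:
//
//     ENUM_BM_BACKEND(HashInsertInt)
//     ENUM_BM_BACKEND(HashEraseInt)
//     ENUM_BM_BACKEND(HashFindInt)
//     ENUM_BM_BACKEND(HashClearInt)
//     ENUM_BM_BACKEND(HashReserveInt)
//     ENUM_BM_BACKEND(HashInsertInt3)
//     ENUM_BM_BACKEND(HashEraseInt3)
//     ENUM_BM_BACKEND(HashFindInt3)
//     ENUM_BM_BACKEND(HashClearInt3)
//     ENUM_BM_BACKEND(HashReserveInt3)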

}  // namespace core
}  // namespace cloudViewer