HashMap.cpp
// ----------------------------------------------------------------------------
// -                        CloudViewer: www.cloudViewer.org                  -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

// Several include lines are elided in the original listing. The headers below
// are inferred from the identifiers used in this file (HashMap, std::shuffle,
// cuda::Synchronize, Device, Tensor, utility::LogError); the exact paths are
// assumptions based on the "cloudViewer/core/..." convention visible here.
#include "cloudViewer/core/hashmap/HashMap.h"

#include <benchmark/benchmark.h>

#include <algorithm>
#include <numeric>
#include <random>

#include "cloudViewer/core/CUDAUtils.h"
#include "cloudViewer/core/Device.h"
#include "cloudViewer/core/Dtype.h"
#include "cloudViewer/core/Tensor.h"
#include "cloudViewer/utility/Logging.h"

namespace cloudViewer {
namespace core {

template <typename K, typename V>
class HashData {
public:
    HashData(int count, int slots) {
        keys_.resize(count);
        vals_.resize(count);

        std::vector<int> indices(count);
        std::iota(indices.begin(), indices.end(), 0);
        std::shuffle(indices.begin(), indices.end(),
                     std::default_random_engine(0));

        // Ensure enough duplicates for harder tests.
        for (int i = 0; i < count; ++i) {
            int v = indices[i] % slots;
            keys_[i] = K(v * k_factor_);
            vals_[i] = V(v);
        }
    }

public:
    const int k_factor_ = 101;
    std::vector<K> keys_;
    std::vector<V> vals_;
};
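
// Illustrative note (not in the original source): with count = 8 and slots = 4
// the constructor above yields keys {0, 101, 202, 303}, each occurring twice in
// shuffled order, so inserting all `count` entries can create at most `slots`
// unique keys. The size checks in the benchmarks below rely on this.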

void HashInsertInt(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}
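
// Timing pattern used by the mutating benchmarks (editorial comment, not part
// of the original source): per-iteration setup (constructing a fresh HashMap)
// and the correctness check on Size() are excluded from the measurement via
// state.PauseTiming()/ResumeTiming(), while cuda::Synchronize(device) brackets
// the measured call so that asynchronous GPU work is flushed before the timer
// stops; on CPU devices the synchronize is effectively a no-op.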

void HashEraseInt(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;
        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Erase(keys, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashFindInt(benchmark::State& state,
                 int capacity,
                 int duplicate_factor,
                 const Device& device,
                 const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                    backend);
    Tensor buf_indices, masks;
    // Insert as warm-up.
    hashmap.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        hashmap.Find(keys, buf_indices, masks);
        cuda::Synchronize(device);
    }
}
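
// Find() does not modify the map, so the loop above simply re-queries the same
// pre-filled map every iteration; no PauseTiming()/ResumeTiming() bookkeeping
// is needed. (Editorial comment, not part of the original source.)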

void HashClearInt(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Clear();

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashReserveInt(benchmark::State& state,
                    int capacity,
                    int duplicate_factor,
                    const Device& device,
                    const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<int, int> data(capacity, slots);

    Tensor keys(data.keys_, {capacity}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {1}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {1}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Reserve(2 * capacity);

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

class Int3 {
public:
    Int3() : x_(0), y_(0), z_(0) {};
    Int3(int k) : x_(k), y_(k * 2), z_(k * 4) {};
    bool operator==(const Int3& other) const {
        return x_ == other.x_ && y_ == other.y_ && z_ == other.z_;
    }
    int x_;
    int y_;
    int z_;
};
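
// Editorial note (not in the original source): the Int3 benchmarks below view
// the HashData<Int3, int> key array as a flat int buffer of length
// 3 * capacity via reinterpret_cast. This assumes Int3 is laid out as three
// contiguous ints with no padding, which holds for the trivially copyable
// definition above on common compilers.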

void HashInsertInt3(benchmark::State& state,
                    int capacity,
                    int duplicate_factor,
                    const Device& device,
                    const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

void HashEraseInt3(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;
        hashmap.Insert(keys, values, buf_indices, masks);

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Erase(keys, masks);

        cuda::Synchronize(device);
        state.PauseTiming();

        int64_t s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashFindInt3(benchmark::State& state,
                  int capacity,
                  int duplicate_factor,
                  const Device& device,
                  const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                    backend);
    Tensor buf_indices, masks;
    hashmap.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        hashmap.Find(keys, buf_indices, masks);
        cuda::Synchronize(device);
    }
}

void HashClearInt3(benchmark::State& state,
                   int capacity,
                   int duplicate_factor,
                   const Device& device,
                   const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Clear();

        // Synchronize before pausing so that the device-side Clear() is fully
        // included in the measurement, consistent with the other benchmarks.
        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != 0) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.", 0,
                    s);
        }
        state.ResumeTiming();
    }
}

void HashReserveInt3(benchmark::State& state,
                     int capacity,
                     int duplicate_factor,
                     const Device& device,
                     const HashBackendType& backend) {
    int slots = std::max(1, capacity / duplicate_factor);
    HashData<Int3, int> data(capacity, slots);

    std::vector<int> keys_Int3;
    keys_Int3.assign(reinterpret_cast<int*>(data.keys_.data()),
                     reinterpret_cast<int*>(data.keys_.data()) + 3 * capacity);
    Tensor keys(keys_Int3, {capacity, 3}, core::Int32, device);
    Tensor values(data.vals_, {capacity}, core::Int32, device);

    HashMap hashmap_warmup(capacity, core::Int32, {3}, core::Int32, {1}, device,
                           backend);
    Tensor buf_indices, masks;
    hashmap_warmup.Insert(keys, values, buf_indices, masks);

    for (auto _ : state) {
        state.PauseTiming();
        HashMap hashmap(capacity, core::Int32, {3}, core::Int32, {1}, device,
                        backend);
        Tensor buf_indices, masks;

        hashmap.Insert(keys, values, buf_indices, masks);

        int64_t s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }

        cuda::Synchronize(device);
        state.ResumeTiming();

        hashmap.Reserve(2 * capacity);

        cuda::Synchronize(device);
        state.PauseTiming();

        s = hashmap.Size();
        if (s != slots) {
            utility::LogError(
                    "Error returning hashmap size, expected {}, but got {}.",
                    slots, s);
        }
        state.ResumeTiming();
    }
}

// Note: to enable large-scale insertion (> 1M entries), change
// default_max_load_factor() in stdgpu from 1.0 to 1.2~1.4.
#define ENUM_BM_CAPACITY(FN, FACTOR, DEVICE, BACKEND)                         \
    BENCHMARK_CAPTURE(FN, BACKEND##_100_##FACTOR, 100, FACTOR, DEVICE,        \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_1000_##FACTOR, 1000, FACTOR, DEVICE,      \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_10000_##FACTOR, 10000, FACTOR, DEVICE,    \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_100000_##FACTOR, 100000, FACTOR, DEVICE,  \
                      BACKEND)                                                \
            ->Unit(benchmark::kMillisecond);                                  \
    BENCHMARK_CAPTURE(FN, BACKEND##_1000000_##FACTOR, 1000000, FACTOR,        \
                      DEVICE, BACKEND)                                        \
            ->Unit(benchmark::kMillisecond);
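
// Editorial example (not in the original source): an invocation such as
//   ENUM_BM_CAPACITY(HashInsertInt, 4, Device("CPU:0"), HashBackendType::TBB)
// registers five benchmarks for HashInsertInt, one per capacity in
// {100, 1000, 10000, 100000, 1000000}, each run with duplicate_factor = 4 on
// the CPU TBB backend and reported in milliseconds.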

#define ENUM_BM_FACTOR(FN, DEVICE, BACKEND)   \
    ENUM_BM_CAPACITY(FN, 1, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 2, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 4, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 8, DEVICE, BACKEND)  \
    ENUM_BM_CAPACITY(FN, 16, DEVICE, BACKEND) \
    ENUM_BM_CAPACITY(FN, 32, DEVICE, BACKEND)

#ifdef BUILD_CUDA_MODULE
#define ENUM_BM_BACKEND(FN)                                        \
    ENUM_BM_FACTOR(FN, Device("CPU:0"), HashBackendType::TBB)      \
    ENUM_BM_FACTOR(FN, Device("CUDA:0"), HashBackendType::Slab)    \
    ENUM_BM_FACTOR(FN, Device("CUDA:0"), HashBackendType::StdGPU)
#else
#define ENUM_BM_BACKEND(FN) \
    ENUM_BM_FACTOR(FN, Device("CPU:0"), HashBackendType::TBB)
#endif
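
// The benchmark registrations themselves are elided from this listing. A
// sketch of what they would look like (hypothetical, inferred from the
// functions and macros defined above) is:
//
//     ENUM_BM_BACKEND(HashInsertInt)
//     ENUM_BM_BACKEND(HashEraseInt)
//     ENUM_BM_BACKEND(HashFindInt)
//     ENUM_BM_BACKEND(HashClearInt)
//     ENUM_BM_BACKEND(HashReserveInt)
//     ENUM_BM_BACKEND(HashInsertInt3)
//     ENUM_BM_BACKEND(HashEraseInt3)
//     ENUM_BM_BACKEND(HashFindInt3)
//     ENUM_BM_BACKEND(HashClearInt3)
//     ENUM_BM_BACKEND(HashReserveInt3)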

}  // namespace core
}  // namespace cloudViewer