ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
HashMap.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <Helper.h>
11 #include <Logging.h>
12 
16 
17 namespace cloudViewer {
18 namespace core {
19 
20 HashMap::HashMap(int64_t init_capacity,
21  const Dtype& key_dtype,
22  const SizeVector& key_element_shape,
23  const Dtype& value_dtype,
24  const SizeVector& value_element_shape,
25  const Device& device,
26  const HashBackendType& backend)
27  : key_dtype_(key_dtype),
28  key_element_shape_(key_element_shape),
29  dtypes_value_({value_dtype}),
30  element_shapes_value_({value_element_shape}) {
31  Init(init_capacity, device, backend);
32 }
33 
34 HashMap::HashMap(int64_t init_capacity,
35  const Dtype& key_dtype,
36  const SizeVector& key_element_shape,
37  const std::vector<Dtype>& dtypes_value,
38  const std::vector<SizeVector>& element_shapes_value,
39  const Device& device,
40  const HashBackendType& backend)
41  : key_dtype_(key_dtype),
42  key_element_shape_(key_element_shape),
43  dtypes_value_(dtypes_value),
44  element_shapes_value_(element_shapes_value) {
45  Init(init_capacity, device, backend);
46 }
47 
48 void HashMap::Reserve(int64_t capacity) {
49  int64_t count = Size();
50  if (capacity <= count) {
51  utility::LogDebug("Target capacity smaller then current size, abort.");
52  return;
53  }
54 
55  Tensor active_keys;
56  std::vector<Tensor> active_values;
57 
58  if (count > 0) {
59  Tensor active_buf_indices = GetActiveIndices();
60  Tensor active_indices = active_buf_indices.To(core::Int64);
61 
62  active_keys = GetKeyTensor().IndexGet({active_indices});
63  auto value_buffers = GetValueTensors();
64  for (auto& value_buffer : value_buffers) {
65  active_values.emplace_back(value_buffer.IndexGet({active_indices}));
66  }
67  }
68 
69  device_hashmap_->Free();
70  device_hashmap_->Allocate(capacity);
71  device_hashmap_->Reserve(capacity);
72 
73  if (count > 0) {
74  Tensor output_buf_indices, output_masks;
75  InsertImpl(active_keys, active_values, output_buf_indices,
76  output_masks);
77  }
78 }
79 
80 std::pair<Tensor, Tensor> HashMap::Insert(const Tensor& input_keys,
81  const Tensor& input_values) {
82  Tensor output_buf_indices, output_masks;
83  Insert(input_keys, input_values, output_buf_indices, output_masks);
84  return std::make_pair(output_buf_indices, output_masks);
85 }
86 
87 std::pair<Tensor, Tensor> HashMap::Insert(
88  const Tensor& input_keys, const std::vector<Tensor>& input_values_soa) {
89  Tensor output_buf_indices, output_masks;
90  Insert(input_keys, input_values_soa, output_buf_indices, output_masks);
91  return std::make_pair(output_buf_indices, output_masks);
92 }
93 
94 std::pair<Tensor, Tensor> HashMap::Activate(const Tensor& input_keys) {
95  Tensor output_buf_indices, output_masks;
96  Activate(input_keys, output_buf_indices, output_masks);
97  return std::make_pair(output_buf_indices, output_masks);
98 }
99 
100 std::pair<Tensor, Tensor> HashMap::Find(const Tensor& input_keys) {
101  Tensor output_buf_indices, output_masks;
102  Find(input_keys, output_buf_indices, output_masks);
103  return std::make_pair(output_buf_indices, output_masks);
104 }
105 
106 Tensor HashMap::Erase(const Tensor& input_keys) {
107  Tensor output_masks;
108  Erase(input_keys, output_masks);
109  return output_masks;
110 }
111 
113  Tensor output_buf_indices;
114  GetActiveIndices(output_buf_indices);
115  return output_buf_indices;
116 }
117 
118 void HashMap::InsertImpl(const Tensor& input_keys,
119  const std::vector<Tensor>& input_values_soa,
120  Tensor& output_buf_indices,
121  Tensor& output_masks,
122  bool is_activate_op) {
123  CheckKeyCompatibility(input_keys);
124  if (!is_activate_op) {
125  CheckKeyValueLengthCompatibility(input_keys, input_values_soa);
126  CheckValueCompatibility(input_values_soa);
127  }
128 
129  int64_t length = input_keys.GetLength();
130  PrepareIndicesOutput(output_buf_indices, length);
131  PrepareMasksOutput(output_masks, length);
132 
133  std::vector<const void*> input_values_ptrs;
134  for (const auto& input_value : input_values_soa) {
135  input_values_ptrs.push_back(input_value.GetDataPtr());
136  }
137 
138  device_hashmap_->Insert(
139  input_keys.GetDataPtr(), input_values_ptrs,
140  static_cast<buf_index_t*>(output_buf_indices.GetDataPtr()),
141  output_masks.GetDataPtr<bool>(), length);
142 }
143 
144 void HashMap::Insert(const Tensor& input_keys,
145  const Tensor& input_values,
146  Tensor& output_buf_indices,
147  Tensor& output_masks) {
148  Insert(input_keys, std::vector<Tensor>{input_values}, output_buf_indices,
149  output_masks);
150 }
151 
152 void HashMap::Insert(const Tensor& input_keys,
153  const std::vector<Tensor>& input_values_soa,
154  Tensor& output_buf_indices,
155  Tensor& output_masks) {
156  int64_t length = input_keys.GetLength();
157  int64_t new_size = Size() + length;
158  int64_t capacity = GetCapacity();
159 
160  if (new_size > capacity) {
161  Reserve(std::max(new_size, capacity * 2));
162  }
163  InsertImpl(input_keys, input_values_soa, output_buf_indices, output_masks);
164 }
165 
166 void HashMap::Activate(const Tensor& input_keys,
167  Tensor& output_buf_indices,
168  Tensor& output_masks) {
169  int64_t length = input_keys.GetLength();
170  int64_t new_size = Size() + length;
171  int64_t capacity = GetCapacity();
172 
173  if (new_size > capacity) {
174  Reserve(std::max(new_size, capacity * 2));
175  }
176 
177  std::vector<Tensor> null_tensors_soa;
178  InsertImpl(input_keys, null_tensors_soa, output_buf_indices, output_masks,
179  /* is_activate_op */ true);
180 }
181 
182 void HashMap::Find(const Tensor& input_keys,
183  Tensor& output_buf_indices,
184  Tensor& output_masks) {
185  CheckKeyLength(input_keys);
186  CheckKeyCompatibility(input_keys);
187 
188  int64_t length = input_keys.GetLength();
189  PrepareIndicesOutput(output_buf_indices, length);
190  PrepareMasksOutput(output_masks, length);
191 
192  device_hashmap_->Find(
193  input_keys.GetDataPtr(),
194  static_cast<buf_index_t*>(output_buf_indices.GetDataPtr()),
195  output_masks.GetDataPtr<bool>(), length);
196 }
197 
198 void HashMap::Erase(const Tensor& input_keys, Tensor& output_masks) {
199  CheckKeyLength(input_keys);
200  CheckKeyCompatibility(input_keys);
201 
202  int64_t length = input_keys.GetLength();
203  PrepareMasksOutput(output_masks, length);
204 
205  device_hashmap_->Erase(input_keys.GetDataPtr(),
206  output_masks.GetDataPtr<bool>(), length);
207 }
208 
209 void HashMap::GetActiveIndices(Tensor& output_buf_indices) const {
210  int64_t length = device_hashmap_->Size();
211  PrepareIndicesOutput(output_buf_indices, length);
212 
213  device_hashmap_->GetActiveIndices(
214  static_cast<buf_index_t*>(output_buf_indices.GetDataPtr()));
215 }
216 
217 void HashMap::Clear() { device_hashmap_->Clear(); }
218 
219 void HashMap::Save(const std::string& file_name) {
220  t::io::WriteHashMap(file_name, *this);
221 }
222 
223 HashMap HashMap::Load(const std::string& file_name) {
224  return t::io::ReadHashMap(file_name);
225 }
226 
227 HashMap HashMap::Clone() const { return To(GetDevice(), /*copy=*/true); }
228 
229 HashMap HashMap::To(const Device& device, bool copy) const {
230  if (!copy && GetDevice() == device) {
231  return *this;
232  }
233 
234  Tensor keys = GetKeyTensor();
235  std::vector<Tensor> values = GetValueTensors();
236 
237  Tensor active_buf_indices_i32;
238  GetActiveIndices(active_buf_indices_i32);
239  Tensor active_indices = active_buf_indices_i32.To(core::Int64);
240 
241  Tensor active_keys = keys.IndexGet({active_indices}).To(device);
242  std::vector<Tensor> soa_active_values;
243  for (const auto& value : values) {
244  soa_active_values.push_back(
245  value.IndexGet({active_indices}).To(device));
246  }
247 
248  HashMap new_hashmap(GetCapacity(), key_dtype_, key_element_shape_,
249  dtypes_value_, element_shapes_value_, device);
250  Tensor buf_indices, masks;
251  new_hashmap.Insert(active_keys, soa_active_values, buf_indices, masks);
252 
253  return new_hashmap;
254 }
255 
256 int64_t HashMap::Size() const { return device_hashmap_->Size(); }
257 
258 int64_t HashMap::GetCapacity() const { return device_hashmap_->GetCapacity(); }
259 
260 int64_t HashMap::GetBucketCount() const {
261  return device_hashmap_->GetBucketCount();
262 }
263 
264 Device HashMap::GetDevice() const { return device_hashmap_->GetDevice(); }
265 
267  int64_t capacity = GetCapacity();
268  SizeVector key_shape = key_element_shape_;
269  key_shape.insert(key_shape.begin(), capacity);
270  return Tensor(key_shape, shape_util::DefaultStrides(key_shape),
271  device_hashmap_->GetKeyBuffer().GetDataPtr(), key_dtype_,
272  device_hashmap_->GetKeyBuffer().GetBlob());
273 }
274 
275 std::vector<Tensor> HashMap::GetValueTensors() const {
276  int64_t capacity = GetCapacity();
277 
278  std::vector<Tensor> value_buffers = device_hashmap_->GetValueBuffers();
279 
280  std::vector<Tensor> soa_value_tensor;
281  for (size_t i = 0; i < element_shapes_value_.size(); ++i) {
282  SizeVector value_shape = element_shapes_value_[i];
283  value_shape.insert(value_shape.begin(), capacity);
284 
285  Dtype value_dtype = dtypes_value_[i];
286  soa_value_tensor.push_back(
287  Tensor(value_shape, shape_util::DefaultStrides(value_shape),
288  value_buffers[i].GetDataPtr(), value_dtype,
289  value_buffers[i].GetBlob()));
290  }
291  return soa_value_tensor;
292 }
293 
295  int64_t capacity = GetCapacity();
296 
297  if (i >= dtypes_value_.size()) {
298  utility::LogError("Value index ({}) out of bound (>= {})", i,
299  dtypes_value_.size());
300  }
301 
302  Tensor value_buffer = device_hashmap_->GetValueBuffer(i);
303 
304  SizeVector value_shape = element_shapes_value_[i];
305  value_shape.insert(value_shape.begin(), capacity);
306 
307  Dtype value_dtype = dtypes_value_[i];
308  return Tensor(value_shape, shape_util::DefaultStrides(value_shape),
309  value_buffer.GetDataPtr(), value_dtype,
310  value_buffer.GetBlob());
311 }
312 
313 std::vector<int64_t> HashMap::BucketSizes() const {
314  return device_hashmap_->BucketSizes();
315 };
316 
317 float HashMap::LoadFactor() const { return device_hashmap_->LoadFactor(); }
318 
319 void HashMap::Init(int64_t init_capacity,
320  const Device& device,
321  const HashBackendType& backend) {
322  // Key check
323  if (key_dtype_.GetDtypeCode() == Dtype::DtypeCode::Undefined) {
324  utility::LogError("Undefined key dtype is not allowed.");
325  }
326  if (key_element_shape_.NumElements() == 0) {
328  "Key element shape must contain at least 1 element, "
329  "but got 0.");
330  }
331 
332  // Value check
333  if (dtypes_value_.size() != element_shapes_value_.size()) {
335  "Size of value_dtype ({}) mismatches with size of "
336  "element_shapes_value ({}).",
337  dtypes_value_.size(), element_shapes_value_.size());
338  }
339  for (const auto& value_dtype : dtypes_value_) {
340  if (value_dtype.GetDtypeCode() == Dtype::DtypeCode::Undefined) {
341  utility::LogError("Undefined value dtype is not allowed.");
342  }
343  }
344  for (const auto& value_element_shape : element_shapes_value_) {
345  if (value_element_shape.NumElements() == 0) {
347  "Value element shape must contain at least 1 "
348  "element, but got 0.");
349  }
350  }
351 
352  device_hashmap_ = CreateDeviceHashBackend(
353  init_capacity, key_dtype_, key_element_shape_, dtypes_value_,
354  element_shapes_value_, device, backend);
355 }
356 
357 void HashMap::CheckKeyLength(const Tensor& input_keys) const {
358  int64_t key_len = input_keys.GetLength();
359  if (key_len == 0) {
360  utility::LogError("Input number of keys should > 0, but got 0.");
361  }
362 }
363 
365  const Tensor& input_keys,
366  const std::vector<Tensor>& input_values_soa) const {
367  int64_t key_len = input_keys.GetLength();
368  if (key_len == 0) {
369  utility::LogError("Input number of keys should > 0, but got 0.");
370  }
371  for (size_t i = 0; i < input_values_soa.size(); ++i) {
372  Tensor input_value = input_values_soa[i];
373  if (input_value.GetLength() != key_len) {
375  "Input number of values at {} mismatch with number of "
376  "keys "
377  "{}",
378  key_len, input_value.GetLength());
379  }
380  }
381 }
382 
383 void HashMap::CheckKeyCompatibility(const Tensor& input_keys) const {
384  SizeVector input_key_elem_shape(input_keys.GetShape());
385  input_key_elem_shape.erase(input_key_elem_shape.begin());
386 
387  int64_t input_key_elem_bytesize = input_key_elem_shape.NumElements() *
388  input_keys.GetDtype().ByteSize();
389  int64_t stored_key_elem_bytesize =
390  key_element_shape_.NumElements() * key_dtype_.ByteSize();
391  if (input_key_elem_bytesize != stored_key_elem_bytesize) {
393  "Input key element bytesize ({}) mismatch with stored ({})",
394  input_key_elem_bytesize, stored_key_elem_bytesize);
395  }
396 }
397 
399  const std::vector<Tensor>& input_values_soa) const {
400  if (input_values_soa.size() != element_shapes_value_.size()) {
402  "Input number of value arrays ({}) mismatches with stored "
403  "({})",
404  input_values_soa.size(), element_shapes_value_.size());
405  }
406 
407  for (size_t i = 0; i < input_values_soa.size(); ++i) {
408  Tensor input_value = input_values_soa[i];
409  SizeVector input_value_i_elem_shape(input_value.GetShape());
410  input_value_i_elem_shape.erase(input_value_i_elem_shape.begin());
411 
412  int64_t input_value_i_elem_bytesize =
413  input_value_i_elem_shape.NumElements() *
414  input_value.GetDtype().ByteSize();
415 
416  int64_t stored_value_i_elem_bytesize =
417  element_shapes_value_[i].NumElements() *
418  dtypes_value_[i].ByteSize();
419  if (input_value_i_elem_bytesize != stored_value_i_elem_bytesize) {
421  "Input value[{}] element bytesize ({}) mismatch with "
422  "stored ({})",
423  i, input_value_i_elem_bytesize,
424  stored_value_i_elem_bytesize);
425  }
426  }
427 }
428 
429 void HashMap::PrepareIndicesOutput(Tensor& output_buf_indices,
430  int64_t length) const {
431  if (output_buf_indices.GetLength() != length ||
432  output_buf_indices.GetDtype() != core::Int32 ||
433  output_buf_indices.GetDevice() != GetDevice()) {
434  output_buf_indices = Tensor({length}, core::Int32, GetDevice());
435  }
436 }
437 
438 void HashMap::PrepareMasksOutput(Tensor& output_masks, int64_t length) const {
439  if (output_masks.GetLength() != length ||
440  output_masks.GetDtype() != core::Bool ||
441  output_masks.GetDevice() != GetDevice()) {
442  output_masks = Tensor({length}, core::Bool, GetDevice());
443  }
444 }
445 
446 } // namespace core
447 } // namespace cloudViewer
int count
bool copy
Definition: VtkUtils.cpp:74
int64_t ByteSize() const
Definition: Dtype.h:59
DtypeCode GetDtypeCode() const
Definition: Dtype.h:61
void PrepareMasksOutput(Tensor &output_masks, int64_t length) const
Definition: HashMap.cpp:438
std::vector< int64_t > BucketSizes() const
Return number of elements per bucket.
Definition: HashMap.cpp:313
void PrepareIndicesOutput(Tensor &output_buf_indices, int64_t length) const
Definition: HashMap.cpp:429
HashMap(int64_t init_capacity, const Dtype &key_dtype, const SizeVector &key_element_shape, const Dtype &value_dtype, const SizeVector &value_element_shapes, const Device &device, const HashBackendType &backend=HashBackendType::Default)
Initialize a hash map given a key and a value dtype and element shape.
Definition: HashMap.cpp:20
std::pair< Tensor, Tensor > Activate(const Tensor &input_keys)
Definition: HashMap.cpp:94
void InsertImpl(const Tensor &input_keys, const std::vector< Tensor > &input_values_soa, Tensor &output_buf_indices, Tensor &output_masks, bool is_activate_op=false)
Definition: HashMap.cpp:118
std::pair< Tensor, Tensor > Find(const Tensor &input_keys)
Definition: HashMap.cpp:100
void CheckKeyCompatibility(const Tensor &input_keys) const
Definition: HashMap.cpp:383
HashMap To(const Device &device, bool copy=false) const
Convert the hash map to another device.
Definition: HashMap.cpp:229
std::vector< Tensor > GetValueTensors() const
Definition: HashMap.cpp:275
void CheckKeyValueLengthCompatibility(const Tensor &input_keys, const std::vector< Tensor > &input_values_soa) const
Definition: HashMap.cpp:364
void Clear()
Clear stored map without reallocating the buffers.
Definition: HashMap.cpp:217
int64_t GetBucketCount() const
Get the number of buckets of the internal hash map.
Definition: HashMap.cpp:260
Tensor Erase(const Tensor &input_keys)
Definition: HashMap.cpp:106
void CheckKeyLength(const Tensor &input_keys) const
Definition: HashMap.cpp:357
static HashMap Load(const std::string &file_name)
Definition: HashMap.cpp:223
void Save(const std::string &file_name)
Definition: HashMap.cpp:219
void Init(int64_t init_capacity, const Device &device, const HashBackendType &backend)
Definition: HashMap.cpp:319
int64_t GetCapacity() const
Get the capacity of the hash map.
Definition: HashMap.cpp:258
std::pair< Tensor, Tensor > Insert(const Tensor &input_keys, const Tensor &input_values)
Definition: HashMap.cpp:80
void Reserve(int64_t capacity)
Reserve the internal hash map with the given capacity by rehashing.
Definition: HashMap.cpp:48
Tensor GetActiveIndices() const
Definition: HashMap.cpp:112
Tensor GetKeyTensor() const
Definition: HashMap.cpp:266
void CheckValueCompatibility(const std::vector< Tensor > &input_values_soa) const
Definition: HashMap.cpp:398
float LoadFactor() const
Return size / bucket_count.
Definition: HashMap.cpp:317
Device GetDevice() const override
Get the device of the hash map.
Definition: HashMap.cpp:264
int64_t Size() const
Get the size (number of active entries) of the hash map.
Definition: HashMap.cpp:256
Tensor GetValueTensor(size_t index=0) const
Definition: HashMap.cpp:294
HashMap Clone() const
Clone the hash map with buffers.
Definition: HashMap.cpp:227
iterator erase(const_iterator CI)
Definition: SmallVector.h:779
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:853
int64_t GetLength() const
Definition: Tensor.h:1125
Dtype GetDtype() const
Definition: Tensor.h:1164
Tensor IndexGet(const std::vector< Tensor > &index_tensors) const
Advanced indexing getter. This will always allocate a new Tensor.
Definition: Tensor.cpp:905
Device GetDevice() const override
Definition: Tensor.cpp:1435
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739
std::shared_ptr< Blob > GetBlob() const
Definition: Tensor.h:1168
#define LogError(...)
Definition: Logging.h:60
#define LogDebug(...)
Definition: Logging.h:90
__host__ __device__ float length(float2 v)
Definition: cutil_math.h:1162
int max(int a, int b)
Definition: cutil_math.h:48
Helper functions for the ml ops.
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
Definition: ShapeUtil.cpp:214
std::shared_ptr< DeviceHashBackend > CreateDeviceHashBackend(int64_t init_capacity, const Dtype &key_dtype, const SizeVector &key_element_shape, const std::vector< Dtype > &value_dtypes, const std::vector< SizeVector > &value_element_shapes, const Device &device, const HashBackendType &backend)
const Dtype Bool
Definition: Dtype.cpp:52
const Dtype Int64
Definition: Dtype.cpp:47
CLOUDVIEWER_HOST_DEVICE Pair< First, Second > make_pair(const First &_first, const Second &_second)
Definition: SlabTraits.h:49
const Dtype Int32
Definition: Dtype.cpp:46
core::HashMap ReadHashMap(const std::string &file_name)
Definition: HashMapIO.cpp:48
void WriteHashMap(const std::string &file_name, const core::HashMap &hashmap)
Definition: HashMapIO.cpp:17
Generic file read and write utility for python interface.