ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
Indexer.cpp
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <numeric>
11 
12 #ifdef BUILD_ISPC_MODULE
13 #include "Indexer_ispc.h"
14 #endif
15 
16 namespace cloudViewer {
17 namespace core {
18 
19 #ifdef BUILD_ISPC_MODULE
20 ispc::TensorRef TensorRef::ToISPC() const {
21  ispc::TensorRef ispc_tensor_ref;
22 
23  ispc_tensor_ref.data_ptr_ = data_ptr_;
24  ispc_tensor_ref.ndims_ = ndims_;
25  ispc_tensor_ref.dtype_byte_size_ = dtype_byte_size_;
26  for (int64_t i = 0; i < ndims_; ++i) {
27  ispc_tensor_ref.shape_[i] = shape_[i];
28  ispc_tensor_ref.byte_strides_[i] = byte_strides_[i];
29  }
30 
31  return ispc_tensor_ref;
32 }
33 #endif
34 
35 Indexer::Indexer(const std::vector<Tensor>& input_tensors,
36  const Tensor& output_tensor,
37  DtypePolicy dtype_policy,
38  const SizeVector& reduction_dims)
39  : Indexer(input_tensors,
40  std::vector<Tensor>{output_tensor},
41  dtype_policy,
42  reduction_dims) {}
43 
44 Indexer::Indexer(const std::vector<Tensor>& input_tensors,
45  const std::vector<Tensor>& output_tensors,
46  DtypePolicy dtype_policy,
47  const SizeVector& reduction_dims) {
48  // Check the number of inputs and outputs.
49  num_inputs_ = static_cast<int64_t>(input_tensors.size());
50  num_outputs_ = static_cast<int64_t>(output_tensors.size());
51  if (num_inputs_ < 1) {
52  utility::LogError("Indexer must have at least one input.");
53  }
54  if (num_inputs_ > MAX_INPUTS) {
55  utility::LogError(
56  "Indexer cannot have more than {} inputs, but got {}.",
57  MAX_INPUTS, num_inputs_);
58  }
59  if (num_outputs_ < 1) {
60  utility::LogError("Indexer must have at least one output.");
61  }
62  if (num_outputs_ > MAX_OUTPUTS) {
63  utility::LogError(
64  "Indexer cannot have more than {} outputs, but got {}.",
65  MAX_OUTPUTS, num_outputs_);
66  }
67 
68  // Check DtypePolicy.
69  if (dtype_policy == DtypePolicy::ALL_SAME) {
70  const Dtype ref_dtype = input_tensors[0].GetDtype();
71  for (const auto& input_tensor : input_tensors) {
72  if (input_tensor.GetDtype() != ref_dtype) {
73  utility::LogError("Dype mismatch {} != {}.",
74  input_tensor.GetDtype().ToString(),
75  ref_dtype.ToString());
76  }
77  }
78  for (const auto& output_tensor : output_tensors) {
79  if (output_tensor.GetDtype() != ref_dtype) {
80  utility::LogError("Dype mismatch {} != {}.",
81  output_tensor.GetDtype().ToString(),
82  ref_dtype.ToString());
83  }
84  }
85  } else if (dtype_policy == DtypePolicy::INPUT_SAME) {
86  const Dtype ref_dtype = input_tensors[0].GetDtype();
87  for (const auto& input_tensor : input_tensors) {
88  if (input_tensor.GetDtype() != ref_dtype) {
89  utility::LogError("Dype mismatch {} != {}.",
90  input_tensor.GetDtype().ToString(),
91  ref_dtype.ToString());
92  }
93  }
94  } else if (dtype_policy == DtypePolicy::INPUT_SAME_OUTPUT_BOOL) {
95  const Dtype ref_dtype = input_tensors[0].GetDtype();
96  for (const auto& input_tensor : input_tensors) {
97  if (input_tensor.GetDtype() != ref_dtype) {
98  utility::LogError("Dype mismatch {} != {}.",
99  input_tensor.GetDtype().ToString(),
100  ref_dtype.ToString());
101  }
102  }
103  for (const auto& output_tensor : output_tensors) {
104  if (output_tensor.GetDtype() != core::Bool) {
105  utility::LogError("Dype mismatch {} != {}.",
106  output_tensor.GetDtype().ToString(),
107  core::Bool.ToString());
108  }
109  }
110  } else if (dtype_policy == DtypePolicy::NONE) {
111  // Do nothing.
112  } else {
113  utility::LogError("Unimplemented dtype policy");
114  }
115 
116  // Convert to TensorRef.
117  for (int64_t i = 0; i < num_inputs_; ++i) {
118  inputs_[i] = TensorRef(input_tensors[i]);
119  }
120  for (int64_t i = 0; i < num_outputs_; ++i) {
121  outputs_[i] = TensorRef(output_tensors[i]);
122  }
123 
124  // For simplicity, all outputs must have the same shape.
125  SizeVector ref_output_shape = output_tensors[0].GetShape();
126  for (const auto& output_tensor : output_tensors) {
127  if (output_tensor.GetShape() != ref_output_shape) {
129  "For broadcast, all output shapes must be the same, "
130  "but {} != {}",
131  output_tensor.GetShape(), ref_output_shape);
132  }
133  }
134 
135  // Theoretically, reduction can be mixed with broadcasting. For
136  // simplicity, we require explicit broadcasting after reduction.
137  if (reduction_dims.size() > 0) {
138  if (num_inputs_ != 1) {
140  "Internal error: reduction op can only have 1 inputs.");
141  }
142 
143  for (int64_t i = 0; i < num_outputs_; ++i) {
144  // Sanity check. The indexer only handles keepdim == true.
145  // This also ensures that reduction is not mixed with broadcasting.
146  if (shape_util::ReductionShape(input_tensors[0].GetShape(),
147  reduction_dims, true) !=
148  output_tensors[i].GetShape()) {
150  "Reduction dimensions mismatch, input's shape {}, "
151  "reduction dims {}, output's shape {}.",
152  input_tensors[0].GetShape(), reduction_dims,
153  output_tensors[i].GetShape());
154  }
155 
156  // For each reduction dim, set the corresponding output strides to
157  // 0.
158  ReductionRestride(outputs_[i], inputs_[0].ndims_, inputs_[0].shape_,
159  reduction_dims);
160  }
161 
162  // ndims_ == inputs_[0].ndims_ == output_.ndims
163  ndims_ = inputs_[0].ndims_;
164 
165  // Permute reduction dimensions to front
166  ReorderDimensions(reduction_dims);
167 
168  // Fill global shape
169  for (int64_t i = 0; i < ndims_; ++i) {
170  primary_shape_[i] = inputs_[0].shape_[i];
171  }
172 
173  // Combine dimensions to reduce index computation.
174  CoalesceDimensions();
175  } else {
176  // Broadcast inputs to match output shape, by resetting input's
177  // shape and strides.
178  // outputs_[0] is used since all outputs have the same shape.
179  for (int64_t i = 0; i < num_inputs_; ++i) {
180  BroadcastRestride(inputs_[i], outputs_[0].ndims_,
181  outputs_[0].shape_);
182  }
183 
184  // Fill global shape.
185  // outputs_[0] is used since all outputs have the same shape.
186  ndims_ = outputs_[0].ndims_;
187  for (int64_t i = 0; i < ndims_; ++i) {
188  primary_shape_[i] = outputs_[0].shape_[i];
189  }
190  }
191 
192  // Fill global strides primary_strides_.
193  UpdatePrimaryStrides();
194 
195  UpdateContiguousFlags();
196 }
197 
198 bool Indexer::CanUse32BitIndexing() const {
199  // 2^31 - 1 = 2147483647
200  int64_t max_value = std::numeric_limits<int32_t>::max();
201 
202  if (NumWorkloads() > max_value) {
203  return false;
204  }
205 
206  // Check inputs
207  for (int64_t i = 0; i < num_inputs_; i++) {
208  int64_t max_offset = 1;
209  for (int dim = 0; dim < ndims_; dim++) {
210  max_offset +=
211  (primary_shape_[dim] - 1) * inputs_[i].byte_strides_[dim];
212  }
213  if (max_offset > max_value) {
214  return false;
215  }
216  }
217 
218  // Check outputs
219  for (int64_t i = 0; i < num_outputs_; i++) {
220  int64_t max_offset = 1;
221  for (int dim = 0; dim < ndims_; dim++) {
222  max_offset +=
223  (primary_shape_[dim] - 1) * outputs_[i].byte_strides_[dim];
224  }
225 
226  if (max_offset > max_value) {
227  return false;
228  }
229  }
230 
231  return true;
232 }
233 
234 IndexerIterator Indexer::SplitTo32BitIndexing() const {
235  return IndexerIterator(*this);
236 }
237 
238 std::unique_ptr<Indexer> Indexer::SplitLargestDim() {
239  // Get the dimension to split.
240  if (ndims_ == 0) {
241  utility::LogError("Cannot split when ndims_ == 0");
242  return nullptr;
243  }
244  if (primary_shape_[ndims_ - 1] < 2) {
245  utility::LogError("primary_shape_[ndims_ - 1] = {} < 2, cannot split.",
246  primary_shape_[ndims_ - 1]);
247  return nullptr;
248  }
249  int64_t max_extent = -1;
250  int64_t dim_to_split = -1;
251  for (int64_t dim = ndims_ - 1; dim >= 0; dim--) {
252  int64_t size = primary_shape_[dim];
253 
254  // Inputs
255  for (int64_t i = 0; i < num_inputs_; i++) {
256  int64_t extent = (size - 1) * inputs_[i].byte_strides_[dim];
257  if (extent > max_extent) {
258  max_extent = extent;
259  dim_to_split = dim;
260  }
261  }
262 
263  // Outputs
264  for (int64_t i = 0; i < num_outputs_; i++) {
265  int64_t extent = (size - 1) * outputs_[i].byte_strides_[dim];
266  if (extent > max_extent) {
267  max_extent = extent;
268  dim_to_split = dim;
269  }
270  }
271  }
272  if (max_extent < 0) {
274  "Internal error: max_extent must be >= 0, but got {}.",
275  max_extent);
276  return nullptr;
277  }
278  if (!(dim_to_split >= 0 && dim_to_split < ndims_)) {
280  "Internal error: 0 <= dim_to_split < {} required, but got {}.",
281  ndims_, dim_to_split);
282  return nullptr;
283  }
284  if (primary_shape_[dim_to_split] < 2) {
286  "Internal error: cannot split dimension size {}, must be >= 2.",
287  primary_shape_[dim_to_split]);
288  return nullptr;
289  }
290 
291  std::unique_ptr<Indexer> copy(new Indexer(*this));
292  bool overlaps = IsReductionDim(dim_to_split);
293  auto copy_size = primary_shape_[dim_to_split] / 2;
294  auto this_size = primary_shape_[dim_to_split] - copy_size;
295  copy->ShrinkDim(dim_to_split, 0, copy_size);
296  copy->final_output_ &= !overlaps;
297  this->ShrinkDim(dim_to_split, copy_size, this_size);
298  this->accumulate_ |= overlaps;
299 
300  return copy;
301 }
302 
303 Indexer Indexer::GetPerOutputIndexer(int64_t output_idx) const {
304  // E.g. input_shape = (4, 3, 2), output_shape = (1, 3, 2), reduce_dim = 0.
305  // Then, output_idx = 0 -> inputs (*, 0, 0) -> offset_indices (0, 0, 0)
306  // output_idx = 1 -> inputs (*, 0, 1) -> offset_indices (0, 0, 1)
307  // output_idx = 2 -> inputs (*, 1, 0) -> offset_indices (0, 1, 0)
308  // output_idx = 3 -> inputs (*, 1, 1) -> offset_indices (0, 1, 1)
309  // output_idx = 4 -> inputs (*, 2, 0) -> offset_indices (0, 2, 0)
310  // output_idx = 5 -> inputs (*, 2, 1) -> offset_indices (0, 2, 1)
311  int64_t output_shape[MAX_DIMS] = {0};
312  int64_t output_default_strides[MAX_DIMS] = {0};
313  int64_t offset_indices[MAX_DIMS] = {0};
314 
315  for (int64_t i = 0; i < ndims_; ++i) {
316  if (IsReductionDim(i)) {
317  output_shape[i] = 1;
318  } else {
319  output_shape[i] = primary_shape_[i];
320  }
321  }
322  int64_t stride = 1;
323  for (int64_t i = ndims_ - 1; i >= 0; --i) {
324  output_default_strides[i] = stride;
325  // Handles 0-sized dimensions
326  stride = output_shape[i] > 1 ? stride * output_shape[i] : stride;
327  }
328  for (int64_t i = 0; i < ndims_; ++i) {
329  offset_indices[i] = output_idx / output_default_strides[i];
330  output_idx = output_idx % output_default_strides[i];
331  }
332 
333  Indexer sub_indexer = *this;
334  for (int64_t dim = 0; dim < sub_indexer.ndims_; ++dim) {
335  for (int64_t i = 0; i < sub_indexer.num_inputs_; ++i) {
336  sub_indexer.inputs_[i].data_ptr_ =
337  ((char*)sub_indexer.inputs_[i].data_ptr_) +
338  sub_indexer.inputs_[i].byte_strides_[dim] *
339  offset_indices[dim];
340  if (!sub_indexer.IsReductionDim(dim)) {
341  sub_indexer.inputs_[i].shape_[dim] = 1;
342  }
343  }
344  for (int64_t i = 0; i < sub_indexer.num_outputs_; ++i) {
345  sub_indexer.outputs_[i].data_ptr_ =
346  ((char*)sub_indexer.outputs_[i].data_ptr_) +
347  sub_indexer.outputs_[i].byte_strides_[dim] *
348  offset_indices[dim];
349  if (!sub_indexer.IsReductionDim(dim)) {
350  sub_indexer.outputs_[i].shape_[dim] = 1;
351  }
352  }
353  if (!sub_indexer.IsReductionDim(dim)) {
354  sub_indexer.GetPrimaryShape()[dim] = 1;
355  }
356  }
357  sub_indexer.UpdatePrimaryStrides();
358 
359  sub_indexer.UpdateContiguousFlags();
360 
361  return sub_indexer;
362 }
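
Editor's note: a minimal sketch (not part of Indexer.cpp) of how GetPerOutputIndexer() is commonly consumed to drive a reduction: one sub-indexer per output element, whose workloads enumerate exactly the input values that reduce into that element. The ReduceSketch wrapper, the include path, and the accumulation callback are illustrative assumptions; only NumOutputElements(), GetPerOutputIndexer(), and NumWorkloads() are taken from this file.

#include <cstdint>
#include "core/Indexer.h"  // assumed include path

// Hypothetical reduction driver over a reduction-configured Indexer.
template <typename Func>
void ReduceSketch(const cloudViewer::core::Indexer& indexer, Func reduce_fn) {
    for (int64_t output_idx = 0; output_idx < indexer.NumOutputElements();
         ++output_idx) {
        // Sub-indexer restricted to the inputs feeding this output element.
        cloudViewer::core::Indexer sub = indexer.GetPerOutputIndexer(output_idx);
        for (int64_t workload = 0; workload < sub.NumWorkloads(); ++workload) {
            reduce_fn(sub, workload);  // accumulate one input element
        }
    }
}
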
363 
364 void Indexer::ShrinkDim(int64_t dim, int64_t start, int64_t size) {
365  // inputs_ and output_'s shapes are not important.
366  if (!(dim >= 0 && dim < ndims_)) {
367  utility::LogError("0 <= dim < {} required, but got {}.", ndims_, dim);
368  return;
369  }
370  if (size <= 0) {
371  utility::LogError("Invalid size {}, must be > 0.", size);
372  return;
373  }
374  // Inputs
375  for (int64_t i = 0; i < num_inputs_; ++i) {
376  inputs_[i].data_ptr_ = static_cast<char*>(inputs_[i].data_ptr_) +
377  inputs_[i].byte_strides_[dim] * start;
378  }
379  // Outputs
380  for (int64_t i = 0; i < num_outputs_; ++i) {
381  outputs_[i].data_ptr_ = static_cast<char*>(outputs_[i].data_ptr_) +
382  outputs_[i].byte_strides_[dim] * start;
383  }
384 
385  primary_shape_[dim] = size;
386  UpdatePrimaryStrides();
387 
388  UpdateContiguousFlags();
389 
390  if (size == 1) {
391  CoalesceDimensions();
392  }
393 }
394 
395 int64_t Indexer::NumReductionDims() const {
396  // All outputs have the same shape, so it's okay to use outputs_[0].
397  int64_t count = 0;
398  for (int64_t dim = 0; dim < ndims_; dim++) {
399  if (outputs_[0].byte_strides_[dim] == 0) {
400  count++;
401  }
402  }
403  return count;
404 }
405 
406 int64_t Indexer::NumWorkloads() const {
407  int64_t num_workloads = 1;
408  for (int64_t i = 0; i < ndims_; ++i) {
409  num_workloads *= primary_shape_[i];
410  }
411  return num_workloads;
412 }
413 
414 int64_t Indexer::NumOutputElements() const {
415  // All outputs have the same shape, so it's okay to use outputs_[0].
416  int64_t num_output_elements = 1;
417  for (int64_t i = 0; i < ndims_; ++i) {
418  if (outputs_[0].byte_strides_[i] != 0 || primary_shape_[i] == 0) {
419  num_output_elements *= primary_shape_[i];
420  }
421  }
422  return num_output_elements;
423 }
424 
425 void Indexer::CoalesceDimensions() {
426  if (ndims_ <= 1) {
427  return;
428  }
429 
430  auto can_coalesce = [&](int64_t dim0, int64_t dim1) {
431  auto shape0 = primary_shape_[dim0];
432  auto shape1 = primary_shape_[dim1];
433  if (shape0 == 1 || shape1 == 1) {
434  return true;
435  }
436  for (int64_t i = 0; i < num_inputs_; i++) {
437  auto& stride = inputs_[i].byte_strides_;
438  if (shape0 * stride[dim0] != stride[dim1]) {
439  return false;
440  }
441  }
442  for (int64_t i = 0; i < num_outputs_; i++) {
443  auto& stride = outputs_[i].byte_strides_;
444  if (shape0 * stride[dim0] != stride[dim1]) {
445  return false;
446  }
447  }
448 
449  return true;
450  };
451 
452  // Replace each operand's stride at dim0 with its stride at dim1.
453  auto replace_stride = [&](int64_t dim0, int64_t dim1) {
454  for (int64_t i = 0; i < num_inputs_; i++) {
455  inputs_[i].byte_strides_[dim0] = inputs_[i].byte_strides_[dim1];
456  }
457  for (int64_t i = 0; i < num_outputs_; i++) {
458  outputs_[i].byte_strides_[dim0] = outputs_[i].byte_strides_[dim1];
459  }
460  };
461 
462  int64_t prev_dim = 0;
463  for (int64_t dim = 1; dim < ndims_; dim++) {
464  if (can_coalesce(prev_dim, dim)) {
465  if (primary_shape_[prev_dim] == 1) {
466  replace_stride(prev_dim, dim);
467  }
468  primary_shape_[prev_dim] *= primary_shape_[dim];
469  } else {
470  prev_dim++;
471  if (prev_dim != dim) {
472  replace_stride(prev_dim, dim);
473  primary_shape_[prev_dim] = primary_shape_[dim];
474  }
475  }
476  }
477 
478  ndims_ = prev_dim + 1;
479  for (int64_t i = 0; i < num_inputs_; i++) {
480  inputs_[i].ndims_ = ndims_;
481  }
482  for (int64_t i = 0; i < num_outputs_; i++) {
483  outputs_[i].ndims_ = ndims_;
484  }
485 
486  UpdatePrimaryStrides();
487 
488  UpdateContiguousFlags();
489 }
490 
491 void Indexer::ReorderDimensions(const SizeVector& reduction_dims) {
492  if (ndims_ == 1) {
493  return;
494  }
495 
496  SizeVector permute(ndims_);
497  std::iota(permute.rbegin(), permute.rend(), 0);
498 
499  // Returns -1 / 0 / 1 indicates no_swap / tbd / swap dim0 with dim1.
500  auto ShouldSwap = [&](size_t dim0, size_t dim1) {
501  // Outputs
502  for (int64_t i = 0; i < num_outputs_; i++) {
503  int64_t stride0 = outputs_[i].byte_strides_[dim0];
504  int64_t stride1 = outputs_[i].byte_strides_[dim1];
505  if (stride0 == 0 && stride1 != 0) {
506  return -1;
507  } else if (stride1 == 0 && stride0 != 0) {
508  return 1;
509  } else if (stride0 != 0 && stride1 != 0) {
510  if (stride0 <= stride1) {
511  return -1;
512  } else {
513  return 1;
514  }
515  }
516  }
517 
518  // Inputs
519  for (int64_t i = 0; i < num_inputs_; i++) {
520  int64_t stride0 = inputs_[i].byte_strides_[dim0];
521  int64_t stride1 = inputs_[i].byte_strides_[dim1];
522  if (stride0 == 0 || stride1 == 0) {
523  continue;
524  } else if (stride0 <= stride1) {
525  return -1;
526  } else {
527  return 1;
528  }
529  }
530 
531  return 0;
532  };
533 
534  // Insertion sort with support for ambiguous comparisons
535  for (int i = 1; i < ndims_; i++) {
536  int dim1 = i;
537  for (int dim0 = i - 1; dim0 >= 0; dim0--) {
538  int comparison = ShouldSwap(permute[dim0], permute[dim1]);
539  if (comparison > 0) {
540  std::swap(permute[dim0], permute[dim1]);
541  dim1 = dim0;
542  } else if (comparison < 0) {
543  break;
544  }
545  }
546  }
547 
548  for (int64_t i = 0; i < num_inputs_; i++) {
549  inputs_[i].Permute(permute);
550  }
551  for (int64_t i = 0; i < num_outputs_; i++) {
552  outputs_[i].Permute(permute);
553  }
554 }
555 
556 void Indexer::UpdatePrimaryStrides() {
557  int64_t stride = 1;
558  for (int64_t i = ndims_ - 1; i >= 0; --i) {
559  primary_strides_[i] = stride;
560  // Handles 0-sized dimensions
561  stride = primary_shape_[i] > 1 ? stride * primary_shape_[i] : stride;
562  }
563 }
564 
565 void Indexer::UpdateContiguousFlags() {
566  for (int64_t i = 0; i < num_inputs_; ++i) {
567  inputs_contiguous_[i] = inputs_[i].IsContiguous();
568  }
569 
570  for (int64_t i = 0; i < num_outputs_; ++i) {
571  outputs_contiguous_[i] = outputs_[i].IsContiguous();
572  }
573 }
574 
575 void Indexer::BroadcastRestride(TensorRef& src,
576  int64_t dst_ndims,
577  const int64_t* dst_shape) {
578  int64_t src_ndims = src.ndims_;
579 
580  // Fill omitted dimensions.
581  int64_t ndims_omitted = dst_ndims - src_ndims;
582  for (int64_t i = src_ndims - 1; i >= 0; --i) {
583  src.shape_[ndims_omitted + i] = src.shape_[i];
584  src.byte_strides_[ndims_omitted + i] = src.byte_strides_[i];
585  }
586  for (int64_t i = 0; i < ndims_omitted; ++i) {
587  src.shape_[i] = 1;
588  src.byte_strides_[i] = 0;
589  }
590  src.ndims_ = dst_ndims;
591 
592  // Fill broadcasted dimensions.
593  for (int64_t i = 0; i < dst_ndims; ++i) {
594  // It is okay if src.shape_[i] != 1 && dst.shape[i] == 1 for
595  // reduction.
596  if (src.shape_[i] == 1 && dst_shape[i] != 1) {
597  src.byte_strides_[i] = 0;
598  }
599  }
600 }
601 
602 void Indexer::ReductionRestride(TensorRef& dst,
603  int64_t src_ndims,
604  const int64_t* src_shape,
605  const SizeVector& reduction_dims) {
606  if (dst.ndims_ != src_ndims) {
607  utility::LogError("Internal error, src ndims {} != dst ndims {}",
608  src_ndims, dst.ndims_);
609  }
610  for (int64_t i = 0; i < dst.ndims_; ++i) {
611  if (dst.shape_[i] == 1 && src_shape[i] != 1) {
612  dst.byte_strides_[i] = 0;
613  }
614  }
615 }
616 
617 #ifdef BUILD_ISPC_MODULE
618 ispc::Indexer Indexer::ToISPC() const {
619  ispc::Indexer ispc_indexer;
620 
621  ispc_indexer.num_inputs_ = NumInputs();
622  ispc_indexer.num_outputs_ = NumOutputs();
623  for (int64_t i = 0; i < NumInputs(); ++i) {
624  ispc_indexer.inputs_[i] = GetInput(i).ToISPC();
625  ispc_indexer.inputs_contiguous_[i] = GetInput(i).IsContiguous();
626  }
627  for (int64_t i = 0; i < NumOutputs(); ++i) {
628  ispc_indexer.outputs_[i] = GetOutput(i).ToISPC();
629  ispc_indexer.outputs_contiguous_[i] = GetOutput(i).IsContiguous();
630  }
631  for (int64_t i = 0; i < NumDims(); ++i) {
632  ispc_indexer.primary_shape_[i] = GetPrimaryShape()[i];
633  ispc_indexer.primary_strides_[i] = GetPrimaryStrides()[i];
634  }
635  ispc_indexer.ndims_ = NumDims();
636 
637  return ispc_indexer;
638 }
639 #endif
640 
641 IndexerIterator::IndexerIterator(const Indexer& indexer) : indexer_(indexer) {}
642 
643 IndexerIterator::Iterator::Iterator(const Indexer& indexer) {
644  vec_.emplace_back(new Indexer(indexer));
645  vec_.emplace_back(nullptr);
646  ++(*this);
647 }
648 
649 Indexer& IndexerIterator::Iterator::operator*() const { return *vec_.back(); }
650 
651 IndexerIterator::Iterator& IndexerIterator::Iterator::operator++() {
652  vec_.pop_back();
653  while (!vec_.empty() && !vec_.back()->CanUse32BitIndexing()) {
654  auto& indexer = *vec_.back();
655  vec_.emplace_back(indexer.SplitLargestDim());
656  }
657  return *this;
658 }
659 
660 bool IndexerIterator::Iterator::operator==(const Iterator& other) const {
661  return this == &other || (vec_.empty() && other.vec_.empty());
662 }
663 bool IndexerIterator::Iterator::operator!=(const Iterator& other) const {
664  return !(*this == other);
665 }
666 
667 IndexerIterator::Iterator IndexerIterator::begin() const {
668  return IndexerIterator::Iterator(indexer_);
669 }
670 
671 IndexerIterator::Iterator IndexerIterator::end() const {
672  return IndexerIterator::Iterator();
673 }
674 
675 } // namespace core
676 } // namespace cloudViewer
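
Editor's note: a minimal usage sketch (not part of Indexer.cpp) showing how the 32-bit splitting logic defined above is typically consumed by a kernel launcher. Only the Indexer member functions visible in this file (CanUse32BitIndexing, SplitTo32BitIndexing, NumWorkloads) are taken from the source; the include paths, the BinaryEW wrapper, and the sequential LaunchKernel helper standing in for a real device launch are illustrative assumptions.

#include <cstdint>
#include <vector>

#include "core/Indexer.h"  // assumed include path
#include "core/Tensor.h"   // assumed include path

namespace example {

using cloudViewer::core::DtypePolicy;
using cloudViewer::core::Indexer;
using cloudViewer::core::Tensor;

// Hypothetical stand-in for a device kernel launch: invokes element_fn once
// per workload (i.e. per element of the Indexer's primary shape).
template <typename Func>
void LaunchKernel(const Indexer& sub_indexer, Func element_fn) {
    for (int64_t workload = 0; workload < sub_indexer.NumWorkloads();
         ++workload) {
        element_fn(workload);
    }
}

// Hypothetical broadcasted binary element-wise op: dst = f(a, b).
template <typename Func>
void BinaryEW(const Tensor& a, const Tensor& b, Tensor& dst, Func element_fn) {
    // Inputs and output must share one dtype; an empty SizeVector means no
    // reduction dimensions, so the inputs are broadcast to dst's shape.
    Indexer indexer({a, b}, dst, DtypePolicy::ALL_SAME, {});

    if (indexer.CanUse32BitIndexing()) {
        // All byte offsets fit in int32_t: one launch over the whole Indexer.
        LaunchKernel(indexer, element_fn);
    } else {
        // Otherwise, SplitTo32BitIndexing() repeatedly applies
        // SplitLargestDim() until every sub-indexer is 32-bit addressable.
        for (Indexer& sub_indexer : indexer.SplitTo32BitIndexing()) {
            LaunchKernel(sub_indexer, element_fn);
        }
    }
}

}  // namespace example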