ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
NonZeroSYCL.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <Logging.h>
9 
10 #include <numeric>
11 #include <oneapi/dpl/algorithm>
12 #include <oneapi/dpl/execution>
13 
17 
18 namespace cloudViewer {
19 namespace core {
20 namespace kernel {
21 
22 Tensor NonZeroSYCL(const Tensor& src) {
23  // Get flattened non-zero indices.
24  TensorIterator src_iter(src);
25  const int64_t num_elements = src.NumElements();
26  auto device = src.GetDevice();
27  Tensor indices = Tensor::Arange(0, num_elements, 1, core::Int64, device);
28  Tensor non_zero_indices(SizeVector({num_elements}), Int64, device);
29  int64_t *non_zero_indices_ptr = non_zero_indices.GetDataPtr<int64_t>(),
30  *indices_ptr = indices.GetDataPtr<int64_t>();
31  size_t num_non_zeros;
33  auto it = std::copy_if(
34  oneapi::dpl::execution::dpcpp_default, indices_ptr,
35  indices_ptr + num_elements, non_zero_indices_ptr,
36  [src_iter](int64_t index) {
37  auto src_ptr = static_cast<const scalar_t*>(
38  src_iter.GetPtr(index));
39  CLOUDVIEWER_ASSERT(src_ptr != nullptr && "Internal error.");
40  return *src_ptr != 0;
41  });
42  num_non_zeros = std::distance(non_zero_indices_ptr, it);
43  });
44 
45  // Transform flattened indices to indices in each dimension.
46  const auto num_dims = src.NumDims();
47  SizeVector shape = src.GetShape();
48  // MAX_DIMS: Maximum number of dimensions of TensorRef, defined in
49  // Indexer.h.
50  sycl::marray<int64_t, MAX_DIMS> shape_vec; // device copyable
51  if (shape.size() > MAX_DIMS) {
52  utility::LogError("Too many dimensions: {} > MAX_DIMS={}.",
53  shape.size(), MAX_DIMS);
54  }
55  for (auto k = 0; k < num_dims; ++k) shape_vec[k] = shape[k];
56  Tensor result({num_dims, static_cast<int64_t>(num_non_zeros)}, Int64,
57  device);
58  int64_t* result_ptr = result.GetDataPtr<int64_t>();
59  auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
60 
61  queue.parallel_for(num_non_zeros, [=](int64_t i) {
62  auto non_zero_index = non_zero_indices_ptr[i];
63  auto this_result_ptr =
64  result_ptr + i + (num_dims - 1) * num_non_zeros;
65  CLOUDVIEWER_ASSERT(this_result_ptr != nullptr &&
66  "Internal error.");
67  for (auto dim = num_dims - 1; dim >= 0;
68  dim--, this_result_ptr -= num_non_zeros) {
69  *this_result_ptr = non_zero_index % shape_vec[dim];
70  non_zero_index = non_zero_index / shape_vec[dim];
71  }
72  }).wait_and_throw();
73  return result;
74 }
75 
76 } // namespace kernel
77 } // namespace core
78 } // namespace cloudViewer
#define DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(DTYPE,...)
Definition: Dispatch.h:68
#define CLOUDVIEWER_ASSERT(...)
Definition: Macro.h:51
SYCL queue manager.
core::Tensor result
Definition: VtkUtils.cpp:76
static Tensor Arange(const Scalar start, const Scalar stop, const Scalar step=1, const Dtype dtype=core::Int64, const Device &device=core::Device("CPU:0"))
Create a 1D tensor with evenly spaced values in the given interval.
Definition: Tensor.cpp:436
Dtype GetDtype() const
Definition: Tensor.h:1164
int64_t NumElements() const
Definition: Tensor.h:1170
Device GetDevice() const override
Definition: Tensor.cpp:1435
static SYCLContext & GetInstance()
Get singleton instance.
Definition: SYCLContext.cpp:25
sycl::queue GetDefaultQueue(const Device &device)
Get the default SYCL queue given a CloudViewer device.
Definition: SYCLContext.cpp:43
#define LogError(...)
Definition: Logging.h:60
Tensor NonZeroSYCL(const Tensor &src)
Definition: NonZeroSYCL.cpp:22
static constexpr int64_t MAX_DIMS
Definition: Indexer.h:38
const Dtype Int64
Definition: Dtype.cpp:47
Generic file read and write utility for the Python interface.