ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
IndexReductionSYCL.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <Logging.h>
9 
14 
15 namespace cloudViewer {
16 namespace core {
17 namespace kernel {
18 
19 void IndexAddSYCL_(int64_t dim,
20  const Tensor& index,
21  const Tensor& src,
22  Tensor& dst) {
23  // index: [N,], src: [N, D], dst: [M, D]
24  // In Indexer, output shape defines the actual primary strides.
25  // However, in IndexAdd_, input dominates the iterations.
26  // So put dst (output) at indexer's input, and src (input) at output.
27  Indexer indexer({dst}, src, DtypePolicy::NONE);
28 
29  // Index is simply a 1D contiguous tensor, with a different stride
30  // behavior to src. So use raw pointer for simplicity.
31  auto index_ptr = index.GetDataPtr<int64_t>();
32 
33  int64_t broadcasting_elems = 1;
34  for (int64_t d = 1; d < src.NumDims(); ++d) {
35  broadcasting_elems *= src.GetShape(d);
36  }
37  sycl::queue queue =
39 
40  // TODO: Replace with SYCL reduction API
42  queue.parallel_for(index.GetLength(), [=](int64_t workload_idx) {
43  int64_t reduction_idx = workload_idx / broadcasting_elems;
44  int64_t broadcasting_idx = workload_idx % broadcasting_elems;
45 
46  const int64_t idx = index_ptr[reduction_idx];
47  int64_t dst_idx = idx * broadcasting_elems + broadcasting_idx;
48 
49  // Note input and output is switched here to adapt to the
50  // indexer
51  scalar_t* src_ptr = indexer.GetOutputPtr<scalar_t>(0, idx);
52  scalar_t* dst_ptr = indexer.GetInputPtr<scalar_t>(0, dst_idx);
53  sycl::atomic_ref<scalar_t, sycl::memory_order::acq_rel,
54  sycl::memory_scope::device>(*dst_ptr) +=
55  *src_ptr;
56  }).wait_and_throw();
57  });
58 }
59 
60 } // namespace kernel
61 } // namespace core
62 } // namespace cloudViewer
Indexer indexer
#define DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:78
SYCL queue manager.
int64_t NumDims() const
Definition: Tensor.h:1172
Dtype GetDtype() const
Definition: Tensor.h:1164
Device GetDevice() const override
Definition: Tensor.cpp:1435
SizeVector GetShape() const
Definition: Tensor.h:1127
static SYCLContext & GetInstance()
Get singleton instance.
Definition: SYCLContext.cpp:25
sycl::queue GetDefaultQueue(const Device &device)
Get the default SYCL queue given a CloudViewer device.
Definition: SYCLContext.cpp:43
void IndexAddSYCL_(int64_t dim, const Tensor &index, const Tensor &src, Tensor &dst)
Generic file read and write utility for the Python interface.