ACloudViewer
3.9.4
A Modern Library for 3D Data Processing
IndexReductionSYCL.cpp
Go to the documentation of this file.
1
// ----------------------------------------------------------------------------
2
// - CloudViewer: www.cloudViewer.org -
3
// ----------------------------------------------------------------------------
4
// Copyright (c) 2018-2024 www.cloudViewer.org
5
// SPDX-License-Identifier: MIT
6
// ----------------------------------------------------------------------------
7
8
#include <
Logging.h
>
9
10
#include "
cloudViewer/core/Dispatch.h
"
11
#include "
cloudViewer/core/Indexer.h
"
12
#include "
cloudViewer/core/SYCLContext.h
"
13
#include "
cloudViewer/core/Tensor.h
"
14
15
namespace
cloudViewer
{
16
namespace
core {
17
namespace
kernel {
18
19
void
IndexAddSYCL_
(int64_t dim,
20
const
Tensor
& index,
21
const
Tensor
& src,
22
Tensor
& dst) {
23
// index: [N,], src: [N, D], dst: [M, D]
24
// In Indexer, output shape defines the actual primary strides.
25
// However, in IndexAdd_, input dominates the iterations.
26
// So put dst (output) at indexer's input, and src (input) at output.
27
Indexer
indexer
({dst}, src,
DtypePolicy::NONE
);
28
29
// Index is simply a 1D contiguous tensor, with a different stride
30
// behavior to src. So use raw pointer for simplicity.
31
auto
index_ptr = index.
GetDataPtr
<int64_t>();
32
33
int64_t broadcasting_elems = 1;
34
for
(int64_t d = 1; d < src.
NumDims
(); ++d) {
35
broadcasting_elems *= src.
GetShape
(d);
36
}
37
sycl::queue queue =
38
sy::SYCLContext::GetInstance
().
GetDefaultQueue
(src.
GetDevice
());
39
40
// TODO: Replace with SYCL reduction API
41
DISPATCH_FLOAT_DTYPE_TO_TEMPLATE
(src.
GetDtype
(), [&]() {
42
queue.parallel_for(index.GetLength(), [=](int64_t workload_idx) {
43
int64_t reduction_idx = workload_idx / broadcasting_elems;
44
int64_t broadcasting_idx = workload_idx % broadcasting_elems;
45
46
const int64_t idx = index_ptr[reduction_idx];
47
int64_t dst_idx = idx * broadcasting_elems + broadcasting_idx;
48
49
// Note input and output is switched here to adapt to the
50
// indexer
51
scalar_t* src_ptr = indexer.GetOutputPtr<scalar_t>(0, idx);
52
scalar_t* dst_ptr = indexer.GetInputPtr<scalar_t>(0, dst_idx);
53
sycl::atomic_ref<scalar_t, sycl::memory_order::acq_rel,
54
sycl::memory_scope::device>(*dst_ptr) +=
55
*src_ptr;
56
}).wait_and_throw();
57
});
58
}
59
60
}
// namespace kernel
61
}
// namespace core
62
}
// namespace cloudViewer
indexer
Indexer indexer
Definition:
BinaryEWSYCL.cpp:30
Dispatch.h
DISPATCH_FLOAT_DTYPE_TO_TEMPLATE
#define DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition:
Dispatch.h:78
Indexer.h
SYCLContext.h
SYCL queue manager.
Tensor.h
cloudViewer::core::Indexer
Definition:
Indexer.h:262
cloudViewer::core::Tensor
Definition:
Tensor.h:32
cloudViewer::core::Tensor::NumDims
int64_t NumDims() const
Definition:
Tensor.h:1172
cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition:
Tensor.h:1164
cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition:
Tensor.h:1144
cloudViewer::core::Tensor::GetDevice
Device GetDevice() const override
Definition:
Tensor.cpp:1435
cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition:
Tensor.h:1127
cloudViewer::core::sy::SYCLContext::GetInstance
static SYCLContext & GetInstance()
Get singleton instance.
Definition:
SYCLContext.cpp:25
cloudViewer::core::sy::SYCLContext::GetDefaultQueue
sycl::queue GetDefaultQueue(const Device &device)
Get the default SYCL queue given a CloudViewer device.
Definition:
SYCLContext.cpp:43
Logging.h
cloudViewer::core::kernel::IndexAddSYCL_
void IndexAddSYCL_(int64_t dim, const Tensor &index, const Tensor &src, Tensor &dst)
Definition:
IndexReductionSYCL.cpp:19
cloudViewer::core::DtypePolicy::NONE
@ NONE
cloudViewer
Generic file read and write utility for python interface.
Definition:
AutoSegmentationTools.h:16
libs
cloudViewer
core
kernel
IndexReductionSYCL.cpp
Generated on Wed Jan 28 2026 09:00:54 for ACloudViewer by
1.9.1