29 struct UnaryElementKernel {
30 UnaryElementKernel(Indexer indexer_) :
indexer(indexer_) {}
31 void operator()(int64_t i) {}
37 template <
typename src_t,
typename dst_t>
38 struct CopyElementKernel :
public UnaryElementKernel {
39 using UnaryElementKernel::UnaryElementKernel;
40 void operator()(int64_t i) {
41 const src_t* src =
indexer.GetInputPtr<src_t>(0, i);
42 dst_t* dst =
indexer.GetOutputPtr<dst_t>(i);
43 *dst =
static_cast<dst_t
>(*src);
49 #define UNARY_ELEMENT_KERNEL(name, elem_op) \
50 template <typename src_t> \
51 struct name##ElementKernel : public UnaryElementKernel { \
52 using UnaryElementKernel::UnaryElementKernel; \
53 void operator()(int64_t i) { \
54 const src_t* src = indexer.GetInputPtr<src_t>(0, i); \
55 src_t* dst = indexer.GetOutputPtr<src_t>(i); \
56 *dst = static_cast<src_t>(elem_op(static_cast<double>(*src))); \
60 struct name##ElementKernel<float> : public UnaryElementKernel { \
61 using UnaryElementKernel::UnaryElementKernel; \
62 void operator()(int64_t i) { \
63 const float* src = indexer.GetInputPtr<float>(0, i); \
64 float* dst = indexer.GetOutputPtr<float>(i); \
65 *dst = elem_op(*src); \
79 #undef UNARY_ELEMENT_KERNEL
83 template <
typename scalar_t>
84 struct NegElementKernel :
public UnaryElementKernel {
85 using UnaryElementKernel::UnaryElementKernel;
86 void operator()(int64_t i) {
87 const scalar_t* src =
indexer.GetInputPtr<scalar_t>(0, i);
88 scalar_t* dst =
indexer.GetOutputPtr<scalar_t>(i);
95 #define UNARY_ELEMENT_KERNEL(name, elem_op) \
96 template <typename src_t> \
97 struct name##ElementKernel : public UnaryElementKernel { \
98 using UnaryElementKernel::UnaryElementKernel; \
99 void operator()(int64_t i) { \
100 const src_t* src = indexer.GetInputPtr<src_t>(0, i); \
101 bool* dst = indexer.GetOutputPtr<bool>(i); \
102 *dst = elem_op(static_cast<double>(*src)); \
106 struct name##ElementKernel<float> : public UnaryElementKernel { \
107 using UnaryElementKernel::UnaryElementKernel; \
108 void operator()(int64_t i) { \
109 const float* src = indexer.GetInputPtr<float>(0, i); \
110 bool* dst = indexer.GetOutputPtr<bool>(i); \
111 *dst = elem_op(*src); \
118 #undef UNARY_ELEMENT_KERNEL
120 template <
typename src_t,
typename dst_t >
121 struct LogicalNotElementKernel :
public UnaryElementKernel {
122 using UnaryElementKernel::UnaryElementKernel;
123 void operator()(int64_t i) {
124 const src_t* src =
indexer.GetInputPtr<src_t>(0, i);
125 dst_t* dst =
indexer.GetOutputPtr<dst_t>(i);
126 *dst =
static_cast<dst_t
>(!
static_cast<bool>(*src));
141 if (src_device.
IsSYCL() && dst_device.IsSYCL()) {
151 scalar_t scalar_element = src.
To(dst_dtype).Item<scalar_t>();
152 scalar_t* dst_ptr = dst.
GetDataPtr<scalar_t>();
153 queue.fill(dst_ptr, scalar_element, num_elements)
156 }
else if (src_device == dst_device) {
163 for (int64_t i = 0; i <
indexer.NumWorkloads(); ++i) {
164 const void* src_ptr =
indexer.GetInputPtr(0, i);
165 void* dst_ptr =
indexer.GetOutputPtr(i);
166 queue.memcpy(dst_ptr, src_ptr, object_byte_size);
168 queue.wait_and_throw();
171 using src_t = scalar_t;
173 using dst_t = scalar_t;
174 ParallelForSYCL<CopyElementKernel<src_t, dst_t>>(
182 }
else if (src_device.
IsCPU() && dst_device.IsSYCL() ||
183 src_device.
IsSYCL() && dst_device.IsCPU()) {
186 src_dtype == dst_dtype) {
195 dst_device.ToString());
206 if (dst_dtype == src_dtype) {
209 ParallelForSYCL<LogicalNotElementKernel<scalar_t, scalar_t>>(
212 }
else if (dst_dtype ==
Bool) {
215 ParallelForSYCL<LogicalNotElementKernel<scalar_t, bool>>(
220 "Boolean op's output type must be boolean or the "
221 "same type as the input.");
229 ParallelForSYCL<IsNanElementKernel<scalar_t>>(device,
indexer);
231 ParallelForSYCL<IsInfElementKernel<scalar_t>>(device,
indexer);
233 ParallelForSYCL<IsFiniteElementKernel<scalar_t>>(device,
242 ParallelForSYCL<SqrtElementKernel<scalar_t>>(device,
246 ParallelForSYCL<SinElementKernel<scalar_t>>(device,
250 ParallelForSYCL<CosElementKernel<scalar_t>>(device,
254 ParallelForSYCL<NegElementKernel<scalar_t>>(device,
258 ParallelForSYCL<ExpElementKernel<scalar_t>>(device,
262 ParallelForSYCL<AbsElementKernel<scalar_t>>(device,
266 ParallelForSYCL<FloorElementKernel<scalar_t>>(device,
270 ParallelForSYCL<CeilElementKernel<scalar_t>>(device,
274 ParallelForSYCL<RoundElementKernel<scalar_t>>(device,
278 ParallelForSYCL<TruncElementKernel<scalar_t>>(device,
#define DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(DTYPE,...)
#define DISPATCH_DTYPE_TO_TEMPLATE(DTYPE,...)
#define UNARY_ELEMENT_KERNEL(name, elem_op)
bool IsCPU() const
Returns true iff device type is CPU.
std::string ToString() const
Returns string representation of device, e.g. "CPU:0", "CUDA:0".
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
static void Memcpy(void *dst_ptr, const Device &dst_device, const void *src_ptr, const Device &src_device, size_t num_bytes)
int64_t NumElements() const
Tensor Contiguous() const
bool IsContiguous() const
void CopyFrom(const Tensor &other)
Copy Tensor values to current tensor from the source tensor.
int64_t NumElements() const
Device GetDevice() const override
SizeVector GetShape() const
Tensor To(Dtype dtype, bool copy=false) const
static SYCLContext & GetInstance()
Get singleton instance.
sycl::queue GetDefaultQueue(const Device &device)
Get the default SYCL queue given a CloudViewer device.
__host__ __device__ float2 fabs(float2 v)
void UnaryEWSYCL(const Tensor &src, Tensor &dst, UnaryEWOpCode op_code)
void CopySYCL(const Tensor &src, Tensor &dst)
MiniVec< float, N > floor(const MiniVec< float, N > &a)
MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Generic file read and write utility for the Python interface.