ACloudViewer 3.9.4
A Modern Library for 3D Data Processing
UnaryEWCPU.cpp
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <Logging.h>
9 
10 #include <cmath>
11 #include <cstring>
12 
13 #include "cloudViewer/core/Dispatch.h"
14 #include "cloudViewer/core/Dtype.h"
15 #include "cloudViewer/core/Indexer.h"
16 #include "cloudViewer/core/MemoryManager.h"
17 #include "cloudViewer/core/ParallelFor.h"
18 #include "cloudViewer/core/SizeVector.h"
19 #include "cloudViewer/core/Tensor.h"
20 #include "cloudViewer/core/kernel/UnaryEW.h"
21 
22 #ifdef BUILD_ISPC_MODULE
23 #include "UnaryEWCPU_ispc.h"
24 #endif
25 
26 namespace cloudViewer {
27 namespace core {
28 namespace kernel {
29 
30 template <typename element_func_t>
31 static void LaunchUnaryEWKernel(const Indexer& indexer,
32  const element_func_t& element_func) {
33  ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
34  [&indexer, &element_func](int64_t i) {
35  element_func(indexer.GetInputPtr(0, i),
36  indexer.GetOutputPtr(i));
37  });
38 }
39 
40 template <typename src_t, typename dst_t, typename element_func_t>
41 static void LaunchUnaryEWKernel(const Indexer& indexer,
42  const element_func_t& element_func) {
43  ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
44  [&indexer, &element_func](int64_t i) {
45  element_func(indexer.GetInputPtr<src_t>(0, i),
46  indexer.GetOutputPtr<dst_t>(i));
47  });
48 }
49 
50 template <typename src_t,
51  typename dst_t,
52  typename element_func_t,
53  typename vec_func_t>
54 static void LaunchUnaryEWKernel(const Indexer& indexer,
55  const element_func_t& element_func,
56  const vec_func_t& vec_func) {
57  ParallelFor(
58  Device("CPU:0"), indexer.NumWorkloads(),
59  [&indexer, &element_func](int64_t i) {
60  element_func(indexer.GetInputPtr<src_t>(0, i),
61  indexer.GetOutputPtr<dst_t>(i));
62  },
63  vec_func);
64 }
65 
66 template <typename src_t, typename dst_t>
67 static void CPUCopyElementKernel(const void* src, void* dst) {
68  *static_cast<dst_t*>(dst) =
69  static_cast<dst_t>(*static_cast<const src_t*>(src));
70 }
71 
72 static void CPUCopyObjectElementKernel(const void* src,
73  void* dst,
74  int64_t object_byte_size) {
75  const char* src_bytes = static_cast<const char*>(src);
76  char* dst_bytes = static_cast<char*>(dst);
77  memcpy(dst_bytes, src_bytes, object_byte_size);
78 }
79 
80 template <typename scalar_t>
81 static void CPUSqrtElementKernel(const void* src, void* dst) {
82  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
83  std::sqrt(*static_cast<const scalar_t*>(src)));
84 }
85 
86 template <typename scalar_t>
87 static void CPUSinElementKernel(const void* src, void* dst) {
88  *static_cast<scalar_t*>(dst) =
89  static_cast<scalar_t>(std::sin(*static_cast<const scalar_t*>(src)));
90 }
91 
92 template <typename scalar_t>
93 static void CPUCosElementKernel(const void* src, void* dst) {
94  *static_cast<scalar_t*>(dst) =
95  static_cast<scalar_t>(std::cos(*static_cast<const scalar_t*>(src)));
96 }
97 
98 template <typename scalar_t,
99  typename std::enable_if<std::is_integral<scalar_t>::value,
100  int>::type = 0>
101 static void CPUNegElementKernel(const void* src, void* dst) {
102  using signed_scalar_t = std::make_signed_t<scalar_t>;
103  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
104  -static_cast<signed_scalar_t>(*static_cast<const scalar_t*>(src)));
105 }
106 
107 template <typename scalar_t,
108  typename std::enable_if<!std::is_integral<scalar_t>::value,
109  int>::type = 0>
110 static void CPUNegElementKernel(const void* src, void* dst) {
111  *static_cast<scalar_t*>(dst) = -*static_cast<const scalar_t*>(src);
112 }
113 
114 template <typename scalar_t>
115 static void CPUExpElementKernel(const void* src, void* dst) {
116  *static_cast<scalar_t*>(dst) =
117  static_cast<scalar_t>(std::exp(*static_cast<const scalar_t*>(src)));
118 }
119 
120 template <typename scalar_t>
121 static void CPUAbsElementKernel(const void* src, void* dst) {
122  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
123  std::abs(static_cast<double>(*static_cast<const scalar_t*>(src))));
124 }
125 
126 template <typename scalar_t>
127 static void CPUIsNanElementKernel(const void* src, void* dst) {
128  *static_cast<bool*>(dst) =
129  std::isnan(static_cast<float>(*static_cast<const scalar_t*>(src)));
130 }
131 
132 template <typename scalar_t>
133 static void CPUIsInfElementKernel(const void* src, void* dst) {
134  *static_cast<bool*>(dst) =
135  std::isinf(static_cast<float>(*static_cast<const scalar_t*>(src)));
136 }
137 
138 template <typename scalar_t>
139 static void CPUIsFiniteElementKernel(const void* src, void* dst) {
140  *static_cast<bool*>(dst) = std::isfinite(
141  static_cast<float>(*static_cast<const scalar_t*>(src)));
142 }
143 
144 template <typename scalar_t>
145 static void CPUFloorElementKernel(const void* src, void* dst) {
146  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::floor(
147  static_cast<double>(*static_cast<const scalar_t*>(src))));
148 }
149 
150 template <typename scalar_t>
151 static void CPUCeilElementKernel(const void* src, void* dst) {
152  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
153  std::ceil(static_cast<double>(*static_cast<const scalar_t*>(src))));
154 }
155 
156 template <typename scalar_t>
157 static void CPURoundElementKernel(const void* src, void* dst) {
158  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::round(
159  static_cast<double>(*static_cast<const scalar_t*>(src))));
160 }
161 
162 template <typename scalar_t>
163 static void CPUTruncElementKernel(const void* src, void* dst) {
164  *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::trunc(
165  static_cast<double>(*static_cast<const scalar_t*>(src))));
166 }
167 
168 template <typename src_t, typename dst_t>
169 static void CPULogicalNotElementKernel(const void* src, void* dst) {
170  *static_cast<dst_t*>(dst) = static_cast<dst_t>(
171  !static_cast<bool>(*static_cast<const src_t*>(src)));
172 }
173 
174 void CopyCPU(const Tensor& src, Tensor& dst) {
175  // src and dst have been checked to have the same shape, dtype, device
176  SizeVector shape = src.GetShape();
177  Dtype src_dtype = src.GetDtype();
178  Dtype dst_dtype = dst.GetDtype();
179  if (src.IsContiguous() && dst.IsContiguous() &&
180  src.GetShape() == dst.GetShape() && src_dtype == dst_dtype) {
181  MemoryManager::Memcpy(dst.GetDataPtr(), dst.GetDevice(),
182  src.GetDataPtr(), src.GetDevice(),
183  src_dtype.ByteSize() * shape.NumElements());
184  } else if (dst.NumElements() > 1 && dst.IsContiguous() &&
185  src.NumElements() == 1 && !src_dtype.IsObject()) {
186  int64_t num_elements = dst.NumElements();
187 
188  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(dst_dtype, [&]() {
189  scalar_t scalar_element = src.To(dst_dtype).Item<scalar_t>();
190  scalar_t* dst_ptr = static_cast<scalar_t*>(dst.GetDataPtr());
191  ParallelFor(Device("CPU:0"), num_elements,
192  [&](int64_t workload_idx) {
193  dst_ptr[workload_idx] = scalar_element;
194  });
195  });
196  } else {
197  Indexer indexer({src}, dst, DtypePolicy::NONE);
198  if (src.GetDtype().IsObject()) {
199  int64_t object_byte_size = src.GetDtype().ByteSize();
200  LaunchUnaryEWKernel(indexer, [&](const void* src, void* dst) {
201  CPUCopyObjectElementKernel(src, dst, object_byte_size);
202  });
203 
204  } else {
205  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
206  using src_t = scalar_t;
207  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(dst_dtype, [&]() {
208  using dst_t = scalar_t;
209  LaunchUnaryEWKernel<src_t, dst_t>(
210  indexer, CPUCopyElementKernel<src_t, dst_t>);
211  });
212  });
213  }
214  }
215 }
216 
217 void UnaryEWCPU(const Tensor& src, Tensor& dst, UnaryEWOpCode op_code) {
218  // src and dst have been changed to have the same shape, device
219  Dtype src_dtype = src.GetDtype();
220  Dtype dst_dtype = dst.GetDtype();
221 
222  if (op_code == UnaryEWOpCode::LogicalNot) {
223  if (dst_dtype == src_dtype) {
224  Indexer indexer({src}, dst, DtypePolicy::ALL_SAME);
225 #ifdef BUILD_ISPC_MODULE
226  ispc::Indexer ispc_indexer = indexer.ToISPC();
227 #endif
228  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
229  LaunchUnaryEWKernel<scalar_t, scalar_t>(
230  indexer, CPULogicalNotElementKernel<scalar_t, scalar_t>,
231  CLOUDVIEWER_TEMPLATE_VECTORIZED(
232  scalar_t, CPULogicalNotElementKernel,
233  &ispc_indexer));
234  });
235  } else if (dst_dtype == core::Bool) {
236  Indexer indexer({src}, dst, DtypePolicy::INPUT_SAME_OUTPUT_BOOL);
237 #ifdef BUILD_ISPC_MODULE
238  ispc::Indexer ispc_indexer = indexer.ToISPC();
239 #endif
240  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
241  LaunchUnaryEWKernel<scalar_t, bool>(
242  indexer, CPULogicalNotElementKernel<scalar_t, bool>,
243  CLOUDVIEWER_TEMPLATE_VECTORIZED(
244  scalar_t, CPULogicalNotElementKernel_bool,
245  &ispc_indexer));
246  });
247  } else {
248  utility::LogError(
249  "Boolean op's output type must be boolean or the "
250  "same type as the input.");
251  }
252  } else if (op_code == UnaryEWOpCode::IsNan ||
253  op_code == UnaryEWOpCode::IsInf ||
254  op_code == UnaryEWOpCode::IsFinite) {
255  Indexer indexer({src}, dst, DtypePolicy::INPUT_SAME_OUTPUT_BOOL);
256 #ifdef BUILD_ISPC_MODULE
257  ispc::Indexer ispc_indexer = indexer.ToISPC();
258 #endif
259  DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
260  if (op_code == UnaryEWOpCode::IsNan) {
261  LaunchUnaryEWKernel<scalar_t, bool>(
262  indexer, CPUIsNanElementKernel<scalar_t>,
263  CLOUDVIEWER_TEMPLATE_VECTORIZED(
264  scalar_t, CPUIsNanElementKernel,
265  &ispc_indexer));
266  } else if (op_code == UnaryEWOpCode::IsInf) {
267  // A vectorized isinf function is not defined, so use scalar
268  // version instead.
269  LaunchUnaryEWKernel<scalar_t, bool>(
270  indexer, CPUIsInfElementKernel<scalar_t>);
271  } else if (op_code == UnaryEWOpCode::IsFinite) {
272  // A vectorized isfinite function is not defined, so use scalar
273  // version instead.
274  LaunchUnaryEWKernel<scalar_t, bool>(
275  indexer, CPUIsFiniteElementKernel<scalar_t>);
276  }
277  });
278  } else {
279  Indexer indexer({src}, dst, DtypePolicy::ALL_SAME);
280 #ifdef BUILD_ISPC_MODULE
281  ispc::Indexer ispc_indexer = indexer.ToISPC();
282 #endif
283  DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
284  switch (op_code) {
285  case UnaryEWOpCode::Sqrt:
286  LaunchUnaryEWKernel<scalar_t, scalar_t>(
287  indexer, CPUSqrtElementKernel<scalar_t>,
288  CLOUDVIEWER_TEMPLATE_VECTORIZED(
289  scalar_t, CPUSqrtElementKernel,
290  &ispc_indexer));
291  break;
292  case UnaryEWOpCode::Sin:
293  LaunchUnaryEWKernel<scalar_t, scalar_t>(
294  indexer, CPUSinElementKernel<scalar_t>,
295  CLOUDVIEWER_TEMPLATE_VECTORIZED(
296  scalar_t, CPUSinElementKernel,
297  &ispc_indexer));
298  break;
299  case UnaryEWOpCode::Cos:
300  LaunchUnaryEWKernel<scalar_t, scalar_t>(
301  indexer, CPUCosElementKernel<scalar_t>,
302  CLOUDVIEWER_TEMPLATE_VECTORIZED(
303  scalar_t, CPUCosElementKernel,
304  &ispc_indexer));
305  break;
306  case UnaryEWOpCode::Neg:
307  LaunchUnaryEWKernel<scalar_t, scalar_t>(
308  indexer, CPUNegElementKernel<scalar_t>,
309  CLOUDVIEWER_TEMPLATE_VECTORIZED(
310  scalar_t, CPUNegElementKernel,
311  &ispc_indexer));
312  break;
313  case UnaryEWOpCode::Exp:
314  LaunchUnaryEWKernel<scalar_t, scalar_t>(
315  indexer, CPUExpElementKernel<scalar_t>,
316  CLOUDVIEWER_TEMPLATE_VECTORIZED(
317  scalar_t, CPUExpElementKernel,
318  &ispc_indexer));
319  break;
320  case UnaryEWOpCode::Abs:
321  LaunchUnaryEWKernel<scalar_t, scalar_t>(
322  indexer, CPUAbsElementKernel<scalar_t>,
323  CLOUDVIEWER_TEMPLATE_VECTORIZED(
324  scalar_t, CPUAbsElementKernel,
325  &ispc_indexer));
326  break;
327  case UnaryEWOpCode::Floor:
328  LaunchUnaryEWKernel<scalar_t, scalar_t>(
329  indexer, CPUFloorElementKernel<scalar_t>,
330  CLOUDVIEWER_TEMPLATE_VECTORIZED(
331  scalar_t, CPUFloorElementKernel,
332  &ispc_indexer));
333  break;
334  case UnaryEWOpCode::Ceil:
335  LaunchUnaryEWKernel<scalar_t, scalar_t>(
336  indexer, CPUCeilElementKernel<scalar_t>,
337  CLOUDVIEWER_TEMPLATE_VECTORIZED(
338  scalar_t, CPUCeilElementKernel,
339  &ispc_indexer));
340  break;
341  case UnaryEWOpCode::Round:
342  LaunchUnaryEWKernel<scalar_t, scalar_t>(
343  indexer, CPURoundElementKernel<scalar_t>,
344  CLOUDVIEWER_TEMPLATE_VECTORIZED(
345  scalar_t, CPURoundElementKernel,
346  &ispc_indexer));
347  break;
348  case UnaryEWOpCode::Trunc:
349  LaunchUnaryEWKernel<scalar_t, scalar_t>(
350  indexer, CPUTruncElementKernel<scalar_t>,
351  CLOUDVIEWER_TEMPLATE_VECTORIZED(
352  scalar_t, CPUTruncElementKernel,
353  &ispc_indexer));
354  break;
355  default:
356  utility::LogError("Unimplemented op_code for UnaryEWCPU");
357  break;
358  }
359  });
360  }
361 }
362 
363 } // namespace kernel
364 } // namespace core
365 } // namespace cloudViewer
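The kernels above are normally reached through the tensor API rather than called directly: the UnaryEW dispatcher checks shapes, devices, and dtypes before handing src and dst to UnaryEWCPU. The short sketch below illustrates that flow under stated assumptions: it presumes Tensor::Init, Tensor::Empty, and Tensor::ToString behave as in the upstream Open3D core::Tensor API that cloudViewer mirrors, and that the cloudViewer/core include paths match those at the top of this listing. It is an illustration, not part of this file.

// Usage sketch (assumed Tensor API, mirroring upstream Open3D conventions).
#include <iostream>

#include "cloudViewer/core/Tensor.h"
#include "cloudViewer/core/kernel/UnaryEW.h"

int main() {
    using namespace cloudViewer::core;

    // 1-D Float32 tensor on the default CPU:0 device.
    Tensor src = Tensor::Init<float>({1.0f, 4.0f, 9.0f});
    Tensor dst = Tensor::Empty(src.GetShape(), src.GetDtype(), src.GetDevice());

    // Dispatches on Float32 and applies CPUSqrtElementKernel<float> per element.
    kernel::UnaryEWCPU(src, dst, kernel::UnaryEWOpCode::Sqrt);

    // IsNan/IsInf/IsFinite write into a Bool destination tensor.
    Tensor finite = Tensor::Empty(src.GetShape(), Bool, src.GetDevice());
    kernel::UnaryEWCPU(src, finite, kernel::UnaryEWOpCode::IsFinite);

    std::cout << dst.ToString() << "\n" << finite.ToString() << std::endl;
    return 0;
}

In practice the same kernels are typically hit through convenience methods such as Tensor::Sqrt() or Tensor::IsFinite(), which allocate the output and forward to the dispatcher.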