ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
BinaryEWCPU.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <Logging.h>
9 
11 #include "cloudViewer/core/Dtype.h"
18 
19 #ifdef BUILD_ISPC_MODULE
20 #include "BinaryEWCPU_ispc.h"
21 #endif
22 
23 namespace cloudViewer {
24 namespace core {
25 namespace kernel {
26 
27 template <typename src_t, typename dst_t, typename element_func_t>
29  const element_func_t& element_func) {
30  ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
31  [&indexer, &element_func](int64_t i) {
32  element_func(indexer.GetInputPtr<src_t>(0, i),
33  indexer.GetInputPtr<src_t>(1, i),
34  indexer.GetOutputPtr<dst_t>(i));
35  });
36 }
37 
// Launch helper variant that additionally accepts a vectorized functor.
//
// Builds a per-element lambda that calls `element_func` with the pointers for
// input 0, input 1 and the output of workload `i`, and passes that lambda
// together with `vec_func` to a call whose arguments are
// (Device("CPU:0"), indexer.NumWorkloads(), lambda, vec_func).
//
// NOTE(review): this listing skips original lines 42 and 45, i.e. the first
// line of the function signature and the start of the call expression.
// Presumably they read `static void LaunchBinaryEWKernel(const Indexer&
// indexer,` and `ParallelFor(` -- confirm against the real source file.
38 template <typename src_t,
39  typename dst_t,
40  typename element_func_t,
41  typename vec_func_t>
43  const element_func_t& element_func,
44  const vec_func_t& vec_func) {
46  Device("CPU:0"), indexer.NumWorkloads(),
47  [&indexer, &element_func](int64_t i) {
// Scalar fallback: input 0 is the left operand, input 1 the right operand.
48  element_func(indexer.GetInputPtr<src_t>(0, i),
49  indexer.GetInputPtr<src_t>(1, i),
50  indexer.GetOutputPtr<dst_t>(i));
51  },
52  vec_func);
53 }
54 
// Element-wise maximum of two scalars.
//
// `lhs` and `rhs` are read as `const scalar_t*`; the larger value, as chosen by
// std::max, is stored through `dst`, which is written as `scalar_t*`. All three
// pointers must refer to a valid `scalar_t`. `dst` may alias either input.
template <typename scalar_t>
static void CPUMaxElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& left = *static_cast<const scalar_t*>(lhs);
    const scalar_t& right = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = std::max(left, right);
}
60 
// Element-wise minimum of two scalars.
//
// `lhs` and `rhs` are read as `const scalar_t*`; the smaller value, as chosen
// by std::min, is stored through `dst`, which is written as `scalar_t*`. All
// three pointers must refer to a valid `scalar_t`. `dst` may alias either
// input.
template <typename scalar_t>
static void CPUMinElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& left = *static_cast<const scalar_t*>(lhs);
    const scalar_t& right = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = std::min(left, right);
}
66 
// Element-wise addition: *dst = *lhs + *rhs.
//
// All three pointers are interpreted as pointing to `scalar_t`. The sum is
// computed before the store, so `dst` may alias either input.
template <typename scalar_t>
static void CPUAddElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& left = *static_cast<const scalar_t*>(lhs);
    const scalar_t& right = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = left + right;
}
72 
// Element-wise subtraction: *dst = *lhs - *rhs.
//
// All three pointers are interpreted as pointing to `scalar_t`. The difference
// is computed before the store, so `dst` may alias either input.
template <typename scalar_t>
static void CPUSubElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& left = *static_cast<const scalar_t*>(lhs);
    const scalar_t& right = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = left - right;
}
78 
// Element-wise multiplication: *dst = *lhs * *rhs.
//
// All three pointers are interpreted as pointing to `scalar_t`. The product is
// computed before the store, so `dst` may alias either input.
template <typename scalar_t>
static void CPUMulElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& left = *static_cast<const scalar_t*>(lhs);
    const scalar_t& right = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = left * right;
}
84 
// Element-wise division: *dst = *lhs / *rhs.
//
// All three pointers are interpreted as pointing to `scalar_t`, and the
// quotient follows the built-in `/` of that type (integer types truncate).
// No check for a zero divisor is made here; callers are responsible for the
// operand values. `dst` may alias either input.
template <typename scalar_t>
static void CPUDivElementKernel(const void* lhs, const void* rhs, void* dst) {
    const scalar_t& numerator = *static_cast<const scalar_t*>(lhs);
    const scalar_t& denominator = *static_cast<const scalar_t*>(rhs);
    *static_cast<scalar_t*>(dst) = numerator / denominator;
}
90 
// Element-wise logical AND.
//
// Both operands are read as `src_t` and converted to `bool`; the conjunction
// is converted to `dst_t` and stored through `dst`. The right operand is only
// converted when the left one is true (built-in `&&` short-circuit).
template <typename src_t, typename dst_t>
static void CPULogicalAndElementKernel(const void* lhs,
                                       const void* rhs,
                                       void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(static_cast<bool>(left) &&
                                                   static_cast<bool>(right));
}
99 
// Element-wise logical OR.
//
// Both operands are read as `src_t` and converted to `bool`; the disjunction
// is converted to `dst_t` and stored through `dst`. The right operand is only
// converted when the left one is false (built-in `||` short-circuit).
template <typename src_t, typename dst_t>
static void CPULogicalOrElementKernel(const void* lhs,
                                      const void* rhs,
                                      void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(static_cast<bool>(left) ||
                                                   static_cast<bool>(right));
}
108 
// Element-wise logical XOR.
//
// Both operands are read as `src_t` and converted to `bool` first, so any two
// non-zero values count as equal truth values. The result is true exactly when
// the two truth values differ; it is converted to `dst_t` and stored through
// `dst`.
template <typename src_t, typename dst_t>
static void CPULogicalXorElementKernel(const void* lhs,
                                       const void* rhs,
                                       void* dst) {
    const bool left = static_cast<bool>(*static_cast<const src_t*>(lhs));
    const bool right = static_cast<bool>(*static_cast<const src_t*>(rhs));
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left != right);
}
117 
// Element-wise "greater than": *dst = dst_t(*lhs > *rhs).
//
// Operands are read as `src_t`; the boolean comparison result is converted to
// `dst_t` and stored through `dst`. `dst` may alias either input.
template <typename src_t, typename dst_t>
static void CPUGtElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left > right);
}
123 
// Element-wise "less than": *dst = dst_t(*lhs < *rhs).
//
// Operands are read as `src_t`; the boolean comparison result is converted to
// `dst_t` and stored through `dst`. `dst` may alias either input.
template <typename src_t, typename dst_t>
static void CPULtElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left < right);
}
129 
// Element-wise "greater than or equal": *dst = dst_t(*lhs >= *rhs).
//
// Operands are read as `src_t`; the boolean comparison result is converted to
// `dst_t` and stored through `dst`. `dst` may alias either input.
template <typename src_t, typename dst_t>
static void CPUGeqElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left >= right);
}
135 
// Element-wise "less than or equal": *dst = dst_t(*lhs <= *rhs).
//
// Operands are read as `src_t`; the boolean comparison result is converted to
// `dst_t` and stored through `dst`. `dst` may alias either input.
template <typename src_t, typename dst_t>
static void CPULeqElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left <= right);
}
141 
// Element-wise equality: *dst = dst_t(*lhs == *rhs).
//
// Operands are read as `src_t` and compared with the type's `==`; the boolean
// result is converted to `dst_t` and stored through `dst`. `dst` may alias
// either input.
template <typename src_t, typename dst_t>
static void CPUEqElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left == right);
}
147 
// Element-wise inequality: *dst = dst_t(*lhs != *rhs).
//
// Operands are read as `src_t` and compared with the type's `!=`; the boolean
// result is converted to `dst_t` and stored through `dst`. `dst` may alias
// either input.
template <typename src_t, typename dst_t>
static void CPUNeqElementKernel(const void* lhs, const void* rhs, void* dst) {
    const src_t& left = *static_cast<const src_t*>(lhs);
    const src_t& right = *static_cast<const src_t*>(rhs);
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(left != right);
}
153 
// CPU entry point for element-wise binary tensor operations.
//
// Reads the source dtype from `lhs` (the dtype of `rhs` is not read here) and
// the destination dtype from `dst`, then takes one of three paths:
//   1. `op_code` is looked up in s_boolean_binary_ew_op_codes (logical and
//      comparison kernels are dispatched on this path). The output dtype must
//      either equal the input dtype or be core::Bool; any other combination
//      reaches the error branch with the message "Boolean op's output type
//      must be boolean or the same type as the input.".
//   2. `op_code` is Maximum or Minimum: scalar kernels only, dispatched over
//      dtypes including bool.
//   3. Everything else: Add / Sub / Mul / Div, dispatched over dtypes without
//      bool.
// Within every switch, an op code with no matching case hits `default: break`
// and produces no output and no error.
//
// NOTE(review): this text is a Doxygen source listing. Lines that began with a
// hyperlink are absent (gaps in the leading listing numbers, e.g. 162, 173,
// 177, 249, 335, 344, 366-367). The hedged notes below mark what each gap
// presumably contained -- confirm against the real source file.
154 void BinaryEWCPU(const Tensor& lhs,
155  const Tensor& rhs,
156  Tensor& dst,
157  BinaryEWOpCode op_code) {
158  Dtype src_dtype = lhs.GetDtype();
159  Dtype dst_dtype = dst.GetDtype();
160 
// Path 1: op codes found in the boolean-op set.
// NOTE(review): listing line 162 is missing; presumably it completes the
// comparison against s_boolean_binary_ew_op_codes.end() -- confirm.
161  if (s_boolean_binary_ew_op_codes.find(op_code) !=
163  if (dst_dtype == src_dtype) {
164  // Inplace boolean op's output type is the same as the
165  // input. e.g. np.logical_and(a, b, out=a), where a, b are
166  // floats.
167  Indexer indexer({lhs, rhs}, dst, DtypePolicy::ALL_SAME);
// The ISPC view of the indexer only exists when the ISPC module is built.
168 #ifdef BUILD_ISPC_MODULE
169  ispc::Indexer ispc_indexer = indexer.ToISPC();
170 #endif
171  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
172  switch (op_code) {
// NOTE(review): the case labels for the three logical kernels below (listing
// lines 173, 181, 189) and the opener of each third argument (e.g. line 177,
// presumably `CLOUDVIEWER_TEMPLATE_VECTORIZED(`) are missing -- confirm.
174  LaunchBinaryEWKernel<scalar_t, scalar_t>(
175  indexer,
176  CPULogicalAndElementKernel<scalar_t, scalar_t>,
178  scalar_t, CPULogicalAndElementKernel,
179  &ispc_indexer));
180  break;
182  LaunchBinaryEWKernel<scalar_t, scalar_t>(
183  indexer,
184  CPULogicalOrElementKernel<scalar_t, scalar_t>,
186  scalar_t, CPULogicalOrElementKernel,
187  &ispc_indexer));
188  break;
190  LaunchBinaryEWKernel<scalar_t, scalar_t>(
191  indexer,
192  CPULogicalXorElementKernel<scalar_t, scalar_t>,
194  scalar_t, CPULogicalXorElementKernel,
195  &ispc_indexer));
196  break;
197  case BinaryEWOpCode::Gt:
198  LaunchBinaryEWKernel<scalar_t, scalar_t>(
199  indexer, CPUGtElementKernel<scalar_t, scalar_t>,
201  scalar_t, CPULogicalGtElementKernel,
202  &ispc_indexer));
203  break;
204  case BinaryEWOpCode::Lt:
205  LaunchBinaryEWKernel<scalar_t, scalar_t>(
206  indexer, CPULtElementKernel<scalar_t, scalar_t>,
208  scalar_t, CPULogicalLtElementKernel,
209  &ispc_indexer));
210  break;
211  case BinaryEWOpCode::Ge:
212  LaunchBinaryEWKernel<scalar_t, scalar_t>(
213  indexer,
214  CPUGeqElementKernel<scalar_t, scalar_t>,
216  scalar_t, CPULogicalGeqElementKernel,
217  &ispc_indexer));
218  break;
219  case BinaryEWOpCode::Le:
220  LaunchBinaryEWKernel<scalar_t, scalar_t>(
221  indexer,
222  CPULeqElementKernel<scalar_t, scalar_t>,
224  scalar_t, CPULogicalLeqElementKernel,
225  &ispc_indexer));
226  break;
227  case BinaryEWOpCode::Eq:
228  LaunchBinaryEWKernel<scalar_t, scalar_t>(
229  indexer, CPUEqElementKernel<scalar_t, scalar_t>,
231  scalar_t, CPULogicalEqElementKernel,
232  &ispc_indexer));
233  break;
234  case BinaryEWOpCode::Ne:
235  LaunchBinaryEWKernel<scalar_t, scalar_t>(
236  indexer,
237  CPUNeqElementKernel<scalar_t, scalar_t>,
239  scalar_t, CPULogicalNeqElementKernel,
240  &ispc_indexer));
241  break;
242  default:
243  break;
244  }
245  });
246  } else if (dst_dtype == core::Bool) {
247  // By default, output is boolean type.
// NOTE(review): listing line 249 (the remaining Indexer constructor argument,
// presumably a DtypePolicy value) is missing -- confirm.
248  Indexer indexer({lhs, rhs}, dst,
250 #ifdef BUILD_ISPC_MODULE
251  ispc::Indexer ispc_indexer = indexer.ToISPC();
252 #endif
253  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
254  switch (op_code) {
// Same kernels as the same-dtype branch, but instantiated with dst_t = bool
// and paired with the `_bool`-suffixed kernel names in the third argument.
256  LaunchBinaryEWKernel<scalar_t, bool>(
257  indexer,
258  CPULogicalAndElementKernel<scalar_t, bool>,
260  scalar_t,
261  CPULogicalAndElementKernel_bool,
262  &ispc_indexer));
263  break;
265  LaunchBinaryEWKernel<scalar_t, bool>(
266  indexer,
267  CPULogicalOrElementKernel<scalar_t, bool>,
269  scalar_t,
270  CPULogicalOrElementKernel_bool,
271  &ispc_indexer));
272  break;
274  LaunchBinaryEWKernel<scalar_t, bool>(
275  indexer,
276  CPULogicalXorElementKernel<scalar_t, bool>,
278  scalar_t,
279  CPULogicalXorElementKernel_bool,
280  &ispc_indexer));
281  break;
282  case BinaryEWOpCode::Gt:
283  LaunchBinaryEWKernel<scalar_t, bool>(
284  indexer, CPUGtElementKernel<scalar_t, bool>,
286  scalar_t,
287  CPULogicalGtElementKernel_bool,
288  &ispc_indexer));
289  break;
290  case BinaryEWOpCode::Lt:
291  LaunchBinaryEWKernel<scalar_t, bool>(
292  indexer, CPULtElementKernel<scalar_t, bool>,
294  scalar_t,
295  CPULogicalLtElementKernel_bool,
296  &ispc_indexer));
297  break;
298  case BinaryEWOpCode::Ge:
299  LaunchBinaryEWKernel<scalar_t, bool>(
300  indexer, CPUGeqElementKernel<scalar_t, bool>,
302  scalar_t,
303  CPULogicalGeqElementKernel_bool,
304  &ispc_indexer));
305  break;
306  case BinaryEWOpCode::Le:
307  LaunchBinaryEWKernel<scalar_t, bool>(
308  indexer, CPULeqElementKernel<scalar_t, bool>,
310  scalar_t,
311  CPULogicalLeqElementKernel_bool,
312  &ispc_indexer));
313  break;
314  case BinaryEWOpCode::Eq:
315  LaunchBinaryEWKernel<scalar_t, bool>(
316  indexer, CPUEqElementKernel<scalar_t, bool>,
318  scalar_t,
319  CPULogicalEqElementKernel_bool,
320  &ispc_indexer));
321  break;
322  case BinaryEWOpCode::Ne:
323  LaunchBinaryEWKernel<scalar_t, bool>(
324  indexer, CPUNeqElementKernel<scalar_t, bool>,
326  scalar_t,
327  CPULogicalNeqElementKernel_bool,
328  &ispc_indexer));
329  break;
330  default:
331  break;
332  }
333  });
334  } else {
// Output dtype is neither the input dtype nor bool: report the error.
// NOTE(review): listing line 335 (the call that receives this message,
// presumably a LogError invocation) is missing -- confirm.
336  "Boolean op's output type must be boolean or the "
337  "same type as the input.");
338  }
// Path 2: Maximum / Minimum use the scalar launcher only (no ISPC argument).
339  } else if (op_code == BinaryEWOpCode::Maximum ||
340  op_code == BinaryEWOpCode::Minimum) {
341  Indexer indexer({lhs, rhs}, dst, DtypePolicy::ALL_SAME);
342  DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
343  switch (op_code) {
// NOTE(review): the two case labels (listing lines 344 and 348, presumably
// Maximum and Minimum in that order) are missing -- confirm.
345  LaunchBinaryEWKernel<scalar_t, scalar_t>(
346  indexer, CPUMaxElementKernel<scalar_t>);
347  break;
349  LaunchBinaryEWKernel<scalar_t, scalar_t>(
350  indexer, CPUMinElementKernel<scalar_t>);
351  break;
352  default:
353  break;
354  }
355  });
// Path 3: arithmetic ops; input and output dtypes must all match (ALL_SAME),
// and the dispatch macro used here is the variant without bool.
356  } else {
357  Indexer indexer({lhs, rhs}, dst, DtypePolicy::ALL_SAME);
358 #ifdef BUILD_ISPC_MODULE
359  ispc::Indexer ispc_indexer = indexer.ToISPC();
360 #endif
361  DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
362  switch (op_code) {
// NOTE(review): for Add / Sub / Mul the two listing lines before
// `&ispc_indexer));` (e.g. 366-367) are missing; presumably they open the
// vectorized-kernel macro and name the ISPC kernel -- confirm.
363  case BinaryEWOpCode::Add:
364  LaunchBinaryEWKernel<scalar_t, scalar_t>(
365  indexer, CPUAddElementKernel<scalar_t>,
368  &ispc_indexer));
369  break;
370  case BinaryEWOpCode::Sub:
371  LaunchBinaryEWKernel<scalar_t, scalar_t>(
372  indexer, CPUSubElementKernel<scalar_t>,
375  &ispc_indexer));
376  break;
377  case BinaryEWOpCode::Mul:
378  LaunchBinaryEWKernel<scalar_t, scalar_t>(
379  indexer, CPUMulElementKernel<scalar_t>,
382  &ispc_indexer));
383  break;
384  case BinaryEWOpCode::Div:
385  // The vectorized Div kernel causes a crash in the Python
386  // tests, so use scalar version instead.
387  LaunchBinaryEWKernel<scalar_t, scalar_t>(
388  indexer, CPUDivElementKernel<scalar_t>);
389  break;
390  default:
391  break;
392  }
393  });
394  }
395 }
396 
397 } // namespace kernel
398 } // namespace core
399 } // namespace cloudViewer
Indexer indexer
#define DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(DTYPE,...)
Definition: Dispatch.h:68
#define DISPATCH_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:31
#define CLOUDVIEWER_TEMPLATE_VECTORIZED(T, ISPCKernel,...)
Definition: ParallelFor.h:246
Dtype GetDtype() const
Definition: Tensor.h:1164
#define LogError(...)
Definition: Logging.h:60
int min(int a, int b)
Definition: cutil_math.h:53
int max(int a, int b)
Definition: cutil_math.h:48
static void CPUGtElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPULogicalAndElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:92
void BinaryEWCPU(const Tensor &lhs, const Tensor &rhs, Tensor &dst, BinaryEWOpCode op_code)
const std::unordered_set< BinaryEWOpCode, utility::hash_enum_class > s_boolean_binary_ew_op_codes
Definition: BinaryEW.cpp:22
static void LaunchBinaryEWKernel(const Indexer &indexer, const element_func_t &element_func)
Definition: BinaryEWCPU.cpp:28
static void CPUMaxElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:56
static void CPUMulElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:80
static void CPULogicalOrElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPUEqElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPUMinElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:62
static void CPULtElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPUGeqElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPULeqElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPUNeqElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPULogicalXorElementKernel(const void *lhs, const void *rhs, void *dst)
static void CPUDivElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:86
static void CPUSubElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:74
static void CPUAddElementKernel(const void *lhs, const void *rhs, void *dst)
Definition: BinaryEWCPU.cpp:68
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:111
const Dtype Bool
Definition: Dtype.cpp:52
Generic file read and write utility for python interface.