#ifdef BUILD_ISPC_MODULE
#include "UnaryEWCPU_ispc.h"
#endif
template <typename element_func_t>
static void LaunchUnaryEWKernel(const Indexer& indexer,
                                const element_func_t& element_func) {
    ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
                [&indexer, &element_func](int64_t i) {
                    element_func(indexer.GetInputPtr(0, i),
                                 indexer.GetOutputPtr(i));
                });
}
template <typename src_t, typename dst_t, typename element_func_t>
static void LaunchUnaryEWKernel(const Indexer& indexer,
                                const element_func_t& element_func) {
    ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
                [&indexer, &element_func](int64_t i) {
                    element_func(indexer.GetInputPtr<src_t>(0, i),
                                 indexer.GetOutputPtr<dst_t>(i));
                });
}
template <typename src_t,
          typename dst_t,
          typename element_func_t,
          typename vec_func_t>
static void LaunchUnaryEWKernel(const Indexer& indexer,
                                const element_func_t& element_func,
                                const vec_func_t& vec_func) {
    ParallelFor(Device("CPU:0"), indexer.NumWorkloads(),
                [&indexer, &element_func](int64_t i) {
                    element_func(indexer.GetInputPtr<src_t>(0, i),
                                 indexer.GetOutputPtr<dst_t>(i));
                },
                vec_func);
}
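// The overloads above cover an untyped kernel (raw void* element pointers),
// a typed kernel (src_t/dst_t pointers resolved by the Indexer), and a typed
// kernel paired with an ISPC-vectorized variant. A typical call site, as a
// sketch assembled from the dispatch code further below (assuming `indexer`
// and `ispc_indexer` are already in scope):
//
//   DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
//       LaunchUnaryEWKernel<scalar_t, scalar_t>(
//               indexer, CPUSqrtElementKernel<scalar_t>,
//               CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUSqrtElementKernel,
//                                               &ispc_indexer));
//   });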
template <typename src_t, typename dst_t>
static void CPUCopyElementKernel(const void* src, void* dst) {
    *static_cast<dst_t*>(dst) =
            static_cast<dst_t>(*static_cast<const src_t*>(src));
}
static void CPUCopyObjectElementKernel(const void* src,
                                       void* dst,
                                       int64_t object_byte_size) {
    const char* src_bytes = static_cast<const char*>(src);
    char* dst_bytes = static_cast<char*>(dst);
    memcpy(dst_bytes, src_bytes, object_byte_size);
}
template <typename scalar_t>
static void CPUSqrtElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            std::sqrt(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUSinElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            std::sin(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUCosElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            std::cos(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t,
          typename std::enable_if<std::is_integral<scalar_t>::value,
                                  int>::type = 0>
static void CPUNegElementKernel(const void* src, void* dst) {
    using signed_scalar_t = std::make_signed_t<scalar_t>;
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            -static_cast<signed_scalar_t>(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t,
          typename std::enable_if<!std::is_integral<scalar_t>::value,
                                  int>::type = 0>
static void CPUNegElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = -*static_cast<const scalar_t*>(src);
}
template <typename scalar_t>
static void CPUExpElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            std::exp(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUAbsElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(
            std::abs(static_cast<double>(*static_cast<const scalar_t*>(src))));
}
template <typename scalar_t>
static void CPUIsNanElementKernel(const void* src, void* dst) {
    *static_cast<bool*>(dst) =
            std::isnan(static_cast<float>(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUIsInfElementKernel(const void* src, void* dst) {
    *static_cast<bool*>(dst) =
            std::isinf(static_cast<float>(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUIsFiniteElementKernel(const void* src, void* dst) {
    *static_cast<bool*>(dst) = std::isfinite(
            static_cast<float>(*static_cast<const scalar_t*>(src)));
}
template <typename scalar_t>
static void CPUFloorElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::floor(
            static_cast<double>(*static_cast<const scalar_t*>(src))));
}
template <typename scalar_t>
static void CPUCeilElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::ceil(
            static_cast<double>(*static_cast<const scalar_t*>(src))));
}
template <typename scalar_t>
static void CPURoundElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::round(
            static_cast<double>(*static_cast<const scalar_t*>(src))));
}
template <typename scalar_t>
static void CPUTruncElementKernel(const void* src, void* dst) {
    *static_cast<scalar_t*>(dst) = static_cast<scalar_t>(std::trunc(
            static_cast<double>(*static_cast<const scalar_t*>(src))));
}
template <typename src_t, typename dst_t>
static void CPULogicalNotElementKernel(const void* src, void* dst) {
    *static_cast<dst_t*>(dst) = static_cast<dst_t>(
            !static_cast<bool>(*static_cast<const src_t*>(src)));
}
void CopyCPU(const Tensor& src, Tensor& dst) {
    // ...
    // Single-element src: convert it once, then broadcast-fill dst.
    DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(dst_dtype, [&]() {
        scalar_t scalar_element = src.To(dst_dtype).Item<scalar_t>();
        scalar_t* dst_ptr = static_cast<scalar_t*>(dst.GetDataPtr());
        ParallelFor(Device("CPU:0"), dst.NumElements(),
                    [&](int64_t workload_idx) {
                        dst_ptr[workload_idx] = scalar_element;
                    });
    });
    // ...
    // General case: per-element copy with src_t -> dst_t conversion.
    DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
        using src_t = scalar_t;
        DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(dst_dtype, [&]() {
            using dst_t = scalar_t;
            LaunchUnaryEWKernel<src_t, dst_t>(
                    indexer, CPUCopyElementKernel<src_t, dst_t>);
        });
    });
    // ...
}
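// CopyCPU backs the dtype-converting copy path of the Tensor API; a minimal
// usage sketch (assuming a contiguous Float32 CPU tensor `src`):
//
//   Tensor dst(src.GetShape(), core::Float64, src.GetDevice());
//   CopyCPU(src, dst);  // per-element float -> double conversion via
//                       // CPUCopyElementKernel<float, double>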
void UnaryEWCPU(const Tensor& src, Tensor& dst, UnaryEWOpCode op_code) {
    // ...
    if (op_code == UnaryEWOpCode::LogicalNot) {
        if (dst_dtype == src_dtype) {
            Indexer indexer({src}, dst, DtypePolicy::ALL_SAME);
#ifdef BUILD_ISPC_MODULE
            ispc::Indexer ispc_indexer = indexer.ToISPC();
#endif
            DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPULogicalNotElementKernel<scalar_t, scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(
                                scalar_t, CPULogicalNotElementKernel,
                                &ispc_indexer));
            });
        } else if (dst_dtype == core::Bool) {
            Indexer indexer({src}, dst, DtypePolicy::INPUT_SAME_OUTPUT_BOOL);
#ifdef BUILD_ISPC_MODULE
            ispc::Indexer ispc_indexer = indexer.ToISPC();
#endif
            DISPATCH_DTYPE_TO_TEMPLATE_WITH_BOOL(src_dtype, [&]() {
                LaunchUnaryEWKernel<scalar_t, bool>(
                        indexer, CPULogicalNotElementKernel<scalar_t, bool>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(
                                scalar_t, CPULogicalNotElementKernel_bool,
                                &ispc_indexer));
            });
        } else {
            utility::LogError(
                    "Boolean op's output type must be boolean or the "
                    "same type as the input.");
        }
    } else if (op_code == UnaryEWOpCode::IsNan ||
               op_code == UnaryEWOpCode::IsInf ||
               op_code == UnaryEWOpCode::IsFinite) {
        Indexer indexer({src}, dst, DtypePolicy::INPUT_SAME_OUTPUT_BOOL);
#ifdef BUILD_ISPC_MODULE
        ispc::Indexer ispc_indexer = indexer.ToISPC();
#endif
        DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
            if (op_code == UnaryEWOpCode::IsNan) {
                LaunchUnaryEWKernel<scalar_t, bool>(
                        indexer, CPUIsNanElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(
                                scalar_t, CPUIsNanElementKernel,
                                &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::IsInf) {
                LaunchUnaryEWKernel<scalar_t, bool>(
                        indexer, CPUIsInfElementKernel<scalar_t>);
            } else if (op_code == UnaryEWOpCode::IsFinite) {
                LaunchUnaryEWKernel<scalar_t, bool>(
                        indexer, CPUIsFiniteElementKernel<scalar_t>);
            }
        });
    } else {
        Indexer indexer({src}, dst, DtypePolicy::ALL_SAME);
#ifdef BUILD_ISPC_MODULE
        ispc::Indexer ispc_indexer = indexer.ToISPC();
#endif
        DISPATCH_DTYPE_TO_TEMPLATE(src_dtype, [&]() {
            if (op_code == UnaryEWOpCode::Sqrt) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUSqrtElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUSqrtElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Sin) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUSinElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUSinElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Cos) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUCosElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUCosElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Neg) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUNegElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUNegElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Exp) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUExpElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUExpElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Abs) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUAbsElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUAbsElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Floor) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUFloorElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUFloorElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Ceil) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUCeilElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUCeilElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Round) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPURoundElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPURoundElementKernel, &ispc_indexer));
            } else if (op_code == UnaryEWOpCode::Trunc) {
                LaunchUnaryEWKernel<scalar_t, scalar_t>(
                        indexer, CPUTruncElementKernel<scalar_t>,
                        CLOUDVIEWER_TEMPLATE_VECTORIZED(scalar_t, CPUTruncElementKernel, &ispc_indexer));
            }
            // ...
        });
    }
}
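// UnaryEWCPU is the CPU backend for the unary element-wise ops: LogicalNot
// may keep the input dtype or write to a bool output, IsNan/IsInf/IsFinite
// always produce bool, and the remaining math ops (Sqrt, Sin, Cos, Neg, Exp,
// Abs, Floor, Ceil, Round, Trunc) keep the input dtype. A minimal sketch of
// how it might be reached from the Tensor API (the exact call site is an
// assumption, not shown in this file):
//
//   Tensor dst(src.GetShape(), src.GetDtype(), src.GetDevice());
//   UnaryEWCPU(src, dst, UnaryEWOpCode::Sqrt);  // dst[i] = sqrt(src[i])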