static NppStreamContext MakeNPPContext() {
    NppStreamContext context;
    context.hStream = core::cuda::GetStream();
    context.nCudaDeviceId = core::cuda::GetDevice();

    cudaDeviceProp device_prop;
    CLOUDVIEWER_CUDA_CHECK(
            cudaGetDeviceProperties(&device_prop, core::cuda::GetDevice()));

    context.nMultiProcessorCount = device_prop.multiProcessorCount;
    context.nMaxThreadsPerMultiProcessor =
            device_prop.maxThreadsPerMultiProcessor;
    context.nSharedMemPerBlock = device_prop.sharedMemPerBlock;

    int cc_major = 0;
    CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
            &cc_major, cudaDevAttrComputeCapabilityMajor,
            core::cuda::GetDevice()));
    context.nCudaDevAttrComputeCapabilityMajor = cc_major;

    int cc_minor = 0;
    CLOUDVIEWER_CUDA_CHECK(cudaDeviceGetAttribute(
            &cc_minor, cudaDevAttrComputeCapabilityMinor,
            core::cuda::GetDevice()));
    context.nCudaDevAttrComputeCapabilityMinor = cc_minor;

#if NPP_VERSION >= 11100
    unsigned int stream_flags;
    CLOUDVIEWER_CUDA_CHECK(
            cudaStreamGetFlags(core::cuda::GetStream(), &stream_flags));
    context.nStreamFlags = stream_flags;
#endif
    return context;
}
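// Every wrapper below passes this NppStreamContext to the "_Ctx" variants of
// the NPP primitives, so the filters run on CloudViewer's current CUDA stream
// rather than NPP's library-global default stream.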
void RGBToGray(const core::Tensor &src_im, core::Tensor &dst_im) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    NppiSize size_ROI = {static_cast<int>(dst_im.GetShape(1)),
                         static_cast<int>(dst_im.GetShape(0))};

#define NPP_ARGS                                                           \
    static_cast<const npp_dtype *>(src_im.GetDataPtr()),                  \
            src_im.GetStride(0) * dtype.ByteSize(),                       \
            static_cast<npp_dtype *>(dst_im.GetDataPtr()),                \
            dst_im.GetStride(0) * dtype.ByteSize(), size_ROI, context
    if (dtype == core::UInt8) {
        using npp_dtype = Npp8u;
        nppiRGBToGray_8u_C3C1R_Ctx(NPP_ARGS);
    } else if (dtype == core::UInt16) {
        using npp_dtype = Npp16u;
        nppiRGBToGray_16u_C3C1R_Ctx(NPP_ARGS);
    } else if (dtype == core::Float32) {
        using npp_dtype = Npp32f;
        nppiRGBToGray_32f_C3C1R_Ctx(NPP_ARGS);
    }
#undef NPP_ARGS
}
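// Note on RGBToGray(): the nppiRGBToGray_{8u,16u,32f}_C3C1R primitives collapse
// a 3-channel RGB image into a single gray channel; per NVIDIA's NPP
// documentation they use the fixed luma weights Y = 0.299 R + 0.587 G + 0.114 B.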
void Resize(const core::Tensor &src_im,
            core::Tensor &dst_im,
            t::geometry::Image::InterpType interp_type) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    NppiSize src_size = {static_cast<int>(src_im.GetShape(1)),
                         static_cast<int>(src_im.GetShape(0))};
    NppiRect src_roi = {0, 0, static_cast<int>(src_im.GetShape(1)),
                        static_cast<int>(src_im.GetShape(0))};
    NppiSize dst_size = {static_cast<int>(dst_im.GetShape(1)),
                         static_cast<int>(dst_im.GetShape(0))};
    NppiRect dst_roi = {0, 0, static_cast<int>(dst_im.GetShape(1)),
                        static_cast<int>(dst_im.GetShape(0))};

    static const std::unordered_map<t::geometry::Image::InterpType, int>
            type_dict = {
                    {t::geometry::Image::InterpType::Nearest, NPPI_INTER_NN},
                    {t::geometry::Image::InterpType::Linear, NPPI_INTER_LINEAR},
                    {t::geometry::Image::InterpType::Cubic, NPPI_INTER_CUBIC},
                    {t::geometry::Image::InterpType::Lanczos,
                     NPPI_INTER_LANCZOS},
                    {t::geometry::Image::InterpType::Super, NPPI_INTER_SUPER},
            };
    auto it = type_dict.find(interp_type);
    if (it == type_dict.end()) {
        utility::LogError("Unsupported interpolation type {}.",
                          static_cast<int>(interp_type));
    }

#define NPP_ARGS                                                              \
    static_cast<const npp_dtype *>(src_im.GetDataPtr()),                     \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_roi,       \
            static_cast<npp_dtype *>(dst_im.GetDataPtr()),                   \
            dst_im.GetStride(0) * dtype.ByteSize(), dst_size, dst_roi,       \
            it->second, context
    if (dtype == core::UInt8) {
        using npp_dtype = Npp8u;
        if (src_im.GetShape(2) == 1) {
            nppiResize_8u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiResize_8u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiResize_8u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::UInt16) {
        using npp_dtype = Npp16u;
        if (src_im.GetShape(2) == 1) {
            nppiResize_16u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiResize_16u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiResize_16u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::Float32) {
        using npp_dtype = Npp32f;
        if (src_im.GetShape(2) == 1) {
            nppiResize_32f_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiResize_32f_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiResize_32f_C4R_Ctx(NPP_ARGS);
        }
    }
#undef NPP_ARGS
}
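// Note on Resize(): the interpolation mode is looked up once in type_dict and
// forwarded as the eInterpolation argument of nppiResize_*. NPPI_INTER_SUPER
// (InterpType::Super) is only valid for downscaling, as noted in the InterpType
// documentation.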
void Dilate(const core::Tensor &src_im, core::Tensor &dst_im, int kernel_size) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    // Flat (all-ones) structuring element of kernel_size x kernel_size.
    core::Tensor mask =
            core::Tensor::Ones(core::SizeVector{kernel_size, kernel_size, 1},
                               core::UInt8, src_im.GetDevice());

    NppiSize mask_size = {kernel_size, kernel_size};
    NppiSize src_size = {static_cast<int>(src_im.GetShape(1)),
                         static_cast<int>(src_im.GetShape(0))};
    NppiPoint src_offset = {0, 0};
    NppiSize size_ROI = {static_cast<int>(dst_im.GetShape(1)),
                         static_cast<int>(dst_im.GetShape(0))};
    NppiPoint anchor = {kernel_size / 2, kernel_size / 2};

#define NPP_ARGS                                                              \
    static_cast<const npp_dtype *>(src_im.GetDataPtr()),                     \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_offset,    \
            static_cast<npp_dtype *>(dst_im.GetDataPtr()),                   \
            dst_im.GetStride(0) * dtype.ByteSize(), size_ROI,                \
            static_cast<const uint8_t *>(mask.GetDataPtr()), mask_size,      \
            anchor, NPP_BORDER_REPLICATE, context
    if (dtype == core::UInt8) {
        using npp_dtype = Npp8u;
        if (src_im.GetShape(2) == 1) {
            nppiDilateBorder_8u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiDilateBorder_8u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiDilateBorder_8u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::UInt16) {
        using npp_dtype = Npp16u;
        if (src_im.GetShape(2) == 1) {
            nppiDilateBorder_16u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiDilateBorder_16u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiDilateBorder_16u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::Float32) {
        using npp_dtype = Npp32f;
        if (src_im.GetShape(2) == 1) {
            nppiDilateBorder_32f_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiDilateBorder_32f_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiDilateBorder_32f_C4R_Ctx(NPP_ARGS);
        }
    }
#undef NPP_ARGS
}
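// Note on Dilate(): the anchor {kernel_size / 2, kernel_size / 2} centers the
// all-ones structuring element on each output pixel, and NPP_BORDER_REPLICATE
// makes out-of-image reads reuse the nearest edge pixel, so the whole image is
// processed as a single ROI without shrinking the output.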
void Filter(const core::Tensor &src_im,
            core::Tensor &dst_im,
            const core::Tensor &kernel) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    NppiSize src_size = {static_cast<int>(src_im.GetShape(1)),
                         static_cast<int>(src_im.GetShape(0))};
    NppiPoint src_offset = {0, 0};
    NppiSize size_ROI = {static_cast<int>(dst_im.GetShape(1)),
                         static_cast<int>(dst_im.GetShape(0))};
    NppiSize kernel_size = {static_cast<int>(kernel.GetShape()[0]),
                            static_cast<int>(kernel.GetShape()[1])};
    NppiPoint anchor = {static_cast<int>(kernel.GetShape()[0] / 2),
                        static_cast<int>(kernel.GetShape()[1] / 2)};

    // Pass the reversed kernel to NPP (see the note after this function).
    core::Tensor kernel_flipped = kernel.Reverse();
    const float *kernel_ptr =
            static_cast<const float *>(kernel_flipped.GetDataPtr());

#define NPP_ARGS                                                              \
    static_cast<const npp_dtype *>(src_im.GetDataPtr()),                     \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_offset,    \
            static_cast<npp_dtype *>(dst_im.GetDataPtr()),                   \
            dst_im.GetStride(0) * dtype.ByteSize(), size_ROI, kernel_ptr,    \
            kernel_size, anchor, NPP_BORDER_REPLICATE, context
    if (dtype == core::UInt8) {
        using npp_dtype = Npp8u;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBorder32f_8u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBorder32f_8u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiFilterBorder32f_8u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::UInt16) {
        using npp_dtype = Npp16u;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBorder32f_16u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBorder32f_16u_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiFilterBorder32f_16u_C4R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::Float32) {
        using npp_dtype = Npp32f;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBorder_32f_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBorder_32f_C3R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 4) {
            nppiFilterBorder_32f_C4R_Ctx(NPP_ARGS);
        }
    }
#undef NPP_ARGS
}
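// Note on Filter(): the coefficients handed to nppiFilterBorder* come from
// kernel_flipped, i.e. the caller's kernel Reverse()d end to end, which for a
// 2D kernel amounts to a 180 degree rotation. Presumably this reconciles NPP's
// correlation-style weighting with the convolution convention expected by
// Filter(); the NPP call itself only sees the raw float coefficient pointer.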
void FilterBilateral(const core::Tensor &src_im,
                     core::Tensor &dst_im,
                     int kernel_size,
                     float value_sigma,
                     float distance_sigma) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    NppiSize src_size = {static_cast<int>(src_im.GetShape(1)),
                         static_cast<int>(src_im.GetShape(0))};
    NppiPoint src_offset = {0, 0};
    NppiSize size_ROI = {static_cast<int>(dst_im.GetShape(1)),
                         static_cast<int>(dst_im.GetShape(0))};

#define NPP_ARGS                                                              \
    static_cast<const npp_dtype *>(src_im.GetDataPtr()),                     \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_offset,    \
            static_cast<npp_dtype *>(dst_im.GetDataPtr()),                   \
            dst_im.GetStride(0) * dtype.ByteSize(), size_ROI,                \
            kernel_size / 2, 1, value_sigma * value_sigma,                   \
            distance_sigma * distance_sigma, NPP_BORDER_REPLICATE, context
    if (dtype == core::UInt8) {
        using npp_dtype = Npp8u;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBilateralGaussBorder_8u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBilateralGaussBorder_8u_C3R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::UInt16) {
        using npp_dtype = Npp16u;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBilateralGaussBorder_16u_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBilateralGaussBorder_16u_C3R_Ctx(NPP_ARGS);
        }
    } else if (dtype == core::Float32) {
        using npp_dtype = Npp32f;
        if (src_im.GetShape(2) == 1) {
            nppiFilterBilateralGaussBorder_32f_C1R_Ctx(NPP_ARGS);
        } else if (src_im.GetShape(2) == 3) {
            nppiFilterBilateralGaussBorder_32f_C3R_Ctx(NPP_ARGS);
        }
    }
#undef NPP_ARGS
}
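// Note on FilterBilateral(): nppiFilterBilateralGaussBorder_* takes the filter
// radius (kernel_size / 2), a source-pixel step of 1, and squared sigmas, which
// is why value_sigma * value_sigma and distance_sigma * distance_sigma are
// passed. NPP provides only 1- and 3-channel variants, hence no C4R branch.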
void FilterGaussian(const core::Tensor &src_im, core::Tensor &dst_im,
                    int kernel_size, float sigma) {
    if (src_im.GetDevice() != dst_im.GetDevice()) {
        utility::LogError(
                "src_im and dst_im are not on the same device, got {} and {}.",
                src_im.GetDevice().ToString(), dst_im.GetDevice().ToString());
    }
    // Normalized 1D Gaussian sampled over [-kernel_size / 2, kernel_size / 2].
    core::Tensor mask = core::Tensor::Arange(
            static_cast<float>(-(kernel_size / 2)),
            static_cast<float>(kernel_size / 2 + 1), 1.0f, core::Float32,
            src_im.GetDevice());
    mask = (mask * mask / (-2.0f * sigma * sigma)).Exp();
    mask = mask / mask.Sum({0});
    mask = mask.View({kernel_size, 1});
    core::Tensor kernel = mask.Matmul(mask.T());
    return Filter(src_im, dst_im, kernel);
}
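// Note on FilterGaussian(): it reuses Filter() above. A 1D Gaussian
// g(x) = exp(-x^2 / (2 sigma^2)) is sampled at integer offsets, normalized to
// sum to 1, reshaped to (kernel_size, 1), and expanded to the separable 2D
// kernel g * g^T, so a single nppiFilterBorder call applies the full 2D blur.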
void FilterSobel(const core::Tensor &src_im,
                 core::Tensor &dst_im_dx,
                 core::Tensor &dst_im_dy,
                 int kernel_size) {
    if (src_im.GetDevice() != dst_im_dx.GetDevice() ||
        src_im.GetDevice() != dst_im_dy.GetDevice()) {
        utility::LogError(
                "src_im, dst_im_dx, and dst_im_dy are not on the same device, "
                "got {}, {} and {}.",
                src_im.GetDevice().ToString(),
                dst_im_dx.GetDevice().ToString(),
                dst_im_dy.GetDevice().ToString());
    }

    NppStreamContext context = MakeNPPContext();
    core::Dtype dtype = src_im.GetDtype();

    NppiSize src_size = {static_cast<int>(src_im.GetShape(1)),
                         static_cast<int>(src_im.GetShape(0))};
    NppiPoint src_offset = {0, 0};
    NppiSize size_ROI = {static_cast<int>(dst_im_dx.GetShape(1)),
                         static_cast<int>(dst_im_dx.GetShape(0))};

    const static std::unordered_map<int, NppiMaskSize> kernel_size_dict = {
            {3, NPP_MASK_SIZE_3_X_3},
            {5, NPP_MASK_SIZE_5_X_5},
    };
    auto it = kernel_size_dict.find(kernel_size);
    if (it == kernel_size_dict.end()) {
        utility::LogError("Unsupported Sobel kernel size {}.", kernel_size);
    }

#define NPP_ARGS_DX                                                           \
    static_cast<const npp_src_dtype *>(src_im.GetDataPtr()),                 \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_offset,    \
            static_cast<npp_dst_dtype *>(dst_im_dx.GetDataPtr()),            \
            dst_im_dx.GetStride(0) * dst_im_dx.GetDtype().ByteSize(),        \
            size_ROI, it->second, NPP_BORDER_REPLICATE, context
#define NPP_ARGS_DY                                                           \
    static_cast<const npp_src_dtype *>(src_im.GetDataPtr()),                 \
            src_im.GetStride(0) * dtype.ByteSize(), src_size, src_offset,    \
            static_cast<npp_dst_dtype *>(dst_im_dy.GetDataPtr()),            \
            dst_im_dy.GetStride(0) * dst_im_dy.GetDtype().ByteSize(),        \
            size_ROI, it->second, NPP_BORDER_REPLICATE, context
    if (dtype == core::UInt8) {
        // Gradients can be negative, so UInt8 input writes signed 16-bit
        // outputs; Float32 input keeps Float32 outputs below.
        using npp_src_dtype = Npp8u;
        using npp_dst_dtype = Npp16s;
        nppiFilterSobelVertBorder_8u16s_C1R_Ctx(NPP_ARGS_DX);
        nppiFilterSobelHorizBorder_8u16s_C1R_Ctx(NPP_ARGS_DY);
    } else if (dtype == core::Float32) {
        using npp_src_dtype = Npp32f;
        using npp_dst_dtype = Npp32f;
        nppiFilterSobelVertMaskBorder_32f_C1R_Ctx(NPP_ARGS_DX);
        nppiFilterSobelHorizMaskBorder_32f_C1R_Ctx(NPP_ARGS_DY);
    }
#undef NPP_ARGS_DX
#undef NPP_ARGS_DY

    int cuda_version;
    CLOUDVIEWER_CUDA_CHECK(cudaRuntimeGetVersion(&cuda_version));
    if (cuda_version < 10020) {