ACloudViewer 3.9.4
A Modern Library for 3D Data Processing
PointCloudImpl.h
// ----------------------------------------------------------------------------
// -                        CloudViewer: www.cloudViewer.org                  -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <Logging.h>

#include <atomic>
#include <cmath>
#include <vector>

#include "cloudViewer/core/Dtype.h"
// Further headers implied by the symbols used below; the exact paths are an
// assumption based on the cloudViewer source layout.
#include "cloudViewer/core/CUDAUtils.h"
#include "cloudViewer/core/Dispatch.h"
#include "cloudViewer/core/ParallelFor.h"
#include "cloudViewer/core/Tensor.h"
#include "cloudViewer/core/linalg/kernel/Matrix.h"
#include "cloudViewer/core/linalg/kernel/SVD3x3.h"
#include "cloudViewer/core/nns/NearestNeighborSearch.h"

namespace cloudViewer {
namespace t {
namespace geometry {
namespace kernel {
namespace pointcloud {

#ifndef __CUDACC__
using std::abs;
using std::max;
using std::min;
using std::sqrt;
#endif

#if defined(__CUDACC__)
void UnprojectCUDA
#else
void UnprojectCPU
#endif
        (const core::Tensor& depth,
         utility::optional<std::reference_wrapper<const core::Tensor>>
                 image_colors,
         core::Tensor& points,
         utility::optional<std::reference_wrapper<core::Tensor>> colors,
         const core::Tensor& intrinsics,
         const core::Tensor& extrinsics,
         float depth_scale,
         float depth_max,
         int64_t stride) {
    const bool have_colors = image_colors.has_value();
    NDArrayIndexer depth_indexer(depth, 2);
    NDArrayIndexer image_colors_indexer;

    // The camera pose is the inverse of the extrinsic matrix.
    const core::Tensor pose = InverseTransformation(extrinsics);
    TransformIndexer ti(intrinsics, pose, 1.0f);

    // Output
    int64_t rows_strided = depth_indexer.GetShape(0) / stride;
    int64_t cols_strided = depth_indexer.GetShape(1) / stride;

    points = core::Tensor({rows_strided * cols_strided, 3}, core::Float32,
                          depth.GetDevice());
    NDArrayIndexer point_indexer(points, 1);
    NDArrayIndexer colors_indexer;
    if (have_colors) {
        const auto& imcol = image_colors.value().get();
        image_colors_indexer = NDArrayIndexer{imcol, 2};
        colors.value().get() = core::Tensor({rows_strided * cols_strided, 3},
                                            core::Float32, imcol.GetDevice());
        colors_indexer = NDArrayIndexer(colors.value().get(), 1);
    }

    // Counter
#if defined(__CUDACC__)
    core::Tensor count(std::vector<int>{0}, {}, core::Int32,
                       depth.GetDevice());
    int* count_ptr = count.GetDataPtr<int>();
#else
    std::atomic<int> count_atomic(0);
    std::atomic<int>* count_ptr = &count_atomic;
#endif

    int64_t n = rows_strided * cols_strided;

    DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
        core::ParallelFor(
                depth.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t y = (workload_idx / cols_strided) * stride;
                    int64_t x = (workload_idx % cols_strided) * stride;

                    float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
                              depth_scale;
                    if (d > 0 && d < depth_max) {
                        int idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);

                        float x_c = 0, y_c = 0, z_c = 0;
                        ti.Unproject(static_cast<float>(x),
                                     static_cast<float>(y), d, &x_c, &y_c,
                                     &z_c);

                        float* vertex = point_indexer.GetDataPtr<float>(idx);
                        ti.RigidTransform(x_c, y_c, z_c, vertex + 0,
                                          vertex + 1, vertex + 2);
                        if (have_colors) {
                            float* pcd_pixel =
                                    colors_indexer.GetDataPtr<float>(idx);
                            float* image_pixel =
                                    image_colors_indexer.GetDataPtr<float>(x,
                                                                           y);
                            *pcd_pixel = *image_pixel;
                            *(pcd_pixel + 1) = *(image_pixel + 1);
                            *(pcd_pixel + 2) = *(image_pixel + 2);
                        }
                    }
                });
    });
#if defined(__CUDACC__)
    int total_pts_count = count.Item<int>();
#else
    int total_pts_count = (*count_ptr).load();
#endif

#ifdef __CUDACC__
    core::cuda::Synchronize();
#endif
    points = points.Slice(0, 0, total_pts_count);
    if (have_colors) {
        colors.value().get() =
                colors.value().get().Slice(0, 0, total_pts_count);
    }
}

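// Illustrative usage of the kernel above (a sketch, not part of the original
// header; `depth`, `intrinsics`, and `extrinsics` are assumed pre-built
// tensors on the same device):
//
//   core::Tensor points;
//   UnprojectCPU(depth, utility::nullopt, points, utility::nullopt,
//                intrinsics, extrinsics,
//                /*depth_scale=*/1000.0f,  // e.g. millimeters -> meters
//                /*depth_max=*/3.0f, /*stride=*/1);
//   // `points` now holds the {N, 3} Float32 positions of all valid pixels.
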
#if defined(__CUDACC__)
void GetPointMaskWithinAABBCUDA
#else
void GetPointMaskWithinAABBCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& min_bound,
         const core::Tensor& max_bound,
         core::Tensor& mask) {
    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const int64_t n = points.GetLength();
        const scalar_t* min_bound_ptr = min_bound.GetDataPtr<scalar_t>();
        const scalar_t* max_bound_ptr = max_bound.GetDataPtr<scalar_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    const scalar_t x = points_ptr[3 * workload_idx + 0];
                    const scalar_t y = points_ptr[3 * workload_idx + 1];
                    const scalar_t z = points_ptr[3 * workload_idx + 2];

                    if (x >= min_bound_ptr[0] && x <= max_bound_ptr[0] &&
                        y >= min_bound_ptr[1] && y <= max_bound_ptr[1] &&
                        z >= min_bound_ptr[2] && z <= max_bound_ptr[2]) {
                        mask_ptr[workload_idx] = true;
                    } else {
                        mask_ptr[workload_idx] = false;
                    }
                });
    });
}

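// A minimal driver sketch for the AABB mask (illustrative; assumes the
// tensors live on the same device):
//
//   core::Tensor mask({points.GetLength()}, core::Bool, points.GetDevice());
//   GetPointMaskWithinAABBCPU(points, min_bound, max_bound, mask);
//   core::Tensor inliers = points.IndexGet({mask});  // keep inside points
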
#if defined(__CUDACC__)
void GetPointMaskWithinOBBCUDA
#else
void GetPointMaskWithinOBBCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& center,
         const core::Tensor& rotation,
         const core::Tensor& extent,
         core::Tensor& mask) {
    const core::Tensor half_extent = extent.Div(2);
    // The three rotation axes are extracted row-wise inside the kernel, so
    // the rotation matrix is transposed first.
    const core::Tensor rotation_t = rotation.Transpose(0, 1).Contiguous();
    const core::Tensor pd = points - center;
    const int64_t n = points.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* pd_ptr = pd.GetDataPtr<scalar_t>();
        const scalar_t* rotation_ptr = rotation_t.GetDataPtr<scalar_t>();
        const scalar_t* half_extent_ptr = half_extent.GetDataPtr<scalar_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    if (abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr)) <=
                                half_extent_ptr[0] &&
                        abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr + 3)) <=
                                half_extent_ptr[1] &&
                        abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr + 6)) <=
                                half_extent_ptr[2]) {
                        mask_ptr[workload_idx] = true;
                    } else {
                        mask_ptr[workload_idx] = false;
                    }
                });
    });
}

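// The OBB test above is the standard axis-projection check: with box center
// c, orthonormal axes r0, r1, r2 (the rows of rotation^T), and extents e, a
// point p lies inside the box iff |dot(p - c, r_i)| <= e_i / 2 for all i.
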
#if defined(__CUDACC__)
void NormalizeNormalsCUDA
#else
void NormalizeNormalsCPU
#endif
        (core::Tensor& normals) {
    const core::Dtype dtype = normals.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* ptr = normals.GetDataPtr<scalar_t>();

        core::ParallelFor(normals.GetDevice(), n,
                          [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                              int64_t idx = 3 * workload_idx;
                              scalar_t x = ptr[idx];
                              scalar_t y = ptr[idx + 1];
                              scalar_t z = ptr[idx + 2];
                              scalar_t norm = sqrt(x * x + y * y + z * z);
                              if (norm > 0) {
                                  x /= norm;
                                  y /= norm;
                                  z /= norm;
                              }
                              ptr[idx] = x;
                              ptr[idx + 1] = y;
                              ptr[idx + 2] = z;
                          });
    });
}

#if defined(__CUDACC__)
void OrientNormalsToAlignWithDirectionCUDA
#else
void OrientNormalsToAlignWithDirectionCPU
#endif
        (core::Tensor& normals, const core::Tensor& direction) {
    const core::Dtype dtype = normals.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* ptr = normals.GetDataPtr<scalar_t>();
        const scalar_t* direction_ptr = direction.GetDataPtr<scalar_t>();

        core::ParallelFor(
                normals.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    scalar_t* normal = ptr + idx;
                    const scalar_t norm =
                            sqrt(normal[0] * normal[0] +
                                 normal[1] * normal[1] +
                                 normal[2] * normal[2]);
                    if (norm == 0.0) {
                        normal[0] = direction_ptr[0];
                        normal[1] = direction_ptr[1];
                        normal[2] = direction_ptr[2];
                    } else if (core::linalg::kernel::dot_3x1(
                                       normal, direction_ptr) < 0) {
                        normal[0] *= -1;
                        normal[1] *= -1;
                        normal[2] *= -1;
                    }
                });
    });
}

#if defined(__CUDACC__)
void OrientNormalsTowardsCameraLocationCUDA
#else
void OrientNormalsTowardsCameraLocationCPU
#endif
        (const core::Tensor& points,
         core::Tensor& normals,
         const core::Tensor& camera) {
    const core::Dtype dtype = points.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
        const scalar_t* camera_ptr = camera.GetDataPtr<scalar_t>();
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();

        core::ParallelFor(
                normals.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    scalar_t* normal = normals_ptr + idx;
                    const scalar_t* point = points_ptr + idx;
                    const scalar_t reference[3] = {camera_ptr[0] - point[0],
                                                   camera_ptr[1] - point[1],
                                                   camera_ptr[2] - point[2]};
                    const scalar_t norm =
                            sqrt(normal[0] * normal[0] +
                                 normal[1] * normal[1] +
                                 normal[2] * normal[2]);
                    if (norm == 0.0) {
                        normal[0] = reference[0];
                        normal[1] = reference[1];
                        normal[2] = reference[2];
                        const scalar_t norm_new =
                                sqrt(normal[0] * normal[0] +
                                     normal[1] * normal[1] +
                                     normal[2] * normal[2]);
                        if (norm_new == 0.0) {
                            normal[0] = 0.0;
                            normal[1] = 0.0;
                            normal[2] = 1.0;
                        } else {
                            normal[0] /= norm_new;
                            normal[1] /= norm_new;
                            normal[2] /= norm_new;
                        }
                    } else if (core::linalg::kernel::dot_3x1(normal,
                                                             reference) < 0) {
                        normal[0] *= -1;
                        normal[1] *= -1;
                        normal[2] *= -1;
                    }
                });
    });
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void GetCoordinateSystemOnPlane(
        const scalar_t* query, scalar_t* u, scalar_t* v) {
    // Unless the x and y coords are both close to zero, we can simply take
    // (-y, x, 0) and normalize it. If both x and y are close to zero, the
    // vector is close to the z-axis, so it is far from collinear with the
    // x-axis for instance; in that case we take the cross product with
    // (1, 0, 0) and normalize that instead.
    if (!(abs(query[0]) < 1e-6) || !(abs(query[1]) < 1e-6)) {
        const scalar_t norm2_inv =
                1.0 / sqrt(query[0] * query[0] + query[1] * query[1]);
        v[0] = -1 * query[1] * norm2_inv;
        v[1] = query[0] * norm2_inv;
        v[2] = 0;
    } else {
        const scalar_t norm2_inv =
                1.0 / sqrt(query[1] * query[1] + query[2] * query[2]);
        v[0] = 0;
        v[1] = -1 * query[2] * norm2_inv;
        v[2] = query[1] * norm2_inv;
    }

    core::linalg::kernel::cross_3x1(query, v, u);
}

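// Worked example (illustrative): for the unit normal query = (0, 0, 1) both
// |x| and |y| fall below the 1e-6 threshold, so the second branch yields
// v = (0, -1, 0), and u = query x v = (1, 0, 0); {u, v} then spans the
// tangent plane of the query normal.
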
template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void Swap(scalar_t* x, scalar_t* y) {
    scalar_t tmp = *x;
    *x = *y;
    *y = tmp;
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void Heapify(scalar_t* arr, int n, int root) {
    int largest = root;
    int l = 2 * root + 1;
    int r = 2 * root + 2;

    if (l < n && arr[l] > arr[largest]) {
        largest = l;
    }
    if (r < n && arr[r] > arr[largest]) {
        largest = r;
    }
    if (largest != root) {
        Swap<scalar_t>(&arr[root], &arr[largest]);
        Heapify<scalar_t>(arr, n, largest);
    }
}

template <typename scalar_t>
CLOUDVIEWER_HOST_DEVICE void HeapSort(scalar_t* arr, int n) {
    for (int i = n / 2 - 1; i >= 0; i--) Heapify(arr, n, i);

    for (int i = n - 1; i > 0; i--) {
        Swap<scalar_t>(&arr[0], &arr[i]);
        Heapify<scalar_t>(arr, i, 0);
    }
}

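// Quick host-side sanity check for the heap sort (illustrative):
//
//   float angles[5] = {0.3f, -1.2f, 2.7f, 0.0f, -0.5f};
//   HeapSort(angles, 5);  // -> {-1.2, -0.5, 0.0, 0.3, 2.7}
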
template <typename scalar_t>
CLOUDVIEWER_HOST_DEVICE bool IsBoundaryPoints(const scalar_t* angles,
                                              int counts,
                                              double angle_threshold) {
    scalar_t diff;
    scalar_t max_diff = 0;
    // Compute the maximal angle difference between two consecutive angles.
    for (int i = 0; i < counts - 1; i++) {
        diff = angles[i + 1] - angles[i];
        max_diff = max(max_diff, diff);
    }

    // Get the angle difference between the last and the first.
    diff = 2 * M_PI - angles[counts - 1] + angles[0];
    max_diff = max(max_diff, diff);

    return max_diff > angle_threshold * M_PI / 180.0;
}

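// Worked example (illustrative): with sorted neighbour angles
// {-pi/2, 0, pi/2} the consecutive gaps are pi/2 and pi/2, and the
// wrap-around gap is 2*pi - pi/2 + (-pi/2) = pi. With a 90-degree threshold,
// max_diff = pi > pi/2, so the point is reported as a boundary point.
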
#if defined(__CUDACC__)
void ComputeBoundaryPointsCUDA
#else
void ComputeBoundaryPointsCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& indices,
         const core::Tensor& counts,
         core::Tensor& mask,
         double angle_threshold) {
    const int nn_size = indices.GetShape()[1];

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
        const int64_t n = points.GetLength();
        const int32_t* indices_ptr = indices.GetDataPtr<int32_t>();
        const int32_t* counts_ptr = counts.GetDataPtr<int32_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::Tensor angles = core::Tensor::Full(
                indices.GetShape(), -10, points.GetDtype(),
                points.GetDevice());
        scalar_t* angles_ptr = angles.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    scalar_t u[3], v[3];
                    GetCoordinateSystemOnPlane(normals_ptr + 3 * workload_idx,
                                               u, v);

                    // Ignore the point itself.
                    int indices_size = counts_ptr[workload_idx] - 1;
                    if (indices_size > 0) {
                        const scalar_t* query = points_ptr + 3 * workload_idx;
                        for (int i = 1; i < indices_size + 1; i++) {
                            const int idx = workload_idx * nn_size + i;

                            const scalar_t* point_ref =
                                    points_ptr + 3 * indices_ptr[idx];
                            const scalar_t delta[3] = {
                                    point_ref[0] - query[0],
                                    point_ref[1] - query[1],
                                    point_ref[2] - query[2]};
                            const scalar_t angle = atan2(
                                    core::linalg::kernel::dot_3x1(v, delta),
                                    core::linalg::kernel::dot_3x1(u, delta));

                            angles_ptr[idx] = angle;
                        }

                        // Sort the angles in ascending order.
                        HeapSort<scalar_t>(
                                angles_ptr + workload_idx * nn_size + 1,
                                indices_size);

                        mask_ptr[workload_idx] = IsBoundaryPoints<scalar_t>(
                                angles_ptr + workload_idx * nn_size + 1,
                                indices_size, angle_threshold);
                    }
                });
    });
}

// This is a `two-pass` estimate method for covariance which is numerically
// more robust than the `textbook` method generally used for covariance
// computation.
template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void
EstimatePointWiseRobustNormalizedCovarianceKernel(
        const scalar_t* points_ptr,
        const int32_t* indices_ptr,
        const int32_t& indices_count,
        scalar_t* covariance_ptr) {
    if (indices_count < 3) {
        // Too few neighbours: fall back to the identity matrix.
        covariance_ptr[0] = 1.0;
        covariance_ptr[1] = 0.0;
        covariance_ptr[2] = 0.0;
        covariance_ptr[3] = 0.0;
        covariance_ptr[4] = 1.0;
        covariance_ptr[5] = 0.0;
        covariance_ptr[6] = 0.0;
        covariance_ptr[7] = 0.0;
        covariance_ptr[8] = 1.0;
        return;
    }

    double centroid[3] = {0};
    for (int32_t i = 0; i < indices_count; ++i) {
        int32_t idx = 3 * indices_ptr[i];
        centroid[0] += points_ptr[idx];
        centroid[1] += points_ptr[idx + 1];
        centroid[2] += points_ptr[idx + 2];
    }

    centroid[0] /= indices_count;
    centroid[1] /= indices_count;
    centroid[2] /= indices_count;

    // cumulants must always be Float64 to ensure precision.
    double cumulants[6] = {0};
    for (int32_t i = 0; i < indices_count; ++i) {
        int32_t idx = 3 * indices_ptr[i];
        const double x = static_cast<double>(points_ptr[idx]) - centroid[0];
        const double y = static_cast<double>(points_ptr[idx + 1]) - centroid[1];
        const double z = static_cast<double>(points_ptr[idx + 2]) - centroid[2];

        cumulants[0] += x * x;
        cumulants[1] += y * y;
        cumulants[2] += z * z;

        cumulants[3] += x * y;
        cumulants[4] += x * z;
        cumulants[5] += y * z;
    }

    // Using Bessel's correction (dividing by (n - 1) instead of n).
    // Refer:
    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    const double normalization_factor = static_cast<double>(indices_count - 1);
    for (int i = 0; i < 6; ++i) {
        cumulants[i] /= normalization_factor;
    }

    // Covariances(0, 0)
    covariance_ptr[0] = static_cast<scalar_t>(cumulants[0]);
    // Covariances(1, 1)
    covariance_ptr[4] = static_cast<scalar_t>(cumulants[1]);
    // Covariances(2, 2)
    covariance_ptr[8] = static_cast<scalar_t>(cumulants[2]);

    // Covariances(0, 1) = Covariances(1, 0)
    covariance_ptr[1] = static_cast<scalar_t>(cumulants[3]);
    covariance_ptr[3] = covariance_ptr[1];

    // Covariances(0, 2) = Covariances(2, 0)
    covariance_ptr[2] = static_cast<scalar_t>(cumulants[4]);
    covariance_ptr[6] = covariance_ptr[2];

    // Covariances(1, 2) = Covariances(2, 1)
    covariance_ptr[5] = static_cast<scalar_t>(cumulants[5]);
    covariance_ptr[7] = covariance_ptr[5];
}

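// Why the two-pass scheme matters (an illustrative 1-D check): for samples
// {1e6 + 1, 1e6 + 2, 1e6 + 3} the single-pass formula E[x^2] - E[x]^2
// cancels catastrophically in single precision, while centering first gives
// the residuals {-1, 0, 1} and the exact variance (1 + 0 + 1) / (3 - 1) = 1
// under Bessel's correction.
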
#if defined(__CUDACC__)
void EstimateCovariancesUsingHybridSearchCUDA
#else
void EstimateCovariancesUsingHybridSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const double& radius,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.HybridIndex(radius);
    if (!check) {
        utility::LogError("Building FixedRadiusIndex failed.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.HybridSearch(points, radius, max_nn);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [Hybrid Search].
                    const int32_t neighbour_offset = max_nn * workload_idx;
                    // Count of valid correspondences per point.
                    const int32_t neighbour_count =
                            neighbour_counts_ptr[workload_idx];
                    // Covariance is of shape {3, 3}, so it has an
                    // offset factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

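// Sketch of a typical driver for the estimator above (illustrative; assumes
// `points` is a Float32 {N, 3} tensor and `covariances` is pre-allocated):
//
//   core::Tensor covariances({points.GetLength(), 3, 3}, points.GetDtype(),
//                            points.GetDevice());
//   EstimateCovariancesUsingHybridSearchCPU(points, covariances,
//                                           /*radius=*/0.1, /*max_nn=*/30);
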
#if defined(__CUDACC__)
void EstimateCovariancesUsingRadiusSearchCUDA
#else
void EstimateCovariancesUsingRadiusSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const double& radius) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.FixedRadiusIndex(radius);
    if (!check) {
        utility::LogError("Building Radius-Index failed.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.FixedRadiusSearch(points, radius);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        const int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // With FixedRadiusSearch, `counts` holds prefix sums
                    // (row splits): the neighbour list of point i starts at
                    // counts[i] and has counts[i + 1] - counts[i] entries.
                    const int32_t neighbour_offset =
                            neighbour_counts_ptr[workload_idx];
                    const int32_t neighbour_count =
                            (neighbour_counts_ptr[workload_idx + 1] -
                             neighbour_counts_ptr[workload_idx]);
                    // Covariance is of shape {3, 3}, so it has an
                    // offset factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateCovariancesUsingKNNSearchCUDA
#else
void EstimateCovariancesUsingKNNSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.KnnIndex();
    if (!check) {
        utility::LogError("Building KNN-Index failed.");
    }

    core::Tensor indices, distance;
    std::tie(indices, distance) = tree.KnnSearch(points, max_nn);

    indices = indices.Contiguous();
    int32_t nn_count = static_cast<int32_t>(indices.GetShape()[1]);

    if (nn_count < 3) {
        utility::LogError(
                "Not enough neighbors to compute Covariances / Normals. Try "
                "increasing the max_nn parameter.");
    }

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [KNN Search].
                    const int32_t neighbour_offset = nn_count * workload_idx;
                    // Covariance is of shape {3, 3}, so it has an offset
                    // factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset, nn_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void ComputeEigenvector0(
        const scalar_t* A, const scalar_t eval0, scalar_t* eigen_vector0) {
    // Rows of (A - eval0 * I); the eigenvector is orthogonal to all of them,
    // so it is recovered from the largest cross product of two rows.
    scalar_t row0[3] = {A[0] - eval0, A[1], A[2]};
    scalar_t row1[3] = {A[1], A[4] - eval0, A[5]};
    scalar_t row2[3] = {A[2], A[5], A[8] - eval0};

    scalar_t r0xr1[3], r0xr2[3], r1xr2[3];

    core::linalg::kernel::cross_3x1(row0, row1, r0xr1);
    core::linalg::kernel::cross_3x1(row0, row2, r0xr2);
    core::linalg::kernel::cross_3x1(row1, row2, r1xr2);

    scalar_t d0 = core::linalg::kernel::dot_3x1(r0xr1, r0xr1);
    scalar_t d1 = core::linalg::kernel::dot_3x1(r0xr2, r0xr2);
    scalar_t d2 = core::linalg::kernel::dot_3x1(r1xr2, r1xr2);

    scalar_t dmax = d0;
    int imax = 0;
    if (d1 > dmax) {
        dmax = d1;
        imax = 1;
    }
    if (d2 > dmax) {
        imax = 2;
    }

    if (imax == 0) {
        scalar_t sqrt_d = sqrt(d0);
        eigen_vector0[0] = r0xr1[0] / sqrt_d;
        eigen_vector0[1] = r0xr1[1] / sqrt_d;
        eigen_vector0[2] = r0xr1[2] / sqrt_d;
        return;
    } else if (imax == 1) {
        scalar_t sqrt_d = sqrt(d1);
        eigen_vector0[0] = r0xr2[0] / sqrt_d;
        eigen_vector0[1] = r0xr2[1] / sqrt_d;
        eigen_vector0[2] = r0xr2[2] / sqrt_d;
        return;
    } else {
        scalar_t sqrt_d = sqrt(d2);
        eigen_vector0[0] = r1xr2[0] / sqrt_d;
        eigen_vector0[1] = r1xr2[1] / sqrt_d;
        eigen_vector0[2] = r1xr2[2] / sqrt_d;
        return;
    }
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void ComputeEigenvector1(
        const scalar_t* A,
        const scalar_t* evec0,
        const scalar_t eval1,
        scalar_t* eigen_vector1) {
    scalar_t U[3];
    if (abs(evec0[0]) > abs(evec0[1])) {
        scalar_t inv_length =
                1.0 / sqrt(evec0[0] * evec0[0] + evec0[2] * evec0[2]);
        U[0] = -evec0[2] * inv_length;
        U[1] = 0.0;
        U[2] = evec0[0] * inv_length;
    } else {
        scalar_t inv_length =
                1.0 / sqrt(evec0[1] * evec0[1] + evec0[2] * evec0[2]);
        U[0] = 0.0;
        U[1] = evec0[2] * inv_length;
        U[2] = -evec0[1] * inv_length;
    }
    scalar_t V[3], AU[3], AV[3];
    core::linalg::kernel::cross_3x1(evec0, U, V);
    // AU = A * U and AV = A * V (A is the symmetric 3x3 matrix in row-major
    // order).
    core::linalg::kernel::matmul3x3_3x1(A[0], A[1], A[2], A[3], A[4], A[5],
                                        A[6], A[7], A[8], U[0], U[1], U[2],
                                        AU[0], AU[1], AU[2]);
    core::linalg::kernel::matmul3x3_3x1(A[0], A[1], A[2], A[3], A[4], A[5],
                                        A[6], A[7], A[8], V[0], V[1], V[2],
                                        AV[0], AV[1], AV[2]);

    scalar_t m00 = core::linalg::kernel::dot_3x1(U, AU) - eval1;
    scalar_t m01 = core::linalg::kernel::dot_3x1(U, AV);
    scalar_t m11 = core::linalg::kernel::dot_3x1(V, AV) - eval1;

    scalar_t absM00 = abs(m00);
    scalar_t absM01 = abs(m01);
    scalar_t absM11 = abs(m11);
    scalar_t max_abs_comp;

    if (absM00 >= absM11) {
        max_abs_comp = max(absM00, absM01);
        if (max_abs_comp > 0) {
            if (absM00 >= absM01) {
                m01 /= m00;
                m00 = 1 / sqrt(1 + m01 * m01);
                m01 *= m00;
            } else {
                m00 /= m01;
                m01 = 1 / sqrt(1 + m00 * m00);
                m00 *= m01;
            }
            eigen_vector1[0] = m01 * U[0] - m00 * V[0];
            eigen_vector1[1] = m01 * U[1] - m00 * V[1];
            eigen_vector1[2] = m01 * U[2] - m00 * V[2];
            return;
        } else {
            eigen_vector1[0] = U[0];
            eigen_vector1[1] = U[1];
            eigen_vector1[2] = U[2];
            return;
        }
    } else {
        max_abs_comp = max(absM11, absM01);
        if (max_abs_comp > 0) {
            if (absM11 >= absM01) {
                m01 /= m11;
                m11 = 1 / sqrt(1 + m01 * m01);
                m01 *= m11;
            } else {
                m11 /= m01;
                m01 = 1 / sqrt(1 + m11 * m11);
                m11 *= m01;
            }
            eigen_vector1[0] = m11 * U[0] - m01 * V[0];
            eigen_vector1[1] = m11 * U[1] - m01 * V[1];
            eigen_vector1[2] = m11 * U[2] - m01 * V[2];
            return;
        } else {
            eigen_vector1[0] = U[0];
            eigen_vector1[1] = U[1];
            eigen_vector1[2] = U[2];
            return;
        }
    }
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void EstimatePointWiseNormalsWithFastEigen3x3(
        const scalar_t* covariance_ptr, scalar_t* normals_ptr) {
    // Based on:
    // https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
    // which handles edge cases like points on a plane.
    scalar_t max_coeff = covariance_ptr[0];

    for (int i = 1; i < 9; ++i) {
        if (max_coeff < covariance_ptr[i]) {
            max_coeff = covariance_ptr[i];
        }
    }

    if (max_coeff == 0) {
        normals_ptr[0] = 0.0;
        normals_ptr[1] = 0.0;
        normals_ptr[2] = 0.0;
        return;
    }

    scalar_t A[9] = {0};

    for (int i = 0; i < 9; ++i) {
        A[i] = covariance_ptr[i] / max_coeff;
    }

    scalar_t norm = A[1] * A[1] + A[2] * A[2] + A[5] * A[5];

    if (norm > 0) {
        scalar_t eval[3];
        scalar_t evec0[3];
        scalar_t evec1[3];
        scalar_t evec2[3];

        scalar_t q = (A[0] + A[4] + A[8]) / 3.0;

        scalar_t b00 = A[0] - q;
        scalar_t b11 = A[4] - q;
        scalar_t b22 = A[8] - q;

        scalar_t p =
                sqrt((b00 * b00 + b11 * b11 + b22 * b22 + norm * 2.0) / 6.0);

        scalar_t c00 = b11 * b22 - A[5] * A[5];
        scalar_t c01 = A[1] * b22 - A[5] * A[2];
        scalar_t c02 = A[1] * A[5] - b11 * A[2];
        scalar_t det = (b00 * c00 - A[1] * c01 + A[2] * c02) / (p * p * p);

        scalar_t half_det = det * 0.5;
        half_det = min(max(half_det, static_cast<scalar_t>(-1.0)),
                       static_cast<scalar_t>(1.0));

        scalar_t angle = acos(half_det) / 3.0;
        const scalar_t two_thirds_pi = 2.09439510239319549;

        scalar_t beta2 = cos(angle) * 2.0;
        scalar_t beta0 = cos(angle + two_thirds_pi) * 2.0;
        scalar_t beta1 = -(beta0 + beta2);

        eval[0] = q + p * beta0;
        eval[1] = q + p * beta1;
        eval[2] = q + p * beta2;

        if (half_det >= 0) {
            ComputeEigenvector0<scalar_t>(A, eval[2], evec2);

            if (eval[2] < eval[0] && eval[2] < eval[1]) {
                normals_ptr[0] = evec2[0];
                normals_ptr[1] = evec2[1];
                normals_ptr[2] = evec2[2];

                return;
            }

            ComputeEigenvector1<scalar_t>(A, evec2, eval[1], evec1);

            if (eval[1] < eval[0] && eval[1] < eval[2]) {
                normals_ptr[0] = evec1[0];
                normals_ptr[1] = evec1[1];
                normals_ptr[2] = evec1[2];

                return;
            }

            normals_ptr[0] = evec1[1] * evec2[2] - evec1[2] * evec2[1];
            normals_ptr[1] = evec1[2] * evec2[0] - evec1[0] * evec2[2];
            normals_ptr[2] = evec1[0] * evec2[1] - evec1[1] * evec2[0];

            return;
        } else {
            ComputeEigenvector0<scalar_t>(A, eval[0], evec0);

            if (eval[0] < eval[1] && eval[0] < eval[2]) {
                normals_ptr[0] = evec0[0];
                normals_ptr[1] = evec0[1];
                normals_ptr[2] = evec0[2];
                return;
            }

            ComputeEigenvector1<scalar_t>(A, evec0, eval[1], evec1);

            if (eval[1] < eval[0] && eval[1] < eval[2]) {
                normals_ptr[0] = evec1[0];
                normals_ptr[1] = evec1[1];
                normals_ptr[2] = evec1[2];
                return;
            }

            normals_ptr[0] = evec0[1] * evec1[2] - evec0[2] * evec1[1];
            normals_ptr[1] = evec0[2] * evec1[0] - evec0[0] * evec1[2];
            normals_ptr[2] = evec0[0] * evec1[1] - evec0[1] * evec1[0];
            return;
        }
    } else {
        if (covariance_ptr[0] < covariance_ptr[4] &&
            covariance_ptr[0] < covariance_ptr[8]) {
            normals_ptr[0] = 1.0;
            normals_ptr[1] = 0.0;
            normals_ptr[2] = 0.0;
            return;
        } else if (covariance_ptr[4] < covariance_ptr[0] &&
                   covariance_ptr[4] < covariance_ptr[8]) {
            normals_ptr[0] = 0.0;
            normals_ptr[1] = 1.0;
            normals_ptr[2] = 0.0;
            return;
        } else {
            normals_ptr[0] = 0.0;
            normals_ptr[1] = 0.0;
            normals_ptr[2] = 1.0;
            return;
        }
    }
}

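// Background for the closed form above (an illustrative note): with
// q = trace(A) / 3 and p = sqrt(trace((A - q * I)^2) / 6), the scaled matrix
// B = (A - q * I) / p has det(B) / 2 in [-1, 1], and its eigenvalues are
// beta_k = 2 * cos(acos(det(B) / 2) / 3 + 2 * pi * k / 3), so the eigenvalues
// of A are eval_k = q + p * beta_k. The sign of half_det decides whether the
// smallest eigenvalue is isolated first (half_det < 0) or last
// (half_det >= 0), which is why the two branches start from different
// eigenvectors.
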
#if defined(__CUDACC__)
void EstimateNormalsFromCovariancesCUDA
#else
void EstimateNormalsFromCovariancesCPU
#endif
        (const core::Tensor& covariances,
         core::Tensor& normals,
         const bool has_normals) {
    core::Dtype dtype = covariances.GetDtype();
    int64_t n = covariances.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
        scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();

        core::ParallelFor(
                covariances.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int32_t covariances_offset = 9 * workload_idx;
                    int32_t normals_offset = 3 * workload_idx;
                    scalar_t normals_output[3] = {0};
                    EstimatePointWiseNormalsWithFastEigen3x3<scalar_t>(
                            covariances_ptr + covariances_offset,
                            normals_output);

                    if ((normals_output[0] * normals_output[0] +
                         normals_output[1] * normals_output[1] +
                         normals_output[2] * normals_output[2]) == 0.0 &&
                        !has_normals) {
                        normals_output[0] = 0.0;
                        normals_output[1] = 0.0;
                        normals_output[2] = 1.0;
                    }
                    if (has_normals) {
                        if ((normals_ptr[normals_offset] * normals_output[0] +
                             normals_ptr[normals_offset + 1] *
                                     normals_output[1] +
                             normals_ptr[normals_offset + 2] *
                                     normals_output[2]) < 0.0) {
                            normals_output[0] *= -1;
                            normals_output[1] *= -1;
                            normals_output[2] *= -1;
                        }
                    }

                    normals_ptr[normals_offset] = normals_output[0];
                    normals_ptr[normals_offset + 1] = normals_output[1];
                    normals_ptr[normals_offset + 2] = normals_output[2];
                });
    });

    core::cuda::Synchronize(covariances.GetDevice());
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void EstimatePointWiseColorGradientKernel(
        const scalar_t* points_ptr,
        const scalar_t* normals_ptr,
        const scalar_t* colors_ptr,
        const int32_t& idx_offset,
        const int32_t* indices_ptr,
        const int32_t& indices_count,
        scalar_t* color_gradients_ptr) {
    if (indices_count < 4) {
        color_gradients_ptr[idx_offset] = 0;
        color_gradients_ptr[idx_offset + 1] = 0;
        color_gradients_ptr[idx_offset + 2] = 0;
    } else {
        scalar_t vt[3] = {points_ptr[idx_offset], points_ptr[idx_offset + 1],
                          points_ptr[idx_offset + 2]};

        scalar_t nt[3] = {normals_ptr[idx_offset], normals_ptr[idx_offset + 1],
                          normals_ptr[idx_offset + 2]};

        // Intensity at the query point: mean of its RGB channels.
        scalar_t it = (colors_ptr[idx_offset] + colors_ptr[idx_offset + 1] +
                       colors_ptr[idx_offset + 2]) /
                      3.0;

        scalar_t AtA[9] = {0};
        scalar_t Atb[3] = {0};

        // Approximate image gradient of vt's tangential plane.
        // The projection (p') of a point p onto a plane defined by normal n,
        // where o is the closest point to p on the plane, is given by:
        //   p' = p - [(p - o).dot(n)] * n
        //      = p - [p.dot(n) - s] * n    [where s = o.dot(n)]

        // Computing the scalar s.
        scalar_t s = vt[0] * nt[0] + vt[1] * nt[1] + vt[2] * nt[2];

        int i = 1;
        for (; i < indices_count; i++) {
            // A negative index marks the end of the valid neighbours.
            if (indices_ptr[i] < 0) {
                break;
            }
            int64_t neighbour_idx_offset = 3 * indices_ptr[i];

            scalar_t vt_adj[3] = {points_ptr[neighbour_idx_offset],
                                  points_ptr[neighbour_idx_offset + 1],
                                  points_ptr[neighbour_idx_offset + 2]};

            // p' = p - d * n [where d = p.dot(n) - s]
            // Computing the scalar d.
            scalar_t d = vt_adj[0] * nt[0] + vt_adj[1] * nt[1] +
                         vt_adj[2] * nt[2] - s;

            // Computing p' (the projection of the point).
            scalar_t vt_proj[3] = {vt_adj[0] - d * nt[0], vt_adj[1] - d * nt[1],
                                   vt_adj[2] - d * nt[2]};

            scalar_t it_adj = (colors_ptr[neighbour_idx_offset + 0] +
                               colors_ptr[neighbour_idx_offset + 1] +
                               colors_ptr[neighbour_idx_offset + 2]) /
                              3.0;

            scalar_t A[3] = {vt_proj[0] - vt[0], vt_proj[1] - vt[1],
                             vt_proj[2] - vt[2]};

            AtA[0] += A[0] * A[0];
            AtA[1] += A[1] * A[0];
            AtA[2] += A[2] * A[0];
            AtA[4] += A[1] * A[1];
            AtA[5] += A[2] * A[1];
            AtA[8] += A[2] * A[2];

            scalar_t b = it_adj - it;

            Atb[0] += A[0] * b;
            Atb[1] += A[1] * b;
            Atb[2] += A[2] * b;
        }

        // Orthogonal constraint.
        scalar_t A[3] = {(i - 1) * nt[0], (i - 1) * nt[1], (i - 1) * nt[2]};

        AtA[0] += A[0] * A[0];
        AtA[1] += A[0] * A[1];
        AtA[2] += A[0] * A[2];
        AtA[4] += A[1] * A[1];
        AtA[5] += A[1] * A[2];
        AtA[8] += A[2] * A[2];

        // Symmetry.
        AtA[3] = AtA[1];
        AtA[6] = AtA[2];
        AtA[7] = AtA[5];

        core::linalg::kernel::solve_svd3x3(AtA, Atb,
                                           color_gradients_ptr + idx_offset);
    }
}

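// The kernel above fits a per-point color gradient g by least squares (an
// explanatory note): each neighbour p_i is projected to p_i' on the query
// point's tangent plane, a row a_i = p_i' - p with residual
// b_i = I(p_i) - I(p) is accumulated into the normal equations
// AtA * g = Atb, and one extra row proportional to the normal n with zero
// residual softly constrains g to stay inside the tangent plane;
// solve_svd3x3 then recovers g.
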
#if defined(__CUDACC__)
void EstimateColorGradientsUsingHybridSearchCUDA
#else
void EstimateColorGradientsUsingHybridSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const double& radius,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.HybridIndex(radius);
    if (!check) {
        utility::LogError("NearestNeighborSearch::HybridIndex is not set.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.HybridSearch(points, radius, max_nn);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [Hybrid Search].
                    int32_t neighbour_offset = max_nn * workload_idx;
                    // Count of valid correspondences per point.
                    int32_t neighbour_count =
                            neighbour_counts_ptr[workload_idx];
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count, color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateColorGradientsUsingKNNSearchCUDA
#else
void EstimateColorGradientsUsingKNNSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.KnnIndex();
    if (!check) {
        utility::LogError("KnnIndex is not set.");
    }

    core::Tensor indices, distance;
    std::tie(indices, distance) = tree.KnnSearch(points, max_nn);

    indices = indices.To(core::Int32).Contiguous();
    int64_t nn_count = indices.GetShape()[1];

    if (nn_count < 4) {
        utility::LogError(
                "Not enough neighbors to compute Covariances / Normals. Try "
                "changing the search parameter.");
    }

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int32_t neighbour_offset = max_nn * workload_idx;
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset, nn_count,
                            color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateColorGradientsUsingRadiusSearchCUDA
#else
void EstimateColorGradientsUsingRadiusSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const double& radius) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.FixedRadiusIndex(radius);
    if (!check) {
        utility::LogError("RadiusIndex is not set.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.FixedRadiusSearch(points, radius);

    indices = indices.To(core::Int32).Contiguous();
    counts = counts.Contiguous();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // `counts` holds prefix sums (row splits): the neighbour
                    // list of point i starts at counts[i].
                    int32_t neighbour_offset =
                            neighbour_counts_ptr[workload_idx];
                    // Count of valid correspondences per point.
                    const int32_t neighbour_count =
                            (neighbour_counts_ptr[workload_idx + 1] -
                             neighbour_counts_ptr[workload_idx]);
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count, color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

}  // namespace pointcloud
}  // namespace kernel
}  // namespace geometry
}  // namespace t
}  // namespace cloudViewer