ACloudViewer 3.9.4
A Modern Library for 3D Data Processing
PointCloudImpl.h
// ----------------------------------------------------------------------------
// -                        CloudViewer: www.cloudViewer.org                  -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <Logging.h>

#include <atomic>
#include <cmath>
#include <vector>

#include "cloudViewer/core/Dtype.h"
// Further headers implied by the symbols used below; the exact paths are an
// assumption based on the cloudViewer source layout.
#include "cloudViewer/core/CUDAUtils.h"
#include "cloudViewer/core/Dispatch.h"
#include "cloudViewer/core/ParallelFor.h"
#include "cloudViewer/core/Tensor.h"
#include "cloudViewer/core/linalg/kernel/Matrix.h"
#include "cloudViewer/core/linalg/kernel/SVD3x3.h"
#include "cloudViewer/core/nns/NearestNeighborSearch.h"

namespace cloudViewer {
namespace t {
namespace geometry {
namespace kernel {
namespace pointcloud {

#ifndef __CUDACC__
using std::abs;
using std::max;
using std::min;
using std::sqrt;
#endif

#if defined(__CUDACC__)
void UnprojectCUDA
#else
void UnprojectCPU
#endif
        (const core::Tensor& depth,
         utility::optional<std::reference_wrapper<const core::Tensor>>
                 image_colors,
         core::Tensor& points,
         utility::optional<std::reference_wrapper<core::Tensor>> colors,
         const core::Tensor& intrinsics,
         const core::Tensor& extrinsics,
         float depth_scale,
         float depth_max,
         int64_t stride) {
    const bool have_colors = image_colors.has_value();
    NDArrayIndexer depth_indexer(depth, 2);
    NDArrayIndexer image_colors_indexer;

    // The camera pose is the inverse of the extrinsic matrix.
    const core::Tensor pose = InverseTransformation(extrinsics);
    TransformIndexer ti(intrinsics, pose, 1.0f);

    // Output
    int64_t rows_strided = depth_indexer.GetShape(0) / stride;
    int64_t cols_strided = depth_indexer.GetShape(1) / stride;

    points = core::Tensor({rows_strided * cols_strided, 3}, core::Float32,
                          depth.GetDevice());
    NDArrayIndexer point_indexer(points, 1);
    NDArrayIndexer colors_indexer;
    if (have_colors) {
        const auto& imcol = image_colors.value().get();
        image_colors_indexer = NDArrayIndexer{imcol, 2};
        colors.value().get() = core::Tensor({rows_strided * cols_strided, 3},
                                            core::Float32, imcol.GetDevice());
        colors_indexer = NDArrayIndexer(colors.value().get(), 1);
    }

    // Counter
#if defined(__CUDACC__)
    core::Tensor count(std::vector<int>{0}, {}, core::Int32,
                       depth.GetDevice());
    int* count_ptr = count.GetDataPtr<int>();
#else
    std::atomic<int> count_atomic(0);
    std::atomic<int>* count_ptr = &count_atomic;
#endif

    int64_t n = rows_strided * cols_strided;

    DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
        core::ParallelFor(
                depth.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t y = (workload_idx / cols_strided) * stride;
                    int64_t x = (workload_idx % cols_strided) * stride;

                    float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
                              depth_scale;
                    if (d > 0 && d < depth_max) {
                        int idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);

                        float x_c = 0, y_c = 0, z_c = 0;
                        ti.Unproject(static_cast<float>(x),
                                     static_cast<float>(y), d, &x_c, &y_c,
                                     &z_c);

                        float* vertex = point_indexer.GetDataPtr<float>(idx);
                        ti.RigidTransform(x_c, y_c, z_c, vertex + 0,
                                          vertex + 1, vertex + 2);
                        if (have_colors) {
                            float* pcd_pixel =
                                    colors_indexer.GetDataPtr<float>(idx);
                            float* image_pixel =
                                    image_colors_indexer.GetDataPtr<float>(x,
                                                                           y);
                            *pcd_pixel = *image_pixel;
                            *(pcd_pixel + 1) = *(image_pixel + 1);
                            *(pcd_pixel + 2) = *(image_pixel + 2);
                        }
                    }
                });
    });
#if defined(__CUDACC__)
    int total_pts_count = count.Item<int>();
#else
    int total_pts_count = (*count_ptr).load();
#endif

#ifdef __CUDACC__
    core::cuda::Synchronize();
#endif
    points = points.Slice(0, 0, total_pts_count);
    if (have_colors) {
        colors.value().get() =
                colors.value().get().Slice(0, 0, total_pts_count);
    }
}

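// Illustrative usage of the kernel above (a sketch, not part of the original
// header; `depth`, `intrinsics`, and `extrinsics` are assumed pre-built
// tensors on the same device):
//
//   core::Tensor points;
//   UnprojectCPU(depth, utility::nullopt, points, utility::nullopt,
//                intrinsics, extrinsics,
//                /*depth_scale=*/1000.0f,  // e.g. millimeters -> meters
//                /*depth_max=*/3.0f, /*stride=*/1);
//   // `points` now holds the {N, 3} Float32 positions of all valid pixels.
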
#if defined(__CUDACC__)
void GetPointMaskWithinAABBCUDA
#else
void GetPointMaskWithinAABBCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& min_bound,
         const core::Tensor& max_bound,
         core::Tensor& mask) {
    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const int64_t n = points.GetLength();
        const scalar_t* min_bound_ptr = min_bound.GetDataPtr<scalar_t>();
        const scalar_t* max_bound_ptr = max_bound.GetDataPtr<scalar_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    const scalar_t x = points_ptr[3 * workload_idx + 0];
                    const scalar_t y = points_ptr[3 * workload_idx + 1];
                    const scalar_t z = points_ptr[3 * workload_idx + 2];

                    if (x >= min_bound_ptr[0] && x <= max_bound_ptr[0] &&
                        y >= min_bound_ptr[1] && y <= max_bound_ptr[1] &&
                        z >= min_bound_ptr[2] && z <= max_bound_ptr[2]) {
                        mask_ptr[workload_idx] = true;
                    } else {
                        mask_ptr[workload_idx] = false;
                    }
                });
    });
}

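// A minimal driver sketch for the AABB mask (illustrative; assumes the
// tensors live on the same device):
//
//   core::Tensor mask({points.GetLength()}, core::Bool, points.GetDevice());
//   GetPointMaskWithinAABBCPU(points, min_bound, max_bound, mask);
//   core::Tensor inliers = points.IndexGet({mask});  // keep inside points
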
#if defined(__CUDACC__)
void GetPointMaskWithinOBBCUDA
#else
void GetPointMaskWithinOBBCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& center,
         const core::Tensor& rotation,
         const core::Tensor& extent,
         core::Tensor& mask) {
    const core::Tensor half_extent = extent.Div(2);
    // The three rotation axes are extracted row-wise inside the kernel, so
    // the rotation matrix is transposed first.
    const core::Tensor rotation_t = rotation.Transpose(0, 1).Contiguous();
    const core::Tensor pd = points - center;
    const int64_t n = points.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* pd_ptr = pd.GetDataPtr<scalar_t>();
        const scalar_t* rotation_ptr = rotation_t.GetDataPtr<scalar_t>();
        const scalar_t* half_extent_ptr = half_extent.GetDataPtr<scalar_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    if (abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr)) <=
                                half_extent_ptr[0] &&
                        abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr + 3)) <=
                                half_extent_ptr[1] &&
                        abs(core::linalg::kernel::dot_3x1(
                                pd_ptr + idx, rotation_ptr + 6)) <=
                                half_extent_ptr[2]) {
                        mask_ptr[workload_idx] = true;
                    } else {
                        mask_ptr[workload_idx] = false;
                    }
                });
    });
}

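// The OBB test above is the standard axis-projection check: with box center
// c, orthonormal axes r0, r1, r2 (the rows of rotation^T), and extents e, a
// point p lies inside the box iff |dot(p - c, r_i)| <= e_i / 2 for all i.
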
#if defined(__CUDACC__)
void NormalizeNormalsCUDA
#else
void NormalizeNormalsCPU
#endif
        (core::Tensor& normals) {
    const core::Dtype dtype = normals.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* ptr = normals.GetDataPtr<scalar_t>();

        core::ParallelFor(normals.GetDevice(), n,
                          [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                              int64_t idx = 3 * workload_idx;
                              scalar_t x = ptr[idx];
                              scalar_t y = ptr[idx + 1];
                              scalar_t z = ptr[idx + 2];
                              scalar_t norm = sqrt(x * x + y * y + z * z);
                              if (norm > 0) {
                                  x /= norm;
                                  y /= norm;
                                  z /= norm;
                              }
                              ptr[idx] = x;
                              ptr[idx + 1] = y;
                              ptr[idx + 2] = z;
                          });
    });
}

#if defined(__CUDACC__)
void OrientNormalsToAlignWithDirectionCUDA
#else
void OrientNormalsToAlignWithDirectionCPU
#endif
        (core::Tensor& normals, const core::Tensor& direction) {
    const core::Dtype dtype = normals.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* ptr = normals.GetDataPtr<scalar_t>();
        const scalar_t* direction_ptr = direction.GetDataPtr<scalar_t>();

        core::ParallelFor(
                normals.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    scalar_t* normal = ptr + idx;
                    const scalar_t norm =
                            sqrt(normal[0] * normal[0] +
                                 normal[1] * normal[1] +
                                 normal[2] * normal[2]);
                    if (norm == 0.0) {
                        normal[0] = direction_ptr[0];
                        normal[1] = direction_ptr[1];
                        normal[2] = direction_ptr[2];
                    } else if (core::linalg::kernel::dot_3x1(
                                       normal, direction_ptr) < 0) {
                        normal[0] *= -1;
                        normal[1] *= -1;
                        normal[2] *= -1;
                    }
                });
    });
}

#if defined(__CUDACC__)
void OrientNormalsTowardsCameraLocationCUDA
#else
void OrientNormalsTowardsCameraLocationCPU
#endif
        (const core::Tensor& points,
         core::Tensor& normals,
         const core::Tensor& camera) {
    const core::Dtype dtype = points.GetDtype();
    const int64_t n = normals.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
        const scalar_t* camera_ptr = camera.GetDataPtr<scalar_t>();
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();

        core::ParallelFor(
                normals.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int64_t idx = 3 * workload_idx;
                    scalar_t* normal = normals_ptr + idx;
                    const scalar_t* point = points_ptr + idx;
                    const scalar_t reference[3] = {camera_ptr[0] - point[0],
                                                   camera_ptr[1] - point[1],
                                                   camera_ptr[2] - point[2]};
                    const scalar_t norm =
                            sqrt(normal[0] * normal[0] +
                                 normal[1] * normal[1] +
                                 normal[2] * normal[2]);
                    if (norm == 0.0) {
                        normal[0] = reference[0];
                        normal[1] = reference[1];
                        normal[2] = reference[2];
                        const scalar_t norm_new =
                                sqrt(normal[0] * normal[0] +
                                     normal[1] * normal[1] +
                                     normal[2] * normal[2]);
                        if (norm_new == 0.0) {
                            normal[0] = 0.0;
                            normal[1] = 0.0;
                            normal[2] = 1.0;
                        } else {
                            normal[0] /= norm_new;
                            normal[1] /= norm_new;
                            normal[2] /= norm_new;
                        }
                    } else if (core::linalg::kernel::dot_3x1(normal,
                                                             reference) < 0) {
                        normal[0] *= -1;
                        normal[1] *= -1;
                        normal[2] *= -1;
                    }
                });
    });
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void GetCoordinateSystemOnPlane(
        const scalar_t* query, scalar_t* u, scalar_t* v) {
    // Unless the x and y coords are both close to zero, we can simply take
    // (-y, x, 0) and normalize it. If both x and y are close to zero, the
    // vector is close to the z-axis, so it is far from collinear with the
    // x-axis for instance; in that case we take the cross product with
    // (1, 0, 0) and normalize that instead.
    if (!(abs(query[0]) < 1e-6) || !(abs(query[1]) < 1e-6)) {
        const scalar_t norm2_inv =
                1.0 / sqrt(query[0] * query[0] + query[1] * query[1]);
        v[0] = -1 * query[1] * norm2_inv;
        v[1] = query[0] * norm2_inv;
        v[2] = 0;
    } else {
        const scalar_t norm2_inv =
                1.0 / sqrt(query[1] * query[1] + query[2] * query[2]);
        v[0] = 0;
        v[1] = -1 * query[2] * norm2_inv;
        v[2] = query[1] * norm2_inv;
    }

    core::linalg::kernel::cross_3x1(query, v, u);
}

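// Worked example (illustrative): for the unit normal query = (0, 0, 1) both
// |x| and |y| fall below the 1e-6 threshold, so the second branch yields
// v = (0, -1, 0), and u = query x v = (1, 0, 0); {u, v} then spans the
// tangent plane of the query normal.
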
template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void Swap(scalar_t* x, scalar_t* y) {
    scalar_t tmp = *x;
    *x = *y;
    *y = tmp;
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void Heapify(scalar_t* arr, int n, int root) {
    int largest = root;
    int l = 2 * root + 1;
    int r = 2 * root + 2;

    if (l < n && arr[l] > arr[largest]) {
        largest = l;
    }
    if (r < n && arr[r] > arr[largest]) {
        largest = r;
    }
    if (largest != root) {
        Swap<scalar_t>(&arr[root], &arr[largest]);
        Heapify<scalar_t>(arr, n, largest);
    }
}

template <typename scalar_t>
CLOUDVIEWER_HOST_DEVICE void HeapSort(scalar_t* arr, int n) {
    for (int i = n / 2 - 1; i >= 0; i--) Heapify(arr, n, i);

    for (int i = n - 1; i > 0; i--) {
        Swap<scalar_t>(&arr[0], &arr[i]);
        Heapify<scalar_t>(arr, i, 0);
    }
}

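// Quick host-side sanity check for the heap sort (illustrative):
//
//   float angles[5] = {0.3f, -1.2f, 2.7f, 0.0f, -0.5f};
//   HeapSort(angles, 5);  // -> {-1.2, -0.5, 0.0, 0.3, 2.7}
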
template <typename scalar_t>
CLOUDVIEWER_HOST_DEVICE bool IsBoundaryPoints(const scalar_t* angles,
                                              int counts,
                                              double angle_threshold) {
    scalar_t diff;
    scalar_t max_diff = 0;
    // Compute the maximal angle difference between two consecutive angles.
    for (int i = 0; i < counts - 1; i++) {
        diff = angles[i + 1] - angles[i];
        max_diff = max(max_diff, diff);
    }

    // Get the angle difference between the last and the first.
    diff = 2 * M_PI - angles[counts - 1] + angles[0];
    max_diff = max(max_diff, diff);

    return max_diff > angle_threshold * M_PI / 180.0;
}

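// Worked example (illustrative): with sorted neighbour angles
// {-pi/2, 0, pi/2} the consecutive gaps are pi/2 and pi/2, and the
// wrap-around gap is 2*pi - pi/2 + (-pi/2) = pi. With a 90-degree threshold,
// max_diff = pi > pi/2, so the point is reported as a boundary point.
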
#if defined(__CUDACC__)
void ComputeBoundaryPointsCUDA
#else
void ComputeBoundaryPointsCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& indices,
         const core::Tensor& counts,
         core::Tensor& mask,
         double angle_threshold) {
    const int nn_size = indices.GetShape()[1];

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
        const int64_t n = points.GetLength();
        const int32_t* indices_ptr = indices.GetDataPtr<int32_t>();
        const int32_t* counts_ptr = counts.GetDataPtr<int32_t>();
        bool* mask_ptr = mask.GetDataPtr<bool>();

        core::Tensor angles = core::Tensor::Full(
                indices.GetShape(), -10, points.GetDtype(),
                points.GetDevice());
        scalar_t* angles_ptr = angles.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    scalar_t u[3], v[3];
                    GetCoordinateSystemOnPlane(normals_ptr + 3 * workload_idx,
                                               u, v);

                    // Ignore the point itself.
                    int indices_size = counts_ptr[workload_idx] - 1;
                    if (indices_size > 0) {
                        const scalar_t* query = points_ptr + 3 * workload_idx;
                        for (int i = 1; i < indices_size + 1; i++) {
                            const int idx = workload_idx * nn_size + i;

                            const scalar_t* point_ref =
                                    points_ptr + 3 * indices_ptr[idx];
                            const scalar_t delta[3] = {
                                    point_ref[0] - query[0],
                                    point_ref[1] - query[1],
                                    point_ref[2] - query[2]};
                            const scalar_t angle = atan2(
                                    core::linalg::kernel::dot_3x1(v, delta),
                                    core::linalg::kernel::dot_3x1(u, delta));

                            angles_ptr[idx] = angle;
                        }

                        // Sort the angles in ascending order.
                        HeapSort<scalar_t>(
                                angles_ptr + workload_idx * nn_size + 1,
                                indices_size);

                        mask_ptr[workload_idx] = IsBoundaryPoints<scalar_t>(
                                angles_ptr + workload_idx * nn_size + 1,
                                indices_size, angle_threshold);
                    }
                });
    });
}

// This is a `two-pass` estimate method for covariance which is numerically
// more robust than the `textbook` method generally used for covariance
// computation.
template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void
EstimatePointWiseRobustNormalizedCovarianceKernel(
        const scalar_t* points_ptr,
        const int32_t* indices_ptr,
        const int32_t& indices_count,
        scalar_t* covariance_ptr) {
    if (indices_count < 3) {
        // Too few neighbours: fall back to the identity matrix.
        covariance_ptr[0] = 1.0;
        covariance_ptr[1] = 0.0;
        covariance_ptr[2] = 0.0;
        covariance_ptr[3] = 0.0;
        covariance_ptr[4] = 1.0;
        covariance_ptr[5] = 0.0;
        covariance_ptr[6] = 0.0;
        covariance_ptr[7] = 0.0;
        covariance_ptr[8] = 1.0;
        return;
    }

    double centroid[3] = {0};
    for (int32_t i = 0; i < indices_count; ++i) {
        int32_t idx = 3 * indices_ptr[i];
        centroid[0] += points_ptr[idx];
        centroid[1] += points_ptr[idx + 1];
        centroid[2] += points_ptr[idx + 2];
    }

    centroid[0] /= indices_count;
    centroid[1] /= indices_count;
    centroid[2] /= indices_count;

    // cumulants must always be Float64 to ensure precision.
    double cumulants[6] = {0};
    for (int32_t i = 0; i < indices_count; ++i) {
        int32_t idx = 3 * indices_ptr[i];
        const double x = static_cast<double>(points_ptr[idx]) - centroid[0];
        const double y = static_cast<double>(points_ptr[idx + 1]) - centroid[1];
        const double z = static_cast<double>(points_ptr[idx + 2]) - centroid[2];

        cumulants[0] += x * x;
        cumulants[1] += y * y;
        cumulants[2] += z * z;

        cumulants[3] += x * y;
        cumulants[4] += x * z;
        cumulants[5] += y * z;
    }

    // Using Bessel's correction (dividing by (n - 1) instead of n).
    // Refer:
    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    const double normalization_factor = static_cast<double>(indices_count - 1);
    for (int i = 0; i < 6; ++i) {
        cumulants[i] /= normalization_factor;
    }

    // Covariances(0, 0)
    covariance_ptr[0] = static_cast<scalar_t>(cumulants[0]);
    // Covariances(1, 1)
    covariance_ptr[4] = static_cast<scalar_t>(cumulants[1]);
    // Covariances(2, 2)
    covariance_ptr[8] = static_cast<scalar_t>(cumulants[2]);

    // Covariances(0, 1) = Covariances(1, 0)
    covariance_ptr[1] = static_cast<scalar_t>(cumulants[3]);
    covariance_ptr[3] = covariance_ptr[1];

    // Covariances(0, 2) = Covariances(2, 0)
    covariance_ptr[2] = static_cast<scalar_t>(cumulants[4]);
    covariance_ptr[6] = covariance_ptr[2];

    // Covariances(1, 2) = Covariances(2, 1)
    covariance_ptr[5] = static_cast<scalar_t>(cumulants[5]);
    covariance_ptr[7] = covariance_ptr[5];
}

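// Why the two-pass scheme matters (an illustrative 1-D check): for samples
// {1e6 + 1, 1e6 + 2, 1e6 + 3} the single-pass formula E[x^2] - E[x]^2
// cancels catastrophically in single precision, while centering first gives
// the residuals {-1, 0, 1} and the exact variance (1 + 0 + 1) / (3 - 1) = 1
// under Bessel's correction.
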
#if defined(__CUDACC__)
void EstimateCovariancesUsingHybridSearchCUDA
#else
void EstimateCovariancesUsingHybridSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const double& radius,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.HybridIndex(radius);
    if (!check) {
        utility::LogError("Building FixedRadiusIndex failed.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.HybridSearch(points, radius, max_nn);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [Hybrid Search].
                    const int32_t neighbour_offset = max_nn * workload_idx;
                    // Count of valid correspondences per point.
                    const int32_t neighbour_count =
                            neighbour_counts_ptr[workload_idx];
                    // Covariance is of shape {3, 3}, so it has an
                    // offset factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

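// Sketch of a typical driver for the estimator above (illustrative; assumes
// `points` is a Float32 {N, 3} tensor and `covariances` is pre-allocated):
//
//   core::Tensor covariances({points.GetLength(), 3, 3}, points.GetDtype(),
//                            points.GetDevice());
//   EstimateCovariancesUsingHybridSearchCPU(points, covariances,
//                                           /*radius=*/0.1, /*max_nn=*/30);
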
#if defined(__CUDACC__)
void EstimateCovariancesUsingRadiusSearchCUDA
#else
void EstimateCovariancesUsingRadiusSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const double& radius) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.FixedRadiusIndex(radius);
    if (!check) {
        utility::LogError("Building Radius-Index failed.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.FixedRadiusSearch(points, radius);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
        const int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        const int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // With FixedRadiusSearch, `counts` holds prefix sums
                    // (row splits): the neighbour list of point i starts at
                    // counts[i] and has counts[i + 1] - counts[i] entries.
                    const int32_t neighbour_offset =
                            neighbour_counts_ptr[workload_idx];
                    const int32_t neighbour_count =
                            (neighbour_counts_ptr[workload_idx + 1] -
                             neighbour_counts_ptr[workload_idx]);
                    // Covariance is of shape {3, 3}, so it has an
                    // offset factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateCovariancesUsingKNNSearchCUDA
#else
void EstimateCovariancesUsingKNNSearchCPU
#endif
        (const core::Tensor& points,
         core::Tensor& covariances,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);
    bool check = tree.KnnIndex();
    if (!check) {
        utility::LogError("Building KNN-Index failed.");
    }

    core::Tensor indices, distance;
    std::tie(indices, distance) = tree.KnnSearch(points, max_nn);

    indices = indices.Contiguous();
    int32_t nn_count = static_cast<int32_t>(indices.GetShape()[1]);

    if (nn_count < 3) {
        utility::LogError(
                "Not enough neighbors to compute Covariances / Normals. Try "
                "increasing the max_nn parameter.");
    }

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto covariances_ptr = covariances.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [KNN Search].
                    const int32_t neighbour_offset = nn_count * workload_idx;
                    // Covariance is of shape {3, 3}, so it has an offset
                    // factor of 9 x workload_idx.
                    const int32_t covariances_offset = 9 * workload_idx;

                    EstimatePointWiseRobustNormalizedCovarianceKernel(
                            points_ptr,
                            neighbour_indices_ptr + neighbour_offset, nn_count,
                            covariances_ptr + covariances_offset);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void ComputeEigenvector0(
        const scalar_t* A, const scalar_t eval0, scalar_t* eigen_vector0) {
    // Rows of (A - eval0 * I); the eigenvector is orthogonal to all of them,
    // so it is recovered from the largest cross product of two rows.
    scalar_t row0[3] = {A[0] - eval0, A[1], A[2]};
    scalar_t row1[3] = {A[1], A[4] - eval0, A[5]};
    scalar_t row2[3] = {A[2], A[5], A[8] - eval0};

    scalar_t r0xr1[3], r0xr2[3], r1xr2[3];

    core::linalg::kernel::cross_3x1(row0, row1, r0xr1);
    core::linalg::kernel::cross_3x1(row0, row2, r0xr2);
    core::linalg::kernel::cross_3x1(row1, row2, r1xr2);

    scalar_t d0 = core::linalg::kernel::dot_3x1(r0xr1, r0xr1);
    scalar_t d1 = core::linalg::kernel::dot_3x1(r0xr2, r0xr2);
    scalar_t d2 = core::linalg::kernel::dot_3x1(r1xr2, r1xr2);

    scalar_t dmax = d0;
    int imax = 0;
    if (d1 > dmax) {
        dmax = d1;
        imax = 1;
    }
    if (d2 > dmax) {
        imax = 2;
    }

    if (imax == 0) {
        scalar_t sqrt_d = sqrt(d0);
        eigen_vector0[0] = r0xr1[0] / sqrt_d;
        eigen_vector0[1] = r0xr1[1] / sqrt_d;
        eigen_vector0[2] = r0xr1[2] / sqrt_d;
        return;
    } else if (imax == 1) {
        scalar_t sqrt_d = sqrt(d1);
        eigen_vector0[0] = r0xr2[0] / sqrt_d;
        eigen_vector0[1] = r0xr2[1] / sqrt_d;
        eigen_vector0[2] = r0xr2[2] / sqrt_d;
        return;
    } else {
        scalar_t sqrt_d = sqrt(d2);
        eigen_vector0[0] = r1xr2[0] / sqrt_d;
        eigen_vector0[1] = r1xr2[1] / sqrt_d;
        eigen_vector0[2] = r1xr2[2] / sqrt_d;
        return;
    }
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void ComputeEigenvector1(
        const scalar_t* A,
        const scalar_t* evec0,
        const scalar_t eval1,
        scalar_t* eigen_vector1) {
    scalar_t U[3];
    if (abs(evec0[0]) > abs(evec0[1])) {
        scalar_t inv_length =
                1.0 / sqrt(evec0[0] * evec0[0] + evec0[2] * evec0[2]);
        U[0] = -evec0[2] * inv_length;
        U[1] = 0.0;
        U[2] = evec0[0] * inv_length;
    } else {
        scalar_t inv_length =
                1.0 / sqrt(evec0[1] * evec0[1] + evec0[2] * evec0[2]);
        U[0] = 0.0;
        U[1] = evec0[2] * inv_length;
        U[2] = -evec0[1] * inv_length;
    }
    scalar_t V[3], AU[3], AV[3];
    core::linalg::kernel::cross_3x1(evec0, U, V);
    // AU = A * U and AV = A * V (A is the symmetric 3x3 matrix in row-major
    // order).
    core::linalg::kernel::matmul3x3_3x1(A[0], A[1], A[2], A[3], A[4], A[5],
                                        A[6], A[7], A[8], U[0], U[1], U[2],
                                        AU[0], AU[1], AU[2]);
    core::linalg::kernel::matmul3x3_3x1(A[0], A[1], A[2], A[3], A[4], A[5],
                                        A[6], A[7], A[8], V[0], V[1], V[2],
                                        AV[0], AV[1], AV[2]);

    scalar_t m00 = core::linalg::kernel::dot_3x1(U, AU) - eval1;
    scalar_t m01 = core::linalg::kernel::dot_3x1(U, AV);
    scalar_t m11 = core::linalg::kernel::dot_3x1(V, AV) - eval1;

    scalar_t absM00 = abs(m00);
    scalar_t absM01 = abs(m01);
    scalar_t absM11 = abs(m11);
    scalar_t max_abs_comp;

    if (absM00 >= absM11) {
        max_abs_comp = max(absM00, absM01);
        if (max_abs_comp > 0) {
            if (absM00 >= absM01) {
                m01 /= m00;
                m00 = 1 / sqrt(1 + m01 * m01);
                m01 *= m00;
            } else {
                m00 /= m01;
                m01 = 1 / sqrt(1 + m00 * m00);
                m00 *= m01;
            }
            eigen_vector1[0] = m01 * U[0] - m00 * V[0];
            eigen_vector1[1] = m01 * U[1] - m00 * V[1];
            eigen_vector1[2] = m01 * U[2] - m00 * V[2];
            return;
        } else {
            eigen_vector1[0] = U[0];
            eigen_vector1[1] = U[1];
            eigen_vector1[2] = U[2];
            return;
        }
    } else {
        max_abs_comp = max(absM11, absM01);
        if (max_abs_comp > 0) {
            if (absM11 >= absM01) {
                m01 /= m11;
                m11 = 1 / sqrt(1 + m01 * m01);
                m01 *= m11;
            } else {
                m11 /= m01;
                m01 = 1 / sqrt(1 + m11 * m11);
                m11 *= m01;
            }
            eigen_vector1[0] = m11 * U[0] - m01 * V[0];
            eigen_vector1[1] = m11 * U[1] - m01 * V[1];
            eigen_vector1[2] = m11 * U[2] - m01 * V[2];
            return;
        } else {
            eigen_vector1[0] = U[0];
            eigen_vector1[1] = U[1];
            eigen_vector1[2] = U[2];
            return;
        }
    }
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void EstimatePointWiseNormalsWithFastEigen3x3(
        const scalar_t* covariance_ptr, scalar_t* normals_ptr) {
    // Based on:
    // https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
    // which handles edge cases like points on a plane.
    scalar_t max_coeff = covariance_ptr[0];

    for (int i = 1; i < 9; ++i) {
        if (max_coeff < covariance_ptr[i]) {
            max_coeff = covariance_ptr[i];
        }
    }

    if (max_coeff == 0) {
        normals_ptr[0] = 0.0;
        normals_ptr[1] = 0.0;
        normals_ptr[2] = 0.0;
        return;
    }

    scalar_t A[9] = {0};

    for (int i = 0; i < 9; ++i) {
        A[i] = covariance_ptr[i] / max_coeff;
    }

    scalar_t norm = A[1] * A[1] + A[2] * A[2] + A[5] * A[5];

    if (norm > 0) {
        scalar_t eval[3];
        scalar_t evec0[3];
        scalar_t evec1[3];
        scalar_t evec2[3];

        scalar_t q = (A[0] + A[4] + A[8]) / 3.0;

        scalar_t b00 = A[0] - q;
        scalar_t b11 = A[4] - q;
        scalar_t b22 = A[8] - q;

        scalar_t p =
                sqrt((b00 * b00 + b11 * b11 + b22 * b22 + norm * 2.0) / 6.0);

        scalar_t c00 = b11 * b22 - A[5] * A[5];
        scalar_t c01 = A[1] * b22 - A[5] * A[2];
        scalar_t c02 = A[1] * A[5] - b11 * A[2];
        scalar_t det = (b00 * c00 - A[1] * c01 + A[2] * c02) / (p * p * p);

        scalar_t half_det = det * 0.5;
        half_det = min(max(half_det, static_cast<scalar_t>(-1.0)),
                       static_cast<scalar_t>(1.0));

        scalar_t angle = acos(half_det) / 3.0;
        const scalar_t two_thirds_pi = 2.09439510239319549;

        scalar_t beta2 = cos(angle) * 2.0;
        scalar_t beta0 = cos(angle + two_thirds_pi) * 2.0;
        scalar_t beta1 = -(beta0 + beta2);

        eval[0] = q + p * beta0;
        eval[1] = q + p * beta1;
        eval[2] = q + p * beta2;

        if (half_det >= 0) {
            ComputeEigenvector0<scalar_t>(A, eval[2], evec2);

            if (eval[2] < eval[0] && eval[2] < eval[1]) {
                normals_ptr[0] = evec2[0];
                normals_ptr[1] = evec2[1];
                normals_ptr[2] = evec2[2];

                return;
            }

            ComputeEigenvector1<scalar_t>(A, evec2, eval[1], evec1);

            if (eval[1] < eval[0] && eval[1] < eval[2]) {
                normals_ptr[0] = evec1[0];
                normals_ptr[1] = evec1[1];
                normals_ptr[2] = evec1[2];

                return;
            }

            normals_ptr[0] = evec1[1] * evec2[2] - evec1[2] * evec2[1];
            normals_ptr[1] = evec1[2] * evec2[0] - evec1[0] * evec2[2];
            normals_ptr[2] = evec1[0] * evec2[1] - evec1[1] * evec2[0];

            return;
        } else {
            ComputeEigenvector0<scalar_t>(A, eval[0], evec0);

            if (eval[0] < eval[1] && eval[0] < eval[2]) {
                normals_ptr[0] = evec0[0];
                normals_ptr[1] = evec0[1];
                normals_ptr[2] = evec0[2];
                return;
            }

            ComputeEigenvector1<scalar_t>(A, evec0, eval[1], evec1);

            if (eval[1] < eval[0] && eval[1] < eval[2]) {
                normals_ptr[0] = evec1[0];
                normals_ptr[1] = evec1[1];
                normals_ptr[2] = evec1[2];
                return;
            }

            normals_ptr[0] = evec0[1] * evec1[2] - evec0[2] * evec1[1];
            normals_ptr[1] = evec0[2] * evec1[0] - evec0[0] * evec1[2];
            normals_ptr[2] = evec0[0] * evec1[1] - evec0[1] * evec1[0];
            return;
        }
    } else {
        if (covariance_ptr[0] < covariance_ptr[4] &&
            covariance_ptr[0] < covariance_ptr[8]) {
            normals_ptr[0] = 1.0;
            normals_ptr[1] = 0.0;
            normals_ptr[2] = 0.0;
            return;
        } else if (covariance_ptr[4] < covariance_ptr[0] &&
                   covariance_ptr[4] < covariance_ptr[8]) {
            normals_ptr[0] = 0.0;
            normals_ptr[1] = 1.0;
            normals_ptr[2] = 0.0;
            return;
        } else {
            normals_ptr[0] = 0.0;
            normals_ptr[1] = 0.0;
            normals_ptr[2] = 1.0;
            return;
        }
    }
}

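// Background for the closed form above (an illustrative note): with
// q = trace(A) / 3 and p = sqrt(trace((A - q * I)^2) / 6), the scaled matrix
// B = (A - q * I) / p has det(B) / 2 in [-1, 1], and its eigenvalues are
// beta_k = 2 * cos(acos(det(B) / 2) / 3 + 2 * pi * k / 3), so the eigenvalues
// of A are eval_k = q + p * beta_k. The sign of half_det decides whether the
// smallest eigenvalue is isolated first (half_det < 0) or last
// (half_det >= 0), which is why the two branches start from different
// eigenvectors.
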
#if defined(__CUDACC__)
void EstimateNormalsFromCovariancesCUDA
#else
void EstimateNormalsFromCovariancesCPU
#endif
        (const core::Tensor& covariances,
         core::Tensor& normals,
         const bool has_normals) {
    core::Dtype dtype = covariances.GetDtype();
    int64_t n = covariances.GetLength();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
        scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();

        core::ParallelFor(
                covariances.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int32_t covariances_offset = 9 * workload_idx;
                    int32_t normals_offset = 3 * workload_idx;
                    scalar_t normals_output[3] = {0};
                    EstimatePointWiseNormalsWithFastEigen3x3<scalar_t>(
                            covariances_ptr + covariances_offset,
                            normals_output);

                    if ((normals_output[0] * normals_output[0] +
                         normals_output[1] * normals_output[1] +
                         normals_output[2] * normals_output[2]) == 0.0 &&
                        !has_normals) {
                        normals_output[0] = 0.0;
                        normals_output[1] = 0.0;
                        normals_output[2] = 1.0;
                    }
                    if (has_normals) {
                        if ((normals_ptr[normals_offset] * normals_output[0] +
                             normals_ptr[normals_offset + 1] *
                                     normals_output[1] +
                             normals_ptr[normals_offset + 2] *
                                     normals_output[2]) < 0.0) {
                            normals_output[0] *= -1;
                            normals_output[1] *= -1;
                            normals_output[2] *= -1;
                        }
                    }

                    normals_ptr[normals_offset] = normals_output[0];
                    normals_ptr[normals_offset + 1] = normals_output[1];
                    normals_ptr[normals_offset + 2] = normals_output[2];
                });
    });

    core::cuda::Synchronize(covariances.GetDevice());
}

template <typename scalar_t>
inline CLOUDVIEWER_HOST_DEVICE void EstimatePointWiseColorGradientKernel(
        const scalar_t* points_ptr,
        const scalar_t* normals_ptr,
        const scalar_t* colors_ptr,
        const int32_t& idx_offset,
        const int32_t* indices_ptr,
        const int32_t& indices_count,
        scalar_t* color_gradients_ptr) {
    if (indices_count < 4) {
        color_gradients_ptr[idx_offset] = 0;
        color_gradients_ptr[idx_offset + 1] = 0;
        color_gradients_ptr[idx_offset + 2] = 0;
    } else {
        scalar_t vt[3] = {points_ptr[idx_offset], points_ptr[idx_offset + 1],
                          points_ptr[idx_offset + 2]};

        scalar_t nt[3] = {normals_ptr[idx_offset], normals_ptr[idx_offset + 1],
                          normals_ptr[idx_offset + 2]};

        // Intensity at the query point: mean of its RGB channels.
        scalar_t it = (colors_ptr[idx_offset] + colors_ptr[idx_offset + 1] +
                       colors_ptr[idx_offset + 2]) /
                      3.0;

        scalar_t AtA[9] = {0};
        scalar_t Atb[3] = {0};

        // Approximate image gradient of vt's tangential plane.
        // The projection (p') of a point p onto a plane defined by normal n,
        // where o is the closest point to p on the plane, is given by:
        //   p' = p - [(p - o).dot(n)] * n
        //      = p - [p.dot(n) - s] * n    [where s = o.dot(n)]

        // Computing the scalar s.
        scalar_t s = vt[0] * nt[0] + vt[1] * nt[1] + vt[2] * nt[2];

        int i = 1;
        for (; i < indices_count; i++) {
            // A negative index marks the end of the valid neighbours.
            if (indices_ptr[i] < 0) {
                break;
            }
            int64_t neighbour_idx_offset = 3 * indices_ptr[i];

            scalar_t vt_adj[3] = {points_ptr[neighbour_idx_offset],
                                  points_ptr[neighbour_idx_offset + 1],
                                  points_ptr[neighbour_idx_offset + 2]};

            // p' = p - d * n [where d = p.dot(n) - s]
            // Computing the scalar d.
            scalar_t d = vt_adj[0] * nt[0] + vt_adj[1] * nt[1] +
                         vt_adj[2] * nt[2] - s;

            // Computing p' (the projection of the point).
            scalar_t vt_proj[3] = {vt_adj[0] - d * nt[0], vt_adj[1] - d * nt[1],
                                   vt_adj[2] - d * nt[2]};

            scalar_t it_adj = (colors_ptr[neighbour_idx_offset + 0] +
                               colors_ptr[neighbour_idx_offset + 1] +
                               colors_ptr[neighbour_idx_offset + 2]) /
                              3.0;

            scalar_t A[3] = {vt_proj[0] - vt[0], vt_proj[1] - vt[1],
                             vt_proj[2] - vt[2]};

            AtA[0] += A[0] * A[0];
            AtA[1] += A[1] * A[0];
            AtA[2] += A[2] * A[0];
            AtA[4] += A[1] * A[1];
            AtA[5] += A[2] * A[1];
            AtA[8] += A[2] * A[2];

            scalar_t b = it_adj - it;

            Atb[0] += A[0] * b;
            Atb[1] += A[1] * b;
            Atb[2] += A[2] * b;
        }

        // Orthogonal constraint.
        scalar_t A[3] = {(i - 1) * nt[0], (i - 1) * nt[1], (i - 1) * nt[2]};

        AtA[0] += A[0] * A[0];
        AtA[1] += A[0] * A[1];
        AtA[2] += A[0] * A[2];
        AtA[4] += A[1] * A[1];
        AtA[5] += A[1] * A[2];
        AtA[8] += A[2] * A[2];

        // Symmetry.
        AtA[3] = AtA[1];
        AtA[6] = AtA[2];
        AtA[7] = AtA[5];

        core::linalg::kernel::solve_svd3x3(AtA, Atb,
                                           color_gradients_ptr + idx_offset);
    }
}

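// The kernel above fits a per-point color gradient g by least squares (an
// explanatory note): each neighbour p_i is projected to p_i' on the query
// point's tangent plane, a row a_i = p_i' - p with residual
// b_i = I(p_i) - I(p) is accumulated into the normal equations
// AtA * g = Atb, and one extra row proportional to the normal n with zero
// residual softly constrains g to stay inside the tangent plane;
// solve_svd3x3 then recovers g.
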
#if defined(__CUDACC__)
void EstimateColorGradientsUsingHybridSearchCUDA
#else
void EstimateColorGradientsUsingHybridSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const double& radius,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.HybridIndex(radius);
    if (!check) {
        utility::LogError("NearestNeighborSearch::HybridIndex is not set.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.HybridSearch(points, radius, max_nn);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // NNS [Hybrid Search].
                    int32_t neighbour_offset = max_nn * workload_idx;
                    // Count of valid correspondences per point.
                    int32_t neighbour_count =
                            neighbour_counts_ptr[workload_idx];
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count, color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateColorGradientsUsingKNNSearchCUDA
#else
void EstimateColorGradientsUsingKNNSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const int64_t& max_nn) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.KnnIndex();
    if (!check) {
        utility::LogError("KnnIndex is not set.");
    }

    core::Tensor indices, distance;
    std::tie(indices, distance) = tree.KnnSearch(points, max_nn);

    indices = indices.To(core::Int32).Contiguous();
    int64_t nn_count = indices.GetShape()[1];

    if (nn_count < 4) {
        utility::LogError(
                "Not enough neighbors to compute Covariances / Normals. Try "
                "changing the search parameter.");
    }

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    int32_t neighbour_offset = max_nn * workload_idx;
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset, nn_count,
                            color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

#if defined(__CUDACC__)
void EstimateColorGradientsUsingRadiusSearchCUDA
#else
void EstimateColorGradientsUsingRadiusSearchCPU
#endif
        (const core::Tensor& points,
         const core::Tensor& normals,
         const core::Tensor& colors,
         core::Tensor& color_gradients,
         const double& radius) {
    core::Dtype dtype = points.GetDtype();
    int64_t n = points.GetLength();

    core::nns::NearestNeighborSearch tree(points, core::Int32);

    bool check = tree.FixedRadiusIndex(radius);
    if (!check) {
        utility::LogError("RadiusIndex is not set.");
    }

    core::Tensor indices, distance, counts;
    std::tie(indices, distance, counts) =
            tree.FixedRadiusSearch(points, radius);

    indices = indices.To(core::Int32).Contiguous();
    counts = counts.Contiguous();

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        auto points_ptr = points.GetDataPtr<scalar_t>();
        auto normals_ptr = normals.GetDataPtr<scalar_t>();
        auto colors_ptr = colors.GetDataPtr<scalar_t>();
        auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
        auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
        auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();

        core::ParallelFor(
                points.GetDevice(), n,
                [=] CLOUDVIEWER_DEVICE(int64_t workload_idx) {
                    // `counts` holds prefix sums (row splits): the neighbour
                    // list of point i starts at counts[i].
                    int32_t neighbour_offset =
                            neighbour_counts_ptr[workload_idx];
                    // Count of valid correspondences per point.
                    const int32_t neighbour_count =
                            (neighbour_counts_ptr[workload_idx + 1] -
                             neighbour_counts_ptr[workload_idx]);
                    int32_t idx_offset = 3 * workload_idx;

                    EstimatePointWiseColorGradientKernel(
                            points_ptr, normals_ptr, colors_ptr, idx_offset,
                            neighbour_indices_ptr + neighbour_offset,
                            neighbour_count, color_gradients_ptr);
                });
    });

    core::cuda::Synchronize(points.GetDevice());
}

}  // namespace pointcloud
}  // namespace kernel
}  // namespace geometry
}  // namespace t
}  // namespace cloudViewer