#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>

#include <cmath>
#include <vector>
template <typename scalar_t, typename func_t>
static void ComputePosePointToPlaneKernelCPU(
        const scalar_t *source_points_ptr,
        const scalar_t *target_points_ptr,
        const scalar_t *target_normals_ptr,
        const int64_t *correspondence_indices,
        const int n,
        scalar_t *global_sum,
        func_t GetWeightFromRobustKernel) {
    // AtA is a symmetric matrix, so we only need 21 elements instead of 36.
    // Atb is of size 6x1. Combining both, A_1x29 is a temporary storage
    // with [0, 21) as AtA, [21, 27) as Atb, 27 as the residual and 28 as the
    // inlier count.
    std::vector<scalar_t> A_1x29(29, 0.0);

#ifdef _WIN32
    // On Windows, reduce with TBB instead of the OpenMP array reduction.
    std::vector<scalar_t> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<scalar_t> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
#else
    scalar_t *A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; ++workload_idx) {
#endif
                    scalar_t J_ij[6] = {0};
                    scalar_t r = 0;

                    bool valid = kernel::GetJacobianPointToPlane<scalar_t>(
                            workload_idx, source_points_ptr, target_points_ptr,
                            target_normals_ptr, correspondence_indices, J_ij,
                            r);

                    scalar_t w = GetWeightFromRobustKernel(r);

                    if (valid) {
                        // Accumulate J, r into JtJ (lower triangle) and Jtr.
                        int i = 0;
                        for (int j = 0; j < 6; ++j) {
                            for (int k = 0; k <= j; ++k) {
                                A_reduction[i] += J_ij[j] * w * J_ij[k];
                                ++i;
                            }
                            A_reduction[21 + j] += J_ij[j] * w * r;
                        }
                        A_reduction[27] += r * r;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _WIN32
                return A_reduction;
            },
            // TBB join: element-wise sum of partial reductions.
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(29);
                for (int j = 0; j < 29; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif

    for (int i = 0; i < 29; ++i) {
        global_sum[i] = A_1x29[i];
    }
}
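// Illustrative note (not part of the original kernel): DecodeAndSolve6x6
// later expands this compressed 29-element vector back into a full 6x6
// system. The decoding is equivalent to the following sketch, where `sum`
// is the reduced vector:
//
//     int i = 0;
//     for (int j = 0; j < 6; ++j) {
//         for (int k = 0; k <= j; ++k) {
//             JtJ[j][k] = JtJ[k][j] = sum[i++];  // symmetric AtA
//         }
//         Jtr[j] = sum[21 + j];                  // Atb
//     }
//     double residual = sum[27];                 // sum of squared residuals
//     int inlier_count = static_cast<int>(sum[28]);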
void ComputePosePointToPlaneCPU(
        const core::Tensor &source_points, const core::Tensor &target_points,
        const core::Tensor &target_normals,
        const core::Tensor &correspondence_indices, core::Tensor &pose,
        float &residual, int &inlier_count, const core::Dtype &dtype,
        const core::Device &device, const registration::RobustKernel &kernel) {
    const int n = source_points.GetLength();
    core::Tensor global_sum = core::Tensor::Zeros({29}, dtype, device);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t *global_sum_ptr = global_sum.GetDataPtr<scalar_t>();

        DISPATCH_ROBUST_KERNEL_FUNCTION(
                kernel.type_, scalar_t, kernel.scaling_parameter_,
                kernel.shape_parameter_, [&]() {
                    kernel::ComputePosePointToPlaneKernelCPU(
                            source_points.GetDataPtr<scalar_t>(),
                            target_points.GetDataPtr<scalar_t>(),
                            target_normals.GetDataPtr<scalar_t>(),
                            correspondence_indices.GetDataPtr<int64_t>(), n,
                            global_sum_ptr, GetWeightFromRobustKernel);
                });
    });

    DecodeAndSolve6x6(global_sum, pose, residual, inlier_count);
}
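// Usage sketch (hypothetical caller, not part of this file; assumes Nx3
// float point/normal tensors and an Nx1 Int64 correspondence tensor with -1
// marking invalid matches):
//
//     core::Tensor pose;   // output: 6-element pose update
//     float residual = 0.0f;
//     int inlier_count = 0;
//     ComputePosePointToPlaneCPU(source_points, target_points, target_normals,
//                                correspondence_indices, pose, residual,
//                                inlier_count, core::Float32, device, kernel);
//     // residual and inlier_count are filled in by DecodeAndSolve6x6.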
template <typename scalar_t, typename funct_t>
static void ComputePoseColoredICPKernelCPU(
        const scalar_t *source_points_ptr,
        const scalar_t *source_colors_ptr,
        const scalar_t *target_points_ptr,
        const scalar_t *target_normals_ptr,
        const scalar_t *target_colors_ptr,
        const scalar_t *target_color_gradients_ptr,
        const int64_t *correspondence_indices,
        const scalar_t &sqrt_lambda_geometric,
        const scalar_t &sqrt_lambda_photometric,
        const int n,
        scalar_t *global_sum,
        funct_t GetWeightFromRobustKernel) {
    // Compressed storage as above: [0, 21) AtA, [21, 27) Atb, 27 residual,
    // 28 inlier count.
    std::vector<scalar_t> A_1x29(29, 0.0);

#ifdef _WIN32
    std::vector<scalar_t> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<scalar_t> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
#else
    scalar_t *A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; ++workload_idx) {
#endif
                    scalar_t J_G[6] = {0}, J_I[6] = {0};
                    scalar_t r_G = 0, r_I = 0;

                    bool valid = GetJacobianColoredICP<scalar_t>(
                            workload_idx, source_points_ptr, source_colors_ptr,
                            target_points_ptr, target_normals_ptr,
                            target_colors_ptr, target_color_gradients_ptr,
                            correspondence_indices, sqrt_lambda_geometric,
                            sqrt_lambda_photometric, J_G, J_I, r_G, r_I);

                    scalar_t w_G = GetWeightFromRobustKernel(r_G);
                    scalar_t w_I = GetWeightFromRobustKernel(r_I);

                    if (valid) {
                        int i = 0;
                        for (int j = 0; j < 6; ++j) {
                            for (int k = 0; k <= j; ++k) {
                                A_reduction[i] += J_G[j] * w_G * J_G[k] +
                                                  J_I[j] * w_I * J_I[k];
                                ++i;
                            }
                            A_reduction[21 + j] +=
                                    J_G[j] * w_G * r_G + J_I[j] * w_I * r_I;
                        }
                        A_reduction[27] += r_G * r_G + r_I * r_I;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _WIN32
                return A_reduction;
            },
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(29);
                for (int j = 0; j < 29; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif

    for (int i = 0; i < 29; ++i) {
        global_sum[i] = A_1x29[i];
    }
}
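// Note (added for clarity): each correspondence contributes a geometric
// residual r_G and a photometric residual r_I, both already pre-scaled by
// sqrt(lambda_geometric) and sqrt(1 - lambda_geometric) inside the Jacobian
// helper. The accumulation above is therefore the Gauss-Newton system of the
// combined robustified cost:
//
//     AtA += w_G * J_G * J_G^T + w_I * J_I * J_I^T
//     Atb += w_G * r_G * J_G   + w_I * r_I * J_I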
void ComputePoseColoredICPCPU(
        const core::Tensor &source_points, const core::Tensor &source_colors,
        const core::Tensor &target_points, const core::Tensor &target_normals,
        const core::Tensor &target_colors,
        const core::Tensor &target_color_gradients,
        const core::Tensor &correspondence_indices, core::Tensor &pose,
        float &residual, int &inlier_count, const core::Dtype &dtype,
        const core::Device &device, const registration::RobustKernel &kernel,
        const double &lambda_geometric) {
    const int n = source_points.GetLength();
    core::Tensor global_sum = core::Tensor::Zeros({29}, dtype, device);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t sqrt_lambda_geometric =
                static_cast<scalar_t>(sqrt(lambda_geometric));
        scalar_t sqrt_lambda_photometric =
                static_cast<scalar_t>(sqrt(1.0 - lambda_geometric));

        DISPATCH_ROBUST_KERNEL_FUNCTION(
                kernel.type_, scalar_t, kernel.scaling_parameter_,
                kernel.shape_parameter_, [&]() {
                    kernel::ComputePoseColoredICPKernelCPU(
                            source_points.GetDataPtr<scalar_t>(),
                            source_colors.GetDataPtr<scalar_t>(),
                            target_points.GetDataPtr<scalar_t>(),
                            target_normals.GetDataPtr<scalar_t>(),
                            target_colors.GetDataPtr<scalar_t>(),
                            target_color_gradients.GetDataPtr<scalar_t>(),
                            correspondence_indices.GetDataPtr<int64_t>(),
                            sqrt_lambda_geometric, sqrt_lambda_photometric, n,
                            global_sum.GetDataPtr<scalar_t>(),
                            GetWeightFromRobustKernel);
                });
    });

    DecodeAndSolve6x6(global_sum, pose, residual, inlier_count);
}
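// The photometric weight follows from lambda_geometric alone: the two terms
// are convexly combined, so lambda_photometric = 1 - lambda_geometric, hence
// the sqrt(1.0 - lambda_geometric) above.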
template <typename scalar_t, typename funct1_t, typename funct2_t>
static void ComputePoseDopplerICPKernelCPU(
        const scalar_t *source_points_ptr,
        const scalar_t *source_dopplers_ptr,
        const scalar_t *source_directions_ptr,
        const scalar_t *target_points_ptr,
        const scalar_t *target_normals_ptr,
        const int64_t *correspondence_indices,
        const scalar_t *R_S_to_V,
        const scalar_t *r_v_to_s_in_V,
        const scalar_t *v_s_in_S,
        const bool reject_dynamic_outliers,
        const scalar_t doppler_outlier_threshold,
        const scalar_t sqrt_lambda_geometric,
        const scalar_t sqrt_lambda_doppler,
        const scalar_t sqrt_lambda_doppler_by_dt,
        const int n,
        scalar_t *global_sum,
        funct1_t GetWeightFromRobustKernelFirst,
        funct2_t GetWeightFromRobustKernelSecond) {
    // Compressed storage as above: [0, 21) AtA, [21, 27) Atb, 27 residual,
    // 28 inlier count.
    std::vector<scalar_t> A_1x29(29, 0.0);

#ifdef _WIN32
    std::vector<scalar_t> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<scalar_t> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
#else
    scalar_t *A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; ++workload_idx) {
#endif
                    scalar_t J_G[6] = {0}, J_D[6] = {0};
                    scalar_t r_G = 0, r_D = 0;

                    bool valid = GetJacobianDopplerICP<scalar_t>(
                            workload_idx, source_points_ptr,
                            source_dopplers_ptr, source_directions_ptr,
                            target_points_ptr, target_normals_ptr,
                            correspondence_indices, R_S_to_V, r_v_to_s_in_V,
                            v_s_in_S, reject_dynamic_outliers,
                            doppler_outlier_threshold, sqrt_lambda_geometric,
                            sqrt_lambda_doppler, sqrt_lambda_doppler_by_dt,
                            J_G, J_D, r_G, r_D);

                    scalar_t w_G = GetWeightFromRobustKernelFirst(r_G);
                    scalar_t w_D = GetWeightFromRobustKernelSecond(r_D);

                    if (valid) {
                        int i = 0;
                        for (int j = 0; j < 6; ++j) {
                            for (int k = 0; k <= j; ++k) {
                                A_reduction[i] += J_G[j] * w_G * J_G[k] +
                                                  J_D[j] * w_D * J_D[k];
                                ++i;
                            }
                            A_reduction[21 + j] +=
                                    J_G[j] * w_G * r_G + J_D[j] * w_D * r_D;
                        }
                        A_reduction[27] += r_G * r_G + r_D * r_D;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _WIN32
                return A_reduction;
            },
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(29);
                for (int j = 0; j < 29; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif

    for (int i = 0; i < 29; ++i) {
        global_sum[i] = A_1x29[i];
    }
}
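// Note (added for clarity): unlike colored ICP, the geometric and Doppler
// residuals are robustified by two separate kernels
// (GetWeightFromRobustKernelFirst / ...Second), so range outliers and
// radial-velocity outliers can be down-weighted independently.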
template <typename scalar_t>
void PreComputeForDopplerICPKernelCPU(const scalar_t *R_S_to_V,
                                      const scalar_t *r_v_to_s_in_V,
                                      const scalar_t *w_v_in_V,
                                      const scalar_t *v_v_in_V,
                                      scalar_t *v_s_in_S) {
    kernel::PreComputeForDopplerICP(R_S_to_V, r_v_to_s_in_V, w_v_in_V,
                                    v_v_in_V, v_s_in_S);
}
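// PreComputeForDopplerICP (the shared host/device helper) computes the
// sensor's velocity expressed in the sensor frame, v_s_in_S, from the
// vehicle-frame angular velocity w_v_in_V and linear velocity v_v_in_V,
// using the sensor-to-vehicle rotation R_S_to_V and the lever arm
// r_v_to_s_in_V. It runs once per iteration, outside the per-point loop.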
void ComputePoseDopplerICPCPU(
        const core::Tensor &source_points, const core::Tensor &source_dopplers,
        const core::Tensor &source_directions,
        const core::Tensor &target_points, const core::Tensor &target_normals,
        const core::Tensor &correspondence_indices, core::Tensor &output_pose,
        float &residual, int &inlier_count, const core::Dtype &dtype,
        const core::Device &device, const core::Tensor &R_S_to_V,
        const core::Tensor &r_v_to_s_in_V, const core::Tensor &w_v_in_V,
        const core::Tensor &v_v_in_V, const double period,
        const bool reject_dynamic_outliers,
        const double doppler_outlier_threshold,
        const registration::RobustKernel &kernel_geometric,
        const registration::RobustKernel &kernel_doppler,
        const double lambda_doppler) {
    const int n = source_points.GetLength();
    core::Tensor global_sum = core::Tensor::Zeros({29}, dtype, device);
    core::Tensor v_s_in_S = core::Tensor::Zeros({3}, dtype, device);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const scalar_t sqrt_lambda_geometric =
                sqrt(1.0 - static_cast<scalar_t>(lambda_doppler));
        const scalar_t sqrt_lambda_doppler = sqrt(lambda_doppler);
        const scalar_t sqrt_lambda_doppler_by_dt =
                sqrt_lambda_doppler / static_cast<scalar_t>(period);

        PreComputeForDopplerICPKernelCPU<scalar_t>(
                R_S_to_V.GetDataPtr<scalar_t>(),
                r_v_to_s_in_V.GetDataPtr<scalar_t>(),
                w_v_in_V.GetDataPtr<scalar_t>(),
                v_v_in_V.GetDataPtr<scalar_t>(),
                v_s_in_S.GetDataPtr<scalar_t>());

        DISPATCH_DUAL_ROBUST_KERNEL_FUNCTION(
                scalar_t, kernel_geometric.type_,
                kernel_geometric.scaling_parameter_, kernel_doppler.type_,
                kernel_doppler.scaling_parameter_, [&]() {
                    kernel::ComputePoseDopplerICPKernelCPU(
                            source_points.GetDataPtr<scalar_t>(),
                            source_dopplers.GetDataPtr<scalar_t>(),
                            source_directions.GetDataPtr<scalar_t>(),
                            target_points.GetDataPtr<scalar_t>(),
                            target_normals.GetDataPtr<scalar_t>(),
                            correspondence_indices.GetDataPtr<int64_t>(),
                            R_S_to_V.GetDataPtr<scalar_t>(),
                            r_v_to_s_in_V.GetDataPtr<scalar_t>(),
                            v_s_in_S.GetDataPtr<scalar_t>(),
                            reject_dynamic_outliers,
                            static_cast<scalar_t>(doppler_outlier_threshold),
                            sqrt_lambda_geometric, sqrt_lambda_doppler,
                            sqrt_lambda_doppler_by_dt, n,
                            global_sum.GetDataPtr<scalar_t>(),
                            GetWeightFromRobustKernelFirst,
                            GetWeightFromRobustKernelSecond);
                });
    });

    DecodeAndSolve6x6(global_sum, output_pose, residual, inlier_count);
}
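// Note on sqrt_lambda_doppler_by_dt: the Doppler residual carries a 1/period
// time normalization, so sqrt(lambda_doppler) / period folds the convex
// weight and the time scaling into one constant before the hot loop.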
template <typename scalar_t>
static void Get3x3SxyLinearSystem(const scalar_t *source_points_ptr,
                                  const scalar_t *target_points_ptr,
                                  const int64_t *correspondence_indices,
                                  const int &n,
                                  const core::Dtype &dtype,
                                  const core::Device &device,
                                  core::Tensor &Sxy,
                                  core::Tensor &target_mean,
                                  core::Tensor &source_mean,
                                  int &inlier_count) {
    // mean_1x7 packs the running sums: [0, 3) source mean, [3, 6) target
    // mean, 6 the correspondence count.
    std::vector<scalar_t> mean_1x7(7, 0.0);

    std::vector<scalar_t> zeros_7(7, 0.0);

    mean_1x7 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_7,
            [&](tbb::blocked_range<int> r,
                std::vector<scalar_t> mean_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
                    if (correspondence_indices[workload_idx] != -1) {
                        const int64_t target_idx =
                                3 * correspondence_indices[workload_idx];
                        mean_reduction[0] +=
                                source_points_ptr[3 * workload_idx];
                        mean_reduction[1] +=
                                source_points_ptr[3 * workload_idx + 1];
                        mean_reduction[2] +=
                                source_points_ptr[3 * workload_idx + 2];
                        mean_reduction[3] += target_points_ptr[target_idx];
                        mean_reduction[4] += target_points_ptr[target_idx + 1];
                        mean_reduction[5] += target_points_ptr[target_idx + 2];
                        mean_reduction[6] += 1;
                    }
                }
                return mean_reduction;
            },
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(7);
                for (int j = 0; j < 7; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });

    if (mean_1x7[6] == 0) {
        utility::LogError(
                "Source point cloud has no correspondences with the target.");
    }
    for (int i = 0; i < 6; ++i) {
        mean_1x7[i] = mean_1x7[i] / mean_1x7[6];
    }

    // Cross-covariance sums: sxy_1x9[i] holds element (row, col) with
    // row = i % 3 and col = i / 3.
    std::vector<scalar_t> sxy_1x9(9, 0.0);

    std::vector<scalar_t> zeros_9(9, 0.0);

    sxy_1x9 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_9,
            [&](tbb::blocked_range<int> r,
                std::vector<scalar_t> sxy_1x9_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
                    if (correspondence_indices[workload_idx] != -1) {
                        for (int i = 0; i < 9; ++i) {
                            const int row = i % 3;
                            const int col = i / 3;
                            const int source_idx = 3 * workload_idx + row;
                            const int target_idx =
                                    3 * correspondence_indices[workload_idx] +
                                    col;
                            sxy_1x9_reduction[i] +=
                                    (source_points_ptr[source_idx] -
                                     mean_1x7[row]) *
                                    (target_points_ptr[target_idx] -
                                     mean_1x7[3 + col]);
                        }
                    }
                }
                return sxy_1x9_reduction;
            },
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(9);
                for (int j = 0; j < 9; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });

    source_mean = core::Tensor::Empty({1, 3}, dtype, device);
    scalar_t *source_mean_ptr = source_mean.GetDataPtr<scalar_t>();

    target_mean = core::Tensor::Empty({1, 3}, dtype, device);
    scalar_t *target_mean_ptr = target_mean.GetDataPtr<scalar_t>();

    Sxy = core::Tensor::Empty({3, 3}, dtype, device);
    scalar_t *sxy_ptr = Sxy.GetDataPtr<scalar_t>();

    int i = 0;
    for (int j = 0; j < 3; ++j) {
        for (int k = 0; k < 3; ++k) {
            sxy_ptr[j * 3 + k] = sxy_1x9[i] / mean_1x7[6];
            ++i;
        }
        source_mean_ptr[j] = mean_1x7[j];
        target_mean_ptr[j] = mean_1x7[j + 3];
    }

    inlier_count = static_cast<int64_t>(mean_1x7[6]);
}
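// In matrix form, the two reductions above compute, over the N valid
// correspondences p_i <-> q_i (source <-> target):
//
//     mu_s = (1/N) * sum_i p_i,    mu_t = (1/N) * sum_i q_i,
//     Sxy  = (1/N) * sum_i (q_i - mu_t) * (p_i - mu_s)^T,
//
// where the target-major orientation follows from the index mapping above
// (row = i % 3 indexes the source, col = i / 3 the target, and the copy into
// sxy_ptr transposes). This is the cross-covariance consumed by the
// Umeyama/Kabsch solve below.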
void ComputeRtPointToPointCPU(
        const core::Tensor &source_points, const core::Tensor &target_points,
        const core::Tensor &corres, core::Tensor &R, core::Tensor &t,
        int &inlier_count, const core::Dtype &dtype,
        const core::Device &device) {
    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        const int n = source_points.GetLength();
        const scalar_t *source_points_ptr =
                source_points.GetDataPtr<scalar_t>();
        const scalar_t *target_points_ptr =
                target_points.GetDataPtr<scalar_t>();
        const int64_t *correspondence_indices = corres.GetDataPtr<int64_t>();

        core::Tensor Sxy, target_mean, source_mean;
        Get3x3SxyLinearSystem<scalar_t>(
                source_points_ptr, target_points_ptr,
                correspondence_indices, n, dtype, device, Sxy,
                target_mean, source_mean, inlier_count);

        core::Tensor U, D, VT;
        std::tie(U, D, VT) = Sxy.SVD();
        core::Tensor S = core::Tensor::Eye(3, dtype, device);
        if (U.Det() * (VT.T()).Det() < 0) {
            // Guard against a reflection: flip the last singular direction.
            S[-1][-1] = -1;
        }

        R = U.Matmul(S.Matmul(VT));
        t = target_mean.Reshape({-1}) -
            R.Matmul(source_mean.T()).Reshape({-1});
    });
}
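// The block above is the closed-form point-to-point solution (Umeyama/
// Kabsch): with Sxy = U D V^T, the optimal rotation is R = U S V^T, where
// S = diag(1, 1, det(U) * det(V)) prevents R from degenerating into a
// reflection, and the translation is t = mu_t - R * mu_s.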
template <typename scalar_t>
static void ComputeInformationMatrixKernelCPU(
        const scalar_t *target_points_ptr,
        const int64_t *correspondence_indices,
        const int n,
        scalar_t *global_sum) {
    // AtA is a symmetric 6x6 matrix, so we only need 21 elements.
    std::vector<scalar_t> AtA(21, 0.0);

#ifdef _WIN32
    std::vector<scalar_t> zeros_21(21, 0.0);
    AtA = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_21,
            [&](tbb::blocked_range<int> r, std::vector<scalar_t> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     ++workload_idx) {
#else
    scalar_t *A_reduction = AtA.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 21]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    scalar_t J_x[6] = {0}, J_y[6] = {0}, J_z[6] = {0};

                    bool valid = GetInformationJacobians<scalar_t>(
                            workload_idx, target_points_ptr,
                            correspondence_indices, J_x, J_y, J_z);

                    if (valid) {
                        int i = 0;
                        for (int j = 0; j < 6; ++j) {
                            for (int k = 0; k <= j; ++k) {
                                A_reduction[i] += J_x[j] * J_x[k] +
                                                  J_y[j] * J_y[k] +
                                                  J_z[j] * J_z[k];
                                ++i;
                            }
                        }
                    }
                }
#ifdef _WIN32
                return A_reduction;
            },
            [&](std::vector<scalar_t> a, std::vector<scalar_t> b) {
                std::vector<scalar_t> result(21);
                for (int j = 0; j < 21; ++j) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif

    for (int i = 0; i < 21; ++i) {
        global_sum[i] = AtA[i];
    }
}
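// Note (added for clarity): the information matrix is G^T G, where each
// valid correspondence contributes three rows J_x, J_y, J_z (the derivatives
// of the target point's x/y/z coordinates with respect to the 6-DoF pose).
// Only the 21 lower-triangular entries of the symmetric 6x6 result are
// accumulated; the dispatcher below mirrors them into the full matrix.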
void ComputeInformationMatrixCPU(const core::Tensor &target_points,
                                 const core::Tensor &correspondence_indices,
                                 core::Tensor &information_matrix,
                                 const core::Dtype &dtype,
                                 const core::Device &device) {
    int n = correspondence_indices.GetLength();

    core::Tensor global_sum = core::Tensor::Zeros({21}, dtype, device);

    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
        scalar_t *global_sum_ptr = global_sum.GetDataPtr<scalar_t>();

        kernel::ComputeInformationMatrixKernelCPU(
                target_points.GetDataPtr<scalar_t>(),
                correspondence_indices.GetDataPtr<int64_t>(), n,
                global_sum_ptr);
    });

    core::Tensor global_sum_cpu = global_sum.To(core::Float64);
    double *sum_ptr = global_sum_cpu.GetDataPtr<double>();

    information_matrix = core::Tensor::Empty({6, 6}, core::Float64,
                                             core::Device("CPU:0"));
    double *GTG_ptr = information_matrix.GetDataPtr<double>();

    // Mirror the 21 accumulated lower-triangular values into the full,
    // symmetric 6x6 matrix.
    int i = 0;
    for (int j = 0; j < 6; j++) {
        for (int k = 0; k <= j; k++) {
            GTG_ptr[j * 6 + k] = GTG_ptr[k * 6 + j] = sum_ptr[i];
            ++i;
        }
    }
}