ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
RGBDOdometryCPU.cpp
// ----------------------------------------------------------------------------
// -                        CloudViewer: www.cloudViewer.org                  -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.cloudViewer.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <Parallel.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>

namespace cloudViewer {
namespace t {
namespace pipelines {
namespace kernel {
namespace odometry {

void ComputeOdometryInformationMatrixCPU(const core::Tensor& source_vertex_map,
                                         const core::Tensor& target_vertex_map,
                                         const core::Tensor& intrinsic,
                                         const core::Tensor& source_to_target,
                                         const float square_dist_thr,
                                         core::Tensor& information) {
    NDArrayIndexer source_vertex_indexer(source_vertex_map, 2);
    NDArrayIndexer target_vertex_indexer(target_vertex_map, 2);

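    // TransformIndexer combines the pinhole intrinsic with the source-to-target
    // transformation, converting coordinates between the source and target
    // frames (3D/3D, 3D/2D, 2D/3D).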
    core::Tensor trans = source_to_target;
    t::geometry::kernel::TransformIndexer ti(intrinsic, trans);

    // Output
    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);

    core::Device device = source_vertex_map.GetDevice();

    int64_t n = rows * cols;

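    // A_1x21 accumulates the 21 upper-triangular entries of the symmetric 6x6
    // J^T J (Gauss-Newton information) matrix over all valid correspondences.
    // The MSVC build reduces with TBB's parallel_reduce, presumably because
    // MSVC's OpenMP support lacks the array-section reduction used on other
    // compilers below.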
    std::vector<float> A_1x21(21, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_21(21, 0.0);
    A_1x21 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_21,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x21.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 21]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_x[6], J_y[6], J_z[6];
                    float rx, ry, rz;

                    bool valid = GetJacobianPointToPoint(
                            x, y, square_dist_thr, source_vertex_indexer,
                            target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry,
                            rz);

                    if (valid) {
                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_x[j] * J_x[k];
                                A_reduction[i] += J_y[j] * J_y[k];
                                A_reduction[i] += J_z[j] * J_z[k];
                                i++;
                            }
                        }
                    }
                }
#ifdef _MSC_VER
                return A_reduction;
            },
            // TBB: Defining reduction operation.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(21);
                for (int j = 0; j < 21; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    core::Tensor A_reduction_tensor(A_1x21, {21}, core::Float32, device);
    float* reduction_ptr = A_reduction_tensor.GetDataPtr<float>();

    information = core::Tensor::Empty({6, 6}, core::Float64, device);
    double* info_ptr = information.GetDataPtr<double>();

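    // Expand the packed upper triangle into the full symmetric 6x6
    // information matrix.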
    for (int j = 0; j < 6; j++) {
        const int64_t reduction_idx = ((j * (j + 1)) / 2);
        for (int k = 0; k <= j; k++) {
            info_ptr[j * 6 + k] = reduction_ptr[reduction_idx + k];
            info_ptr[k * 6 + j] = reduction_ptr[reduction_idx + k];
        }
    }
}

void ComputeOdometryResultIntensityCPU(
        const core::Tensor& source_depth,
        const core::Tensor& target_depth,
        const core::Tensor& source_intensity,
        const core::Tensor& target_intensity,
        const core::Tensor& target_intensity_dx,
        const core::Tensor& target_intensity_dy,
        const core::Tensor& source_vertex_map,
        const core::Tensor& intrinsics,
        const core::Tensor& init_source_to_target,
        core::Tensor& delta,
        float& inlier_residual,
        int& inlier_count,
        const float depth_outlier_trunc,
        const float intensity_huber_delta) {
    NDArrayIndexer source_depth_indexer(source_depth, 2);
    NDArrayIndexer target_depth_indexer(target_depth, 2);

    NDArrayIndexer source_intensity_indexer(source_intensity, 2);
    NDArrayIndexer target_intensity_indexer(target_intensity, 2);

    NDArrayIndexer target_intensity_dx_indexer(target_intensity_dx, 2);
    NDArrayIndexer target_intensity_dy_indexer(target_intensity_dy, 2);

    NDArrayIndexer source_vertex_indexer(source_vertex_map, 2);

    core::Tensor trans = init_source_to_target;
    t::geometry::kernel::TransformIndexer ti(intrinsics, trans);

    // Output
    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);

    core::Device device = source_vertex_map.GetDevice();

    int64_t n = rows * cols;

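    // A_1x29 packs the per-pixel reduction: entries [0, 21) hold the upper
    // triangle of the 6x6 J^T J, [21, 27) hold J^T r with the Huber-derivative
    // weighting, [27] accumulates the Huber loss, and [28] counts inliers.
    // DecodeAndSolve6x6 later decodes this compressed 29x1 tensor and solves
    // the 6x6 linear system for the pose update delta.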
    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_I[6];
                    float r_I;

                    bool valid = GetJacobianIntensity(
                            x, y, depth_outlier_trunc, source_depth_indexer,
                            target_depth_indexer, source_intensity_indexer,
                            target_intensity_indexer,
                            target_intensity_dx_indexer,
                            target_intensity_dy_indexer, source_vertex_indexer,
                            ti, J_I, r_I);

                    if (valid) {
                        float d_huber = HuberDeriv(r_I, intensity_huber_delta);
                        float r_huber = HuberLoss(r_I, intensity_huber_delta);

                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_I[j] * J_I[k];
                                i++;
                            }
                            A_reduction[21 + j] += J_I[j] * d_huber;
                        }
                        A_reduction[27] += r_huber;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _MSC_VER
                return A_reduction;
            },
            // TBB: Defining reduction operation.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device);
    DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count);
}

void ComputeOdometryResultHybridCPU(const core::Tensor& source_depth,
                                    const core::Tensor& target_depth,
                                    const core::Tensor& source_intensity,
                                    const core::Tensor& target_intensity,
                                    const core::Tensor& target_depth_dx,
                                    const core::Tensor& target_depth_dy,
                                    const core::Tensor& target_intensity_dx,
                                    const core::Tensor& target_intensity_dy,
                                    const core::Tensor& source_vertex_map,
                                    const core::Tensor& intrinsics,
                                    const core::Tensor& init_source_to_target,
                                    core::Tensor& delta,
                                    float& inlier_residual,
                                    int& inlier_count,
                                    const float depth_outlier_trunc,
                                    const float depth_huber_delta,
                                    const float intensity_huber_delta) {
    NDArrayIndexer source_depth_indexer(source_depth, 2);
    NDArrayIndexer target_depth_indexer(target_depth, 2);

    NDArrayIndexer source_intensity_indexer(source_intensity, 2);
    NDArrayIndexer target_intensity_indexer(target_intensity, 2);

    NDArrayIndexer target_depth_dx_indexer(target_depth_dx, 2);
    NDArrayIndexer target_depth_dy_indexer(target_depth_dy, 2);
    NDArrayIndexer target_intensity_dx_indexer(target_intensity_dx, 2);
    NDArrayIndexer target_intensity_dy_indexer(target_intensity_dy, 2);

    NDArrayIndexer source_vertex_indexer(source_vertex_map, 2);

    core::Tensor trans = init_source_to_target;
    t::geometry::kernel::TransformIndexer ti(intrinsics, trans);

    // Output
    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);

    core::Device device = source_vertex_map.GetDevice();

    int64_t n = rows * cols;

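    // Same packed 29-float layout as the intensity term above; here every
    // valid pixel contributes both an intensity and a depth residual, each
    // with its own Huber weight.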
    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_I[6], J_D[6];
                    float r_I, r_D;

                    bool valid = GetJacobianHybrid(
                            x, y, depth_outlier_trunc, source_depth_indexer,
                            target_depth_indexer, source_intensity_indexer,
                            target_intensity_indexer, target_depth_dx_indexer,
                            target_depth_dy_indexer,
                            target_intensity_dx_indexer,
                            target_intensity_dy_indexer, source_vertex_indexer,
                            ti, J_I, J_D, r_I, r_D);

                    if (valid) {
                        float d_huber_I =
                                HuberDeriv(r_I, intensity_huber_delta);
                        float d_huber_D = HuberDeriv(r_D, depth_huber_delta);

                        float r_huber_I = HuberLoss(r_I, intensity_huber_delta);
                        float r_huber_D = HuberLoss(r_D, depth_huber_delta);

                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] +=
                                        J_I[j] * J_I[k] + J_D[j] * J_D[k];
                                i++;
                            }
                            A_reduction[21 + j] +=
                                    J_I[j] * d_huber_I + J_D[j] * d_huber_D;
                        }
                        A_reduction[27] += r_huber_I + r_huber_D;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _MSC_VER
                return A_reduction;
            },
            // TBB: Defining reduction operation.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device);
    DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count);
}

void ComputeOdometryResultPointToPlaneCPU(
        const core::Tensor& source_vertex_map,
        const core::Tensor& target_vertex_map,
        const core::Tensor& target_normal_map,
        const core::Tensor& intrinsics,
        const core::Tensor& init_source_to_target,
        core::Tensor& delta,
        float& inlier_residual,
        int& inlier_count,
        const float depth_outlier_trunc,
        const float depth_huber_delta) {
    NDArrayIndexer source_vertex_indexer(source_vertex_map, 2);
    NDArrayIndexer target_vertex_indexer(target_vertex_map, 2);
    NDArrayIndexer target_normal_indexer(target_normal_map, 2);

    core::Tensor trans = init_source_to_target;
    t::geometry::kernel::TransformIndexer ti(intrinsics, trans);

    // Output
    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);

    core::Device device = source_vertex_map.GetDevice();

    int64_t n = rows * cols;

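    // Same packed 29-float layout; the residual here is the point-to-plane
    // distance along the target normal, robustified with a Huber loss.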
    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_ij[6];
                    float r;

                    bool valid = GetJacobianPointToPlane(
                            x, y, depth_outlier_trunc, source_vertex_indexer,
                            target_vertex_indexer, target_normal_indexer, ti,
                            J_ij, r);

                    if (valid) {
                        float d_huber = HuberDeriv(r, depth_huber_delta);
                        float r_huber = HuberLoss(r, depth_huber_delta);
                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_ij[j] * J_ij[k];
                                i++;
                            }
                            A_reduction[21 + j] += J_ij[j] * d_huber;
                        }
                        A_reduction[27] += r_huber;
                        A_reduction[28] += 1;
                    }
                }
#ifdef _MSC_VER
                return A_reduction;
            },
            // TBB: Defining reduction operation.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device);
    DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count);
}

}  // namespace odometry
}  // namespace kernel
}  // namespace pipelines
}  // namespace t
}  // namespace cloudViewer