#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
void ComputeOdometryInformationMatrixCPU(const core::Tensor& source_vertex_map,
                                         const core::Tensor& target_vertex_map,
                                         const core::Tensor& intrinsic,
                                         const core::Tensor& source_to_target,
                                         const float square_dist_thr,
                                         core::Tensor& information) {
    // ... (NDArrayIndexer setup for the vertex maps and the TransformIndexer
    // `ti` built from `intrinsic` and `source_to_target` is elided)

    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);
    int64_t n = rows * cols;

    // 21 floats: the packed lower triangle of the symmetric 6x6 J^T J.
    std::vector<float> A_1x21(21, 0.0);

#ifdef _MSC_VER
    // TBB fallback: MSVC's OpenMP does not support array-section reductions.
    std::vector<float> zeros_21(21, 0.0);
    A_1x21 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_21,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x21.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 21]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_x[6], J_y[6], J_z[6];
                    float rx, ry, rz;

                    bool valid = GetJacobianPointToPoint(
                            x, y, square_dist_thr, source_vertex_indexer,
                            target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry,
                            rz);

                    if (valid) {
                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_x[j] * J_x[k];
                                A_reduction[i] += J_y[j] * J_y[k];
                                A_reduction[i] += J_z[j] * J_z[k];
                                i++;
                            }
                        }
                    }
#ifdef _MSC_VER
                }
                return A_reduction;
            },
            // TBB join: element-wise sum of two partial reductions.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(21);
                for (int j = 0; j < 21; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    // ... (A_1x21 is wrapped into the tensor A_reduction_tensor; elided)

    float* reduction_ptr = A_reduction_tensor.GetDataPtr<float>();
    double* info_ptr = information.GetDataPtr<double>();

    // Expand the packed lower triangle into the symmetric 6x6 information
    // matrix.
    for (int j = 0; j < 6; j++) {
        const int64_t reduction_idx = ((j * (j + 1)) / 2);
        for (int k = 0; k <= j; k++) {
            info_ptr[j * 6 + k] = reduction_ptr[reduction_idx + k];
            info_ptr[k * 6 + j] = reduction_ptr[reduction_idx + k];
        }
    }
}
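// Sketch: the 21-entry reduction above stores the lower triangle of the
// symmetric 6x6 J^T J in the order j = 0..5, k = 0..j, so the running index i
// for entry (j, k) equals j*(j+1)/2 + k, the same offset the decode loop
// computes as `reduction_idx`. A minimal standalone check of that mapping
// (illustrative only, not part of the Open3D sources):
#include <cassert>

int main() {
    int i = 0;
    for (int j = 0; j < 6; j++) {
        for (int k = 0; k <= j; k++) {
            assert(i == j * (j + 1) / 2 + k);  // packed lower-triangle offset
            i++;
        }
    }
    assert(i == 21);  // 6*7/2 entries in total
    return 0;
}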
void ComputeOdometryResultIntensityCPU(
        const core::Tensor& source_depth, const core::Tensor& target_depth,
        const core::Tensor& source_intensity,
        const core::Tensor& target_intensity,
        const core::Tensor& target_intensity_dx,
        const core::Tensor& target_intensity_dy,
        const core::Tensor& source_vertex_map, const core::Tensor& intrinsics,
        const core::Tensor& init_source_to_target, core::Tensor& delta,
        float& inlier_residual, int& inlier_count,
        const float depth_outlier_trunc, const float intensity_huber_delta) {
    // ... (indexer setup for the depth/intensity/vertex maps and the
    // TransformIndexer `ti` is elided)
    NDArrayIndexer target_intensity_dx_indexer(target_intensity_dx, 2);
    NDArrayIndexer target_intensity_dy_indexer(target_intensity_dy, 2);

    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);
    int64_t n = rows * cols;

    // 29 floats: 21 for packed J^T J, 6 for J^T r, 1 loss sum, 1 inlier count.
    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_I[6];
                    float r_I;

                    bool valid = GetJacobianIntensity(
                            x, y, depth_outlier_trunc, source_depth_indexer,
                            target_depth_indexer, source_intensity_indexer,
                            target_intensity_indexer,
                            target_intensity_dx_indexer,
                            target_intensity_dy_indexer, source_vertex_indexer,
                            ti, J_I, r_I);

                    if (valid) {
                        float d_huber = HuberDeriv(r_I, intensity_huber_delta);
                        float r_huber = HuberLoss(r_I, intensity_huber_delta);
                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_I[j] * J_I[k];
                                i++;
                            }
                            A_reduction[21 + j] += J_I[j] * d_huber;
                        }
                        A_reduction[27] += r_huber;
                        A_reduction[28] += 1;
                    }
#ifdef _MSC_VER
                }
                return A_reduction;
            },
            // TBB join: element-wise sum of partial reductions.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    // ... (A_1x29 is packed into a tensor and passed to DecodeAndSolve6x6 to
    // fill `delta`, `inlier_residual` and `inlier_count`; elided)
}
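// Sketch: judging from the accumulation above, the 29-float reduction is laid
// out as [0, 21) packed lower triangle of J^T J, [21, 27) the robustified
// J^T r terms, [27] the summed Huber loss, and [28] the inlier count. This is
// the compressed 29x1 buffer that DecodeAndSolve6x6 turns into a 6x6 linear
// system. A hypothetical unpacking (names and types assumed, not Open3D's
// actual DecodeAndSolve6x6 implementation):
#include <array>

struct Unpacked6x6 {
    std::array<double, 36> A{};  // row-major, symmetric 6x6
    std::array<double, 6> b{};
    double loss = 0.0;
    int count = 0;
};

Unpacked6x6 UnpackReduction29(const float* r29) {
    Unpacked6x6 out;
    for (int j = 0, i = 0; j < 6; j++) {
        for (int k = 0; k <= j; k++, i++) {
            out.A[j * 6 + k] = r29[i];
            out.A[k * 6 + j] = r29[i];  // mirror into the upper triangle
        }
        out.b[j] = r29[21 + j];
    }
    out.loss = r29[27];
    out.count = static_cast<int>(r29[28]);
    return out;
}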
void ComputeOdometryResultHybridCPU(
        const core::Tensor& source_depth, const core::Tensor& target_depth,
        const core::Tensor& source_intensity,
        const core::Tensor& target_intensity,
        const core::Tensor& target_depth_dx,
        const core::Tensor& target_depth_dy,
        const core::Tensor& target_intensity_dx,
        const core::Tensor& target_intensity_dy,
        const core::Tensor& source_vertex_map, const core::Tensor& intrinsics,
        const core::Tensor& init_source_to_target, core::Tensor& delta,
        float& inlier_residual, int& inlier_count,
        const float depth_outlier_trunc, const float depth_huber_delta,
        const float intensity_huber_delta) {
    // ... (indexer setup for the depth/intensity/vertex maps, the depth
    // gradients and the TransformIndexer `ti` is elided)
    NDArrayIndexer target_intensity_dx_indexer(target_intensity_dx, 2);
    NDArrayIndexer target_intensity_dy_indexer(target_intensity_dy, 2);

    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);
    int64_t n = rows * cols;

    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_I[6], J_D[6];
                    float r_I, r_D;

                    bool valid = GetJacobianHybrid(
                            x, y, depth_outlier_trunc, source_depth_indexer,
                            target_depth_indexer, source_intensity_indexer,
                            target_intensity_indexer, target_depth_dx_indexer,
                            target_depth_dy_indexer,
                            target_intensity_dx_indexer,
                            target_intensity_dy_indexer, source_vertex_indexer,
                            ti, J_I, J_D, r_I, r_D);

                    if (valid) {
                        float d_huber_I =
                                HuberDeriv(r_I, intensity_huber_delta);
                        float d_huber_D = HuberDeriv(r_D, depth_huber_delta);
                        float r_huber_I = HuberLoss(r_I, intensity_huber_delta);
                        float r_huber_D = HuberLoss(r_D, depth_huber_delta);

                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] +=
                                        J_I[j] * J_I[k] + J_D[j] * J_D[k];
                                i++;
                            }
                            A_reduction[21 + j] +=
                                    J_I[j] * d_huber_I + J_D[j] * d_huber_D;
                        }
                        A_reduction[27] += r_huber_I + r_huber_D;
                        A_reduction[28] += 1;
                    }
#ifdef _MSC_VER
                }
                return A_reduction;
            },
            // TBB join: element-wise sum of partial reductions.
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    // ... (A_1x29 is packed into a tensor and passed to DecodeAndSolve6x6;
    // elided)
}
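// Sketch: HuberDeriv and HuberLoss robustify the intensity and depth
// residuals before they enter the normal equations. A plausible scalar
// definition of the standard Huber loss with threshold delta (an assumption,
// not necessarily Open3D's exact implementation):
#include <cmath>

// Quadratic for |r| <= delta, linear beyond it.
inline float HuberLossSketch(float r, float delta) {
    const float abs_r = std::abs(r);
    return abs_r <= delta ? 0.5f * r * r : delta * (abs_r - 0.5f * delta);
}

// Derivative of the loss w.r.t. r: identity inside the quadratic region,
// clamped to +/- delta outside it.
inline float HuberDerivSketch(float r, float delta) {
    return std::abs(r) <= delta ? r : (r > 0.f ? delta : -delta);
}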
void ComputeOdometryResultPointToPlaneCPU(
        const core::Tensor& source_vertex_map,
        const core::Tensor& target_vertex_map,
        const core::Tensor& target_normal_map, const core::Tensor& intrinsics,
        const core::Tensor& init_source_to_target, core::Tensor& delta,
        float& inlier_residual, int& inlier_count,
        const float depth_outlier_trunc, const float depth_huber_delta) {
    // ... (indexer setup for the vertex/normal maps and the TransformIndexer
    // `ti` is elided)
    int64_t rows = source_vertex_indexer.GetShape(0);
    int64_t cols = source_vertex_indexer.GetShape(1);
    int64_t n = rows * cols;

    std::vector<float> A_1x29(29, 0.0);

#ifdef _MSC_VER
    std::vector<float> zeros_29(29, 0.0);
    A_1x29 = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), zeros_29,
            [&](tbb::blocked_range<int> r, std::vector<float> A_reduction) {
                for (int workload_idx = r.begin(); workload_idx < r.end();
                     workload_idx++) {
#else
    float* A_reduction = A_1x29.data();
#pragma omp parallel for reduction(+ : A_reduction[ : 29]) schedule(static) \
        num_threads(utility::EstimateMaxThreads())
    for (int workload_idx = 0; workload_idx < n; workload_idx++) {
#endif
                    int y = workload_idx / cols;
                    int x = workload_idx % cols;

                    float J_ij[6];
                    float r;

                    bool valid = GetJacobianPointToPlane(
                            x, y, depth_outlier_trunc, source_vertex_indexer,
                            target_vertex_indexer, target_normal_indexer, ti,
                            J_ij, r);

                    if (valid) {
                        float d_huber = HuberDeriv(r, depth_huber_delta);
                        float r_huber = HuberLoss(r, depth_huber_delta);
                        for (int i = 0, j = 0; j < 6; j++) {
                            for (int k = 0; k <= j; k++) {
                                A_reduction[i] += J_ij[j] * J_ij[k];
                                i++;
                            }
                            A_reduction[21 + j] += J_ij[j] * d_huber;
                        }
                        A_reduction[27] += r_huber;
                        A_reduction[28] += 1;
                    }
#ifdef _MSC_VER
                }
                return A_reduction;
            },
            [&](std::vector<float> a, std::vector<float> b) {
                std::vector<float> result(29);
                for (int j = 0; j < 29; j++) {
                    result[j] = a[j] + b[j];
                }
                return result;
            });
#endif
    // ... (A_1x29 is packed into a tensor and passed to DecodeAndSolve6x6;
    // elided)
}
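// Sketch: all four kernels use the same dual-path reduction, i.e. a
// tbb::parallel_reduce over a std::vector<float> on one branch and an OpenMP
// array-section reduction over the same buffer on the other, both computing
// an element-wise sum of per-pixel contributions. A minimal standalone
// illustration of why the two paths agree (toy accumulator and data, not the
// odometry kernels):
#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>

#include <cassert>
#include <vector>

int main() {
    const int n = 100;

    // TBB path: each chunk accumulates into its own copy of the vector and
    // partial results are joined by element-wise addition.
    std::vector<float> acc_tbb = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, n), std::vector<float>(3, 0.0f),
            [&](tbb::blocked_range<int> r, std::vector<float> acc) {
                for (int i = r.begin(); i < r.end(); i++) {
                    acc[0] += 1.0f;      // e.g. inlier count
                    acc[1] += float(i);  // e.g. residual sum
                    acc[2] += float(i) * float(i);
                }
                return acc;
            },
            [](std::vector<float> a, const std::vector<float>& b) {
                for (size_t j = 0; j < a.size(); j++) a[j] += b[j];
                return a;
            });

    // OpenMP path: array-section reduction over the raw buffer (OpenMP 4.5+,
    // which default MSVC OpenMP lacks, hence the TBB branch in the kernels).
    std::vector<float> acc_omp(3, 0.0f);
    float* acc = acc_omp.data();
#pragma omp parallel for reduction(+ : acc[ : 3]) schedule(static)
    for (int i = 0; i < n; i++) {
        acc[0] += 1.0f;
        acc[1] += float(i);
        acc[2] += float(i) * float(i);
    }

    // Small integer-valued sums are exact in float, so both paths match.
    for (int j = 0; j < 3; j++) assert(acc_tbb[j] == acc_omp[j]);
    return 0;
}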