29 namespace voxel_grid {
34 #if defined(__CUDACC__)
35 void GetVoxelCoordinatesAndFlattenedIndicesCUDA
47 const index_t* buf_indices_ptr = buf_indices.GetDataPtr<
index_t>();
50 float* voxel_coords_ptr = voxel_coords.GetDataPtr<
float>();
51 int64_t* flattened_indices_ptr = flattened_indices.GetDataPtr<int64_t>();
53 index_t n = flattened_indices.GetLength();
54 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
55 index_t resolution3 = resolution * resolution * resolution;
58 index_t block_idx = buf_indices_ptr[workload_idx / resolution3];
59 index_t voxel_idx = workload_idx % resolution3;
61 index_t block_key_offset = block_idx * 3;
62 index_t xb = block_key_ptr[block_key_offset + 0];
63 index_t yb = block_key_ptr[block_key_offset + 1];
64 index_t zb = block_key_ptr[block_key_offset + 2];
67 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
69 float x = (xb * resolution + xv) * voxel_size;
70 float y = (yb * resolution + yv) * voxel_size;
71 float z = (zb * resolution + zv) * voxel_size;
73 flattened_indices_ptr[workload_idx] =
74 block_idx * resolution3 + voxel_idx;
76 index_t voxel_coords_offset = workload_idx * 3;
77 voxel_coords_ptr[voxel_coords_offset + 0] = x;
78 voxel_coords_ptr[voxel_coords_offset + 1] = y;
79 voxel_coords_ptr[voxel_coords_offset + 2] = z;
91 index_t xn = (xo + resolution) % resolution;
92 index_t yn = (yo + resolution) % resolution;
93 index_t zn = (zo + resolution) % resolution;
99 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
102 *nb_block_masks_indexer.
GetDataPtr<
bool>(curr_block_idx, nb_idx);
103 if (!block_mask_i)
return -1;
106 curr_block_idx, nb_idx);
108 return (((block_idx_i * resolution) + zn) * resolution + yn) * resolution +
112 template <
typename tsdf_t>
114 const tsdf_t* tsdf_base_ptr,
126 nb_block_masks_indexer,
127 nb_block_indices_indexer);
129 index_t vxp = GetLinearIdx(xo + 1, yo, zo);
130 index_t vxn = GetLinearIdx(xo - 1, yo, zo);
131 index_t vyp = GetLinearIdx(xo, yo + 1, zo);
132 index_t vyn = GetLinearIdx(xo, yo - 1, zo);
133 index_t vzp = GetLinearIdx(xo, yo, zo + 1);
134 index_t vzn = GetLinearIdx(xo, yo, zo - 1);
135 if (vxp >= 0 && vxn >= 0) n[0] = tsdf_base_ptr[vxp] - tsdf_base_ptr[vxn];
136 if (vyp >= 0 && vyn >= 0) n[1] = tsdf_base_ptr[vyp] - tsdf_base_ptr[vyn];
137 if (vzp >= 0 && vzn >= 0) n[2] = tsdf_base_ptr[vzp] - tsdf_base_ptr[vzn];
140 template <
typename input_depth_t,
141 typename input_color_t,
145 #if defined(__CUDACC__)
164 index_t resolution2 = resolution * resolution;
165 index_t resolution3 = resolution2 * resolution;
167 TransformIndexer transform_indexer(depth_intrinsic, extrinsics, voxel_size);
172 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
180 if (!block_value_map.Contains(
"tsdf") ||
181 !block_value_map.Contains(
"weight")) {
183 "TSDF and/or weight not allocated in blocks, please implement "
184 "customized integration.");
186 tsdf_t* tsdf_base_ptr = block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
187 weight_t* weight_base_ptr =
188 block_value_map.at(
"weight").GetDataPtr<weight_t>();
190 bool integrate_color =
191 block_value_map.Contains(
"color") &&
color.NumElements() > 0;
192 color_t* color_base_ptr =
nullptr;
195 float color_multiplier = 1.0;
196 if (integrate_color) {
197 color_base_ptr = block_value_map.at(
"color").
GetDataPtr<color_t>();
202 color_multiplier = 255.0;
206 index_t n = indices.GetLength() * resolution3;
209 index_t block_idx = indices_ptr[workload_idx / resolution3];
210 index_t voxel_idx = workload_idx % resolution3;
215 block_keys_indexer.GetDataPtr<
index_t>(block_idx);
222 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
225 index_t x = xb * resolution + xv;
226 index_t y = yb * resolution + yv;
227 index_t z = zb * resolution + zv;
230 float xc, yc, zc, u, v;
232 static_cast<float>(y),
233 static_cast<float>(z), &xc, &yc, &zc);
236 transform_indexer.
Project(xc, yc, zc, &u, &v);
247 *depth_indexer.
GetDataPtr<input_depth_t>(ui, vi) / depth_scale;
249 float sdf = depth - zc;
250 if (depth <= 0 || depth > depth_max || zc <= 0 || sdf < -sdf_trunc) {
253 sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
256 index_t linear_idx = block_idx * resolution3 + voxel_idx;
258 tsdf_t* tsdf_ptr = tsdf_base_ptr + linear_idx;
259 weight_t* weight_ptr = weight_base_ptr + linear_idx;
261 float inv_wsum = 1.0f / (*weight_ptr + 1);
262 float weight = *weight_ptr;
263 *tsdf_ptr = (weight * (*tsdf_ptr) + sdf) * inv_wsum;
265 if (integrate_color) {
266 color_t* color_ptr = color_base_ptr + 3 * linear_idx;
271 transform_indexer.
Unproject(ui, vi, 1.0, &x, &y, &z);
274 colormap_indexer.
Project(x, y, z, &uf, &vf);
279 input_color_t* input_color_ptr =
280 color_indexer.
GetDataPtr<input_color_t>(ui, vi);
282 for (
index_t i = 0; i < 3; ++i) {
283 color_ptr[i] = (weight * color_ptr[i] +
284 input_color_ptr[i] * color_multiplier) *
289 *weight_ptr = weight + 1;
292 #if defined(__CUDACC__)
297 #if defined(__CUDACC__)
298 void EstimateRangeCUDA
309 int64_t block_resolution,
318 int h_down = h / down_factor;
319 int w_down = w / down_factor;
321 block_keys.GetDevice());
325 const int fragment_size = 16;
327 if (fragment_buffer.GetDataPtr() == 0 ||
328 fragment_buffer.NumElements() == 0) {
330 const int reserve_frag_buffer_size =
331 h_down * w_down / (fragment_size * fragment_size) / voxel_size;
332 fragment_buffer =
core::Tensor({reserve_frag_buffer_size, 6},
336 const int frag_buffer_size = fragment_buffer.NumElements() / 6;
341 #if defined(__CUDACC__)
343 block_keys.GetDevice());
344 int* count_ptr =
count.GetDataPtr<
int>();
346 std::atomic<int> count_atomic(0);
347 std::atomic<int>* count_ptr = &count_atomic;
357 block_keys.GetDevice(), block_keys.GetLength(),
359 int* key = block_keys_indexer.
GetDataPtr<
int>(workload_idx);
361 int u_min = w_down - 1, v_min = h_down - 1, u_max = 0,
363 float z_min = depth_max, z_max = depth_min;
365 float xc, yc, zc, u, v;
368 for (
int i = 0; i < 8; ++i) {
369 float xw = (key[0] + ((i & 1) > 0)) * block_resolution *
371 float yw = (key[1] + ((i & 2) > 0)) * block_resolution *
373 float zw = (key[2] + ((i & 4) > 0)) * block_resolution *
378 if (zc <= 0)
continue;
381 w2c_transform_indexer.
Project(xc, yc, zc, &u, &v);
385 v_min =
min(
static_cast<int>(
floorf(v)), v_min);
386 v_max =
max(
static_cast<int>(ceilf(v)), v_max);
388 u_min =
min(
static_cast<int>(
floorf(u)), u_min);
389 u_max =
max(
static_cast<int>(ceilf(u)), u_max);
391 z_min =
min(z_min, zc);
392 z_max =
max(z_max, zc);
395 v_min =
max(0, v_min);
396 v_max =
min(h_down - 1, v_max);
398 u_min =
max(0, u_min);
399 u_max =
min(w_down - 1, u_max);
401 if (v_min >= v_max || u_min >= u_max || z_min >= z_max)
return;
405 ceilf(
float(v_max - v_min + 1) /
float(fragment_size));
407 ceilf(
float(u_max - u_min + 1) /
float(fragment_size));
409 int frag_count = frag_v_count * frag_u_count;
411 int frag_count_end = frag_count_start + frag_count;
412 if (frag_count_end >= frag_buffer_size) {
417 for (
int frag_v = 0; frag_v < frag_v_count; ++frag_v) {
418 for (
int frag_u = 0; frag_u < frag_u_count;
420 float* frag_ptr = frag_buffer_indexer.
GetDataPtr<
float>(
421 frag_count_start +
offset);
427 frag_ptr[2] = v_min + frag_v * fragment_size;
428 frag_ptr[3] = u_min + frag_u * fragment_size;
431 frag_ptr[4] =
min(frag_ptr[2] + fragment_size - 1,
432 static_cast<float>(v_max));
433 frag_ptr[5] =
min(frag_ptr[3] + fragment_size - 1,
434 static_cast<float>(u_max));
438 #if defined(__CUDACC__)
439 int needed_frag_count =
count[0].Item<
int>();
441 int needed_frag_count = (*count_ptr).load();
444 int frag_count = needed_frag_count;
445 if (frag_count >= frag_buffer_size) {
447 "Could not generate full range map; allocated {} fragments but "
449 frag_buffer_size, frag_count);
450 frag_count = frag_buffer_size - 1;
453 frag_buffer_size, frag_count);
459 int v = workload_idx / w_down;
460 int u = workload_idx % w_down;
463 range_ptr[0] = depth_max;
464 range_ptr[1] = depth_min;
469 block_keys.GetDevice(), frag_count * fragment_size * fragment_size,
471 int frag_idx = workload_idx / (fragment_size * fragment_size);
472 int local_idx = workload_idx % (fragment_size * fragment_size);
473 int dv = local_idx / fragment_size;
474 int du = local_idx % fragment_size;
477 frag_buffer_indexer.
GetDataPtr<
float>(frag_idx);
478 int v_min =
static_cast<int>(frag_ptr[2]);
479 int u_min =
static_cast<int>(frag_ptr[3]);
480 int v_max =
static_cast<int>(frag_ptr[4]);
481 int u_max =
static_cast<int>(frag_ptr[5]);
485 if (v > v_max || u > u_max)
return;
487 float z_min = frag_ptr[0];
488 float z_max = frag_ptr[1];
489 float* range_ptr = range_map_indexer.
GetDataPtr<
float>(u, v);
491 atomicMinf(&(range_ptr[0]), z_min);
492 atomicMaxf(&(range_ptr[1]), z_max);
494 #pragma omp critical(EstimateRangeCPU)
496 range_ptr[0] =
min(z_min, range_ptr[0]);
497 range_ptr[1] =
max(z_max, range_ptr[1]);
502 #if defined(__CUDACC__)
506 if (needed_frag_count != frag_count) {
508 needed_frag_count, frag_count);
511 block_keys.GetDevice());
524 return (xin ==
x && yin ==
y && zin ==
z) ?
block_idx : -1;
538 template <
typename tsdf_t,
typename weight_t,
typename color_t>
539 #if defined(__CUDACC__)
544 (std::shared_ptr<core::HashMap>& hashmap,
557 float weight_threshold,
558 float trunc_voxel_multiplier,
559 int range_map_down_factor) {
564 auto device_hashmap = hashmap->GetDeviceHashBackend();
565 #if defined(__CUDACC__)
567 std::dynamic_pointer_cast<core::StdGPUHashBackend<Key, Hash, Eq>>(
569 if (cuda_hashmap ==
nullptr) {
571 "Unsupported backend: CUDA raycasting only supports STDGPU.");
573 auto hashmap_impl = cuda_hashmap->GetImpl();
576 std::dynamic_pointer_cast<core::TBBHashBackend<Key, Hash, Eq>>(
578 if (cpu_hashmap ==
nullptr) {
580 "Unsupported backend: CPU raycasting only supports TBB.");
582 auto hashmap_impl = *cpu_hashmap->GetImpl();
605 if (!block_value_map.Contains(
"tsdf") ||
606 !block_value_map.Contains(
"weight")) {
608 "TSDF and/or weight not allocated in blocks, please implement "
609 "customized integration.");
611 const tsdf_t* tsdf_base_ptr =
612 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
613 const weight_t* weight_base_ptr =
614 block_value_map.at(
"weight").GetDataPtr<weight_t>();
617 if (renderings_map.Contains(
"depth")) {
618 depth_indexer =
ArrayIndexer(renderings_map.at(
"depth"), 2);
620 if (renderings_map.Contains(
"vertex")) {
621 vertex_indexer =
ArrayIndexer(renderings_map.at(
"vertex"), 2);
623 if (renderings_map.Contains(
"normal")) {
624 normal_indexer =
ArrayIndexer(renderings_map.at(
"normal"), 2);
628 if (renderings_map.Contains(
"index")) {
629 index_indexer =
ArrayIndexer(renderings_map.at(
"index"), 2);
631 if (renderings_map.Contains(
"mask")) {
632 mask_indexer =
ArrayIndexer(renderings_map.at(
"mask"), 2);
634 if (renderings_map.Contains(
"interp_ratio")) {
635 interp_ratio_indexer =
638 if (renderings_map.Contains(
"interp_ratio_dx")) {
639 interp_ratio_dx_indexer =
642 if (renderings_map.Contains(
"interp_ratio_dy")) {
643 interp_ratio_dy_indexer =
646 if (renderings_map.Contains(
"interp_ratio_dz")) {
647 interp_ratio_dz_indexer =
652 bool render_color =
false;
653 if (block_value_map.Contains(
"color") && renderings_map.Contains(
"color")) {
655 color_indexer =
ArrayIndexer(renderings_map.at(
"color"), 2);
657 const color_t* color_base_ptr =
658 render_color ? block_value_map.at(
"color").GetDataPtr<color_t>()
661 bool visit_neighbors = render_color || normal_indexer.
GetDataPtr() ||
677 float block_size = voxel_size * block_resolution;
678 index_t resolution2 = block_resolution * block_resolution;
679 index_t resolution3 = resolution2 * block_resolution;
692 index_t x_vn = (x_v + block_resolution) % block_resolution;
693 index_t y_vn = (y_v + block_resolution) % block_resolution;
694 index_t z_vn = (z_v + block_resolution) % block_resolution;
700 if (dx_b == 0 && dy_b == 0 && dz_b == 0) {
701 return block_buf_idx * resolution3 + z_v * resolution2 +
702 y_v * block_resolution + x_v;
704 Key key(x_b + dx_b, y_b + dy_b, z_b + dz_b);
706 index_t block_buf_idx = cache.
Check(key[0], key[1], key[2]);
707 if (block_buf_idx < 0) {
708 auto iter = hashmap_impl.find(key);
709 if (iter == hashmap_impl.end())
return -1;
710 block_buf_idx = iter->second;
711 cache.
Update(key[0], key[1], key[2], block_buf_idx);
714 return block_buf_idx * resolution3 + z_vn * resolution2 +
715 y_vn * block_resolution + x_vn;
720 float x_o,
float y_o,
float z_o,
721 float x_d,
float y_d,
float z_d,
float t,
723 float x_g = x_o + t * x_d;
724 float y_g = y_o + t * y_d;
725 float z_g = z_o + t * z_d;
732 Key key(x_b, y_b, z_b);
734 if (block_buf_idx < 0) {
735 auto iter = hashmap_impl.find(key);
736 if (iter == hashmap_impl.end())
return -1;
737 block_buf_idx = iter->second;
738 cache.
Update(x_b, y_b, z_b, block_buf_idx);
746 return block_buf_idx * resolution3 + z_v * resolution2 +
747 y_v * block_resolution + x_v;
750 index_t y = workload_idx / cols;
751 index_t x = workload_idx % cols;
753 const float* range = range_indexer.
GetDataPtr<
float>(
754 x / range_map_down_factor, y / range_map_down_factor);
756 float* depth_ptr =
nullptr;
757 float* vertex_ptr =
nullptr;
758 float* color_ptr =
nullptr;
759 float* normal_ptr =
nullptr;
761 int64_t* index_ptr =
nullptr;
762 bool* mask_ptr =
nullptr;
763 float* interp_ratio_ptr =
nullptr;
764 float* interp_ratio_dx_ptr =
nullptr;
765 float* interp_ratio_dy_ptr =
nullptr;
766 float* interp_ratio_dz_ptr =
nullptr;
769 vertex_ptr = vertex_indexer.
GetDataPtr<
float>(x, y);
775 depth_ptr = depth_indexer.
GetDataPtr<
float>(x, y);
779 normal_ptr = normal_indexer.
GetDataPtr<
float>(x, y);
786 mask_ptr = mask_indexer.
GetDataPtr<
bool>(x, y);
790 for (
int i = 0; i < 8; ++i) {
795 index_ptr = index_indexer.
GetDataPtr<int64_t>(x, y);
799 for (
int i = 0; i < 8; ++i) {
804 interp_ratio_ptr = interp_ratio_indexer.
GetDataPtr<
float>(x, y);
808 for (
int i = 0; i < 8; ++i) {
809 interp_ratio_ptr[i] = 0;
813 interp_ratio_dx_ptr =
814 interp_ratio_dx_indexer.
GetDataPtr<
float>(x, y);
818 for (
int i = 0; i < 8; ++i) {
819 interp_ratio_dx_ptr[i] = 0;
823 interp_ratio_dy_ptr =
824 interp_ratio_dy_indexer.
GetDataPtr<
float>(x, y);
828 for (
int i = 0; i < 8; ++i) {
829 interp_ratio_dy_ptr[i] = 0;
833 interp_ratio_dz_ptr =
834 interp_ratio_dz_indexer.
GetDataPtr<
float>(x, y);
838 for (
int i = 0; i < 8; ++i) {
839 interp_ratio_dz_ptr[i] = 0;
844 color_ptr = color_indexer.
GetDataPtr<
float>(x, y);
851 const float t_max = range[1];
852 if (t >= t_max)
return;
855 float x_c = 0, y_c = 0, z_c = 0;
856 float x_g = 0, y_g = 0, z_g = 0;
857 float x_o = 0, y_o = 0, z_o = 0;
862 float tsdf_prev = -1.0f;
864 float sdf_trunc = voxel_size * trunc_voxel_multiplier;
871 c2w_transform_indexer.
Unproject(
static_cast<float>(x),
872 static_cast<float>(y), 1.0f, &x_c, &y_c,
874 c2w_transform_indexer.
RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
875 float x_d = (x_g - x_o);
876 float y_d = (y_g - y_o);
877 float z_d = (z_g - z_o);
880 bool surface_found =
false;
883 GetLinearIdxAtT(x_o, y_o, z_o, x_d, y_d, z_d, t, cache);
885 if (linear_idx < 0) {
890 tsdf = tsdf_base_ptr[linear_idx];
891 w = weight_base_ptr[linear_idx];
892 if (tsdf_prev > 0 && w >= weight_threshold && tsdf <= 0) {
893 surface_found =
true;
897 float delta = tsdf * sdf_trunc;
898 t += delta < voxel_size ? voxel_size : delta;
904 (t * tsdf_prev - t_prev * tsdf) / (tsdf_prev - tsdf);
905 x_g = x_o + t_intersect * x_d;
906 y_g = y_o + t_intersect * y_d;
907 z_g = z_o + t_intersect * z_d;
911 *depth_ptr = t_intersect * depth_scale;
915 x_g, y_g, z_g, vertex_ptr + 0, vertex_ptr + 1,
918 if (!visit_neighbors)
return;
926 float x_v = (x_g - float(x_b) * block_size) / voxel_size;
927 float y_v = (y_g - float(y_b) * block_size) / voxel_size;
928 float z_v = (z_g - float(z_b) * block_size) / voxel_size;
930 Key key(x_b, y_b, z_b);
933 if (block_buf_idx < 0) {
934 auto iter = hashmap_impl.find(key);
935 if (iter == hashmap_impl.end())
return;
936 block_buf_idx = iter->second;
937 cache.
Update(x_b, y_b, z_b, block_buf_idx);
944 float ratio_x = x_v - float(x_v_floor);
945 float ratio_y = y_v - float(y_v_floor);
946 float ratio_z = z_v - float(z_v_floor);
949 for (
index_t k = 0; k < 8; ++k) {
950 index_t dx_v = (k & 1) > 0 ? 1 : 0;
951 index_t dy_v = (k & 2) > 0 ? 1 : 0;
952 index_t dz_v = (k & 4) > 0 ? 1 : 0;
954 index_t linear_idx_k = GetLinearIdxAtP(
955 x_b, y_b, z_b, x_v_floor + dx_v, y_v_floor + dy_v,
956 z_v_floor + dz_v, block_buf_idx, cache);
958 if (linear_idx_k >= 0 && weight_base_ptr[linear_idx_k] > 0) {
959 float rx = dx_v * (ratio_x) + (1 - dx_v) * (1 - ratio_x);
960 float ry = dy_v * (ratio_y) + (1 - dy_v) * (1 - ratio_y);
961 float rz = dz_v * (ratio_z) + (1 - dz_v) * (1 - ratio_z);
962 float r = rx * ry * rz;
964 if (interp_ratio_ptr) {
965 interp_ratio_ptr[k] = r;
971 index_ptr[k] = linear_idx_k;
974 float tsdf_k = tsdf_base_ptr[linear_idx_k];
975 float interp_ratio_dx = ry * rz * (2 * dx_v - 1);
976 float interp_ratio_dy = rx * rz * (2 * dy_v - 1);
977 float interp_ratio_dz = rx * ry * (2 * dz_v - 1);
979 if (interp_ratio_dx_ptr) {
980 interp_ratio_dx_ptr[k] = interp_ratio_dx;
982 if (interp_ratio_dy_ptr) {
983 interp_ratio_dy_ptr[k] = interp_ratio_dy;
985 if (interp_ratio_dz_ptr) {
986 interp_ratio_dz_ptr[k] = interp_ratio_dz;
990 normal_ptr[0] += interp_ratio_dx * tsdf_k;
991 normal_ptr[1] += interp_ratio_dy * tsdf_k;
992 normal_ptr[2] += interp_ratio_dz * tsdf_k;
996 index_t color_linear_idx = linear_idx_k * 3;
998 r * color_base_ptr[color_linear_idx + 0];
1000 r * color_base_ptr[color_linear_idx + 1];
1002 r * color_base_ptr[color_linear_idx + 2];
1012 color_ptr[0] /= sum_r;
1013 color_ptr[1] /= sum_r;
1014 color_ptr[2] /= sum_r;
1018 constexpr
float EPSILON = 1e-5f;
1019 float norm = sqrt(normal_ptr[0] * normal_ptr[0] +
1020 normal_ptr[1] * normal_ptr[1] +
1021 normal_ptr[2] * normal_ptr[2]);
1023 w2c_transform_indexer.
Rotate(
1024 -normal_ptr[0] / norm, -normal_ptr[1] / norm,
1025 -normal_ptr[2] / norm, normal_ptr + 0,
1026 normal_ptr + 1, normal_ptr + 2);
1032 #if defined(__CUDACC__)
1037 template <
typename tsdf_t,
typename weight_t,
typename color_t>
1038 #if defined(__CUDACC__)
1039 void ExtractPointCloudCUDA
1053 float weight_threshold,
1058 index_t resolution2 = resolution * resolution;
1059 index_t resolution3 = resolution2 * resolution;
1062 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
1072 if (!block_value_map.Contains(
"tsdf") ||
1073 !block_value_map.Contains(
"weight")) {
1075 "TSDF and/or weight not allocated in blocks, please implement "
1076 "customized integration.");
1078 const tsdf_t* tsdf_base_ptr =
1079 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
1080 const weight_t* weight_base_ptr =
1081 block_value_map.at(
"weight").GetDataPtr<weight_t>();
1082 const color_t* color_base_ptr =
nullptr;
1083 if (block_value_map.Contains(
"color")) {
1084 color_base_ptr = block_value_map.at(
"color").GetDataPtr<color_t>();
1087 index_t n_blocks = indices.GetLength();
1088 index_t n = n_blocks * resolution3;
1091 #if defined(__CUDACC__)
1093 block_keys.GetDevice());
1096 std::atomic<index_t> count_atomic(0);
1097 std::atomic<index_t>* count_ptr = &count_atomic;
1100 if (valid_size < 0) {
1102 "No estimated max point cloud size provided, using a 2-pass "
1103 "estimation. Surface extraction could be slow.");
1114 nb_block_masks_indexer,
1115 nb_block_indices_indexer);
1120 index_t workload_block_idx = workload_idx / resolution3;
1121 index_t block_idx = indices_ptr[workload_block_idx];
1122 index_t voxel_idx = workload_idx % resolution3;
1126 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1128 index_t linear_idx = block_idx * resolution3 + voxel_idx;
1129 float tsdf_o = tsdf_base_ptr[linear_idx];
1130 float weight_o = weight_base_ptr[linear_idx];
1131 if (weight_o <= weight_threshold)
return;
1134 for (
index_t i = 0; i < 3; ++i) {
1136 GetLinearIdx(xv + (i == 0), yv + (i == 1),
1137 zv + (i == 2), workload_block_idx);
1138 if (linear_idx_i < 0)
continue;
1140 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1141 float weight_i = weight_base_ptr[linear_idx_i];
1142 if (weight_i > weight_threshold &&
1143 tsdf_i * tsdf_o < 0) {
1149 #if defined(__CUDACC__)
1153 valid_size = (*count_ptr).load();
1158 if (
points.GetLength() == 0) {
1172 if (color_base_ptr) {
1182 nb_block_masks_indexer,
1183 nb_block_indices_indexer);
1188 index_t curr_block_idx,
float* n) {
1189 return DeviceGetNormal<tsdf_t>(
1190 tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
1191 nb_block_masks_indexer, nb_block_indices_indexer);
1195 index_t workload_block_idx = workload_idx / resolution3;
1196 index_t block_idx = indices_ptr[workload_block_idx];
1197 index_t voxel_idx = workload_idx % resolution3;
1202 block_keys_indexer.GetDataPtr<
index_t>(block_idx);
1203 index_t xb = block_key_ptr[0];
1204 index_t yb = block_key_ptr[1];
1205 index_t zb = block_key_ptr[2];
1209 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1211 index_t linear_idx = block_idx * resolution3 + voxel_idx;
1212 float tsdf_o = tsdf_base_ptr[linear_idx];
1213 float weight_o = weight_base_ptr[linear_idx];
1214 if (weight_o <= weight_threshold)
return;
1216 float no[3] = {0}, ne[3] = {0};
1219 GetNormal(xv, yv, zv, workload_block_idx, no);
1221 index_t x = xb * resolution + xv;
1222 index_t y = yb * resolution + yv;
1223 index_t z = zb * resolution + zv;
1226 for (
index_t i = 0; i < 3; ++i) {
1228 GetLinearIdx(xv + (i == 0), yv + (i == 1), zv + (i == 2),
1229 workload_block_idx);
1230 if (linear_idx_i < 0)
continue;
1232 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1233 float weight_i = weight_base_ptr[linear_idx_i];
1234 if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) {
1235 float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o);
1238 if (idx >= valid_size) {
1239 printf(
"Point cloud size larger than "
1240 "estimated, please increase the "
1245 float* point_ptr = point_indexer.
GetDataPtr<
float>(idx);
1246 point_ptr[0] = voxel_size * (x + ratio * int(i == 0));
1247 point_ptr[1] = voxel_size * (y + ratio * int(i == 1));
1248 point_ptr[2] = voxel_size * (z + ratio * int(i == 2));
1251 float* normal_ptr = normal_indexer.
GetDataPtr<
float>(idx);
1252 GetNormal(xv + (i == 0), yv + (i == 1), zv + (i == 2),
1253 workload_block_idx, ne);
1254 float nx = (1 - ratio) * no[0] + ratio * ne[0];
1255 float ny = (1 - ratio) * no[1] + ratio * ne[1];
1256 float nz = (1 - ratio) * no[2] + ratio * ne[2];
1257 float norm =
static_cast<float>(
1258 sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
1259 normal_ptr[0] = nx / norm;
1260 normal_ptr[1] = ny / norm;
1261 normal_ptr[2] = nz / norm;
1263 if (color_base_ptr) {
1264 float* color_ptr = color_indexer.
GetDataPtr<
float>(idx);
1265 const color_t* color_o_ptr =
1266 color_base_ptr + 3 * linear_idx;
1267 float r_o = color_o_ptr[0];
1268 float g_o = color_o_ptr[1];
1269 float b_o = color_o_ptr[2];
1271 const color_t* color_i_ptr =
1272 color_base_ptr + 3 * linear_idx_i;
1273 float r_i = color_i_ptr[0];
1274 float g_i = color_i_ptr[1];
1275 float b_i = color_i_ptr[2];
1277 color_ptr[0] = ((1 - ratio) * r_o + ratio * r_i) / 255.0f;
1278 color_ptr[1] = ((1 - ratio) * g_o + ratio * g_i) / 255.0f;
1279 color_ptr[2] = ((1 - ratio) * b_o + ratio * b_i) / 255.0f;
1285 #if defined(__CUDACC__)
1288 index_t total_count = (*count_ptr).load();
1292 valid_size = total_count;
1294 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
1299 template <
typename tsdf_t,
typename weight_t,
typename color_t>
1300 #if defined(__CUDACC__)
1301 void ExtractTriangleMeshCUDA
1317 float weight_threshold,
1321 index_t resolution = block_resolution;
1322 index_t resolution3 = resolution * resolution * resolution;
1325 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
1326 index_t n_blocks =
static_cast<index_t>(block_indices.GetLength());
1334 {n_blocks, resolution, resolution, resolution, 4},
core::Int32,
1336 }
catch (
const std::runtime_error&) {
1338 "Unable to allocate assistance mesh structure for Marching "
1339 "Cubes with {} active voxel blocks. Please consider using a "
1340 "larger voxel size (currently {}) for TSDF integration, or "
1341 "using tsdf_volume.cpu() to perform mesh extraction on CPU.",
1342 n_blocks, voxel_size);
1346 ArrayIndexer mesh_structure_indexer(mesh_structure, 4);
1347 ArrayIndexer nb_block_masks_indexer(nb_block_masks, 2);
1348 ArrayIndexer nb_block_indices_indexer(nb_block_indices, 2);
1351 const index_t* indices_ptr = block_indices.GetDataPtr<
index_t>();
1352 const index_t* inv_indices_ptr = inv_block_indices.GetDataPtr<
index_t>();
1354 if (!block_value_map.Contains(
"tsdf") ||
1355 !block_value_map.Contains(
"weight")) {
1357 "TSDF and/or weight not allocated in blocks, please implement "
1358 "customized integration.");
1360 const tsdf_t* tsdf_base_ptr =
1361 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
1362 const weight_t* weight_base_ptr =
1363 block_value_map.at(
"weight").GetDataPtr<weight_t>();
1364 const color_t* color_base_ptr =
nullptr;
1365 if (block_value_map.Contains(
"color")) {
1366 color_base_ptr = block_value_map.at(
"color").GetDataPtr<color_t>();
1369 index_t n = n_blocks * resolution3;
1378 static_cast<index_t>(resolution),
1379 nb_block_masks_indexer,
1380 nb_block_indices_indexer);
1384 index_t workload_block_idx = widx / resolution3;
1385 index_t voxel_idx = widx % resolution3;
1389 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1394 for (
index_t i = 0; i < 8; ++i) {
1396 GetLinearIdx(xv + vtx_shifts[i][0], yv + vtx_shifts[i][1],
1397 zv + vtx_shifts[i][2], workload_block_idx);
1398 if (linear_idx_i < 0)
return;
1400 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1401 float weight_i = weight_base_ptr[linear_idx_i];
1402 if (weight_i <= weight_threshold)
return;
1404 table_idx |= ((tsdf_i < 0) ? (1 << i) : 0);
1408 xv, yv, zv, workload_block_idx);
1409 mesh_struct_ptr[3] = table_idx;
1411 if (table_idx == 0 || table_idx == 255)
return;
1414 index_t edges_with_vertices = edge_table[table_idx];
1415 for (
index_t i = 0; i < 12; ++i) {
1416 if (edges_with_vertices & (1 << i)) {
1417 index_t xv_i = xv + edge_shifts[i][0];
1418 index_t yv_i = yv + edge_shifts[i][1];
1419 index_t zv_i = zv + edge_shifts[i][2];
1420 index_t edge_i = edge_shifts[i][3];
1422 index_t dxb = xv_i / resolution;
1423 index_t dyb = yv_i / resolution;
1424 index_t dzb = zv_i / resolution;
1426 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
1430 workload_block_idx, nb_idx);
1433 xv_i - dxb * resolution,
1434 yv_i - dyb * resolution,
1435 zv_i - dzb * resolution,
1436 inv_indices_ptr[block_idx_i]);
1439 mesh_ptr_i[edge_i] = -1;
1445 #if defined(__CUDACC__)
1450 std::atomic<index_t> count_atomic(0);
1451 std::atomic<index_t>* count_ptr = &count_atomic;
1454 if (vertex_count < 0) {
1457 index_t workload_block_idx = widx / resolution3;
1458 index_t voxel_idx = widx % resolution3;
1462 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1467 xv, yv, zv, workload_block_idx);
1470 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
1471 mesh_struct_ptr[2] != -1) {
1476 for (
index_t e = 0; e < 3; ++e) {
1477 index_t vertex_idx = mesh_struct_ptr[e];
1478 if (vertex_idx != -1)
continue;
1484 #if defined(__CUDACC__)
1487 vertex_count = (*count_ptr).load();
1498 if (color_base_ptr) {
1506 #if defined(__CUDACC__)
1520 nb_block_masks_indexer,
1521 nb_block_indices_indexer);
1526 index_t curr_block_idx,
float* n) {
1527 return DeviceGetNormal<tsdf_t>(
1528 tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
1529 nb_block_masks_indexer, nb_block_indices_indexer);
1533 index_t workload_block_idx = widx / resolution3;
1534 index_t block_idx = indices_ptr[workload_block_idx];
1535 index_t voxel_idx = widx % resolution3;
1540 index_t xb = block_key_ptr[0];
1541 index_t yb = block_key_ptr[1];
1542 index_t zb = block_key_ptr[2];
1546 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1549 index_t x = xb * resolution + xv;
1550 index_t y = yb * resolution + yv;
1551 index_t z = zb * resolution + zv;
1555 xv, yv, zv, workload_block_idx);
1558 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
1559 mesh_struct_ptr[2] != -1) {
1564 index_t linear_idx = resolution3 * block_idx + voxel_idx;
1565 float tsdf_o = tsdf_base_ptr[linear_idx];
1567 float no[3] = {0}, ne[3] = {0};
1570 GetNormal(xv, yv, zv, workload_block_idx, no);
1573 for (
index_t e = 0; e < 3; ++e) {
1574 index_t vertex_idx = mesh_struct_ptr[e];
1575 if (vertex_idx != -1)
continue;
1578 GetLinearIdx(xv + (e == 0), yv + (e == 1), zv + (e == 2),
1579 workload_block_idx);
1581 "Internal error: GetVoxelAt returns nullptr.");
1582 float tsdf_e = tsdf_base_ptr[linear_idx_e];
1583 float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o);
1586 mesh_struct_ptr[e] = idx;
1588 float ratio_x = ratio *
index_t(e == 0);
1589 float ratio_y = ratio *
index_t(e == 1);
1590 float ratio_z = ratio *
index_t(e == 2);
1592 float* vertex_ptr = vertex_indexer.
GetDataPtr<
float>(idx);
1593 vertex_ptr[0] = voxel_size * (x + ratio_x);
1594 vertex_ptr[1] = voxel_size * (y + ratio_y);
1595 vertex_ptr[2] = voxel_size * (z + ratio_z);
1598 float* normal_ptr = normal_indexer.GetDataPtr<
float>(idx);
1599 GetNormal(xv + (e == 0), yv + (e == 1), zv + (e == 2),
1600 workload_block_idx, ne);
1601 float nx = (1 - ratio) * no[0] + ratio * ne[0];
1602 float ny = (1 - ratio) * no[1] + ratio * ne[1];
1603 float nz = (1 - ratio) * no[2] + ratio * ne[2];
1604 float norm =
static_cast<float>(sqrt(nx * nx + ny * ny + nz * nz) +
1606 normal_ptr[0] = nx / norm;
1607 normal_ptr[1] = ny / norm;
1608 normal_ptr[2] = nz / norm;
1610 if (color_base_ptr) {
1611 float* color_ptr = color_indexer.
GetDataPtr<
float>(idx);
1612 float r_o = color_base_ptr[linear_idx * 3 + 0];
1613 float g_o = color_base_ptr[linear_idx * 3 + 1];
1614 float b_o = color_base_ptr[linear_idx * 3 + 2];
1616 float r_e = color_base_ptr[linear_idx_e * 3 + 0];
1617 float g_e = color_base_ptr[linear_idx_e * 3 + 1];
1618 float b_e = color_base_ptr[linear_idx_e * 3 + 2];
1620 color_ptr[0] = ((1 - ratio) * r_o + ratio * r_e) / 255.0f;
1621 color_ptr[1] = ((1 - ratio) * g_o + ratio * g_e) / 255.0f;
1622 color_ptr[2] = ((1 - ratio) * b_o + ratio * b_e) / 255.0f;
1628 index_t triangle_count = vertex_count * 3;
1632 #if defined(__CUDACC__)
1640 index_t workload_block_idx = widx / resolution3;
1641 index_t voxel_idx = widx % resolution3;
1645 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1649 xv, yv, zv, workload_block_idx);
1651 index_t table_idx = mesh_struct_ptr[3];
1652 if (tri_count[table_idx] == 0)
return;
1654 for (
index_t tri = 0; tri < 16; tri += 3) {
1655 if (tri_table[table_idx][tri] == -1)
return;
1659 for (
index_t vertex = 0; vertex < 3; ++vertex) {
1660 index_t edge = tri_table[table_idx][tri + vertex];
1662 index_t xv_i = xv + edge_shifts[edge][0];
1663 index_t yv_i = yv + edge_shifts[edge][1];
1664 index_t zv_i = zv + edge_shifts[edge][2];
1665 index_t edge_i = edge_shifts[edge][3];
1667 index_t dxb = xv_i / resolution;
1668 index_t dyb = yv_i / resolution;
1669 index_t dzb = zv_i / resolution;
1671 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
1675 workload_block_idx, nb_idx);
1678 xv_i - dxb * resolution,
1679 yv_i - dyb * resolution,
1680 zv_i - dzb * resolution,
1681 inv_indices_ptr[block_idx_i]);
1684 triangle_indexer.GetDataPtr<
index_t>(tri_idx);
1685 triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i];
1690 #if defined(__CUDACC__)
1693 triangle_count = (*count_ptr).load();
1696 triangles = triangles.Slice(0, 0, triangle_count);
#define CLOUDVIEWER_DEVICE
#define OPEN3D_ATOMIC_ADD(X, Y)
CLOUDVIEWER_HOST_DEVICE int Sign(int x)
#define CLOUDVIEWER_ASSERT(...)
static const Dtype Float64
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor fill with zeros.
CLOUDVIEWER_HOST_DEVICE void * GetDataPtr() const
CLOUDVIEWER_HOST_DEVICE bool InBoundary(float x, float y) const
__host__ __device__ float2 floorf(float2 v)
void ParallelFor(const Device &device, int64_t n, const func_t &func)
CLOUDVIEWER_DEVICE void DeviceGetNormal(const tsdf_t *tsdf_base_ptr, index_t xo, index_t yo, index_t zo, index_t curr_block_idx, float *n, index_t resolution, const ArrayIndexer &nb_block_masks_indexer, const ArrayIndexer &nb_block_indices_indexer)
TArrayIndexer< index_t > ArrayIndexer
void EstimateRangeCPU(const core::Tensor &block_keys, core::Tensor &range_minmax_map, const core::Tensor &intrinsics, const core::Tensor &extrinsics, int h, int w, int down_factor, int64_t block_resolution, float voxel_size, float depth_min, float depth_max, core::Tensor &fragment_buffer)
void RayCastCPU(std::shared_ptr< core::HashMap > &hashmap, const TensorMap &block_value_map, const core::Tensor &range_map, TensorMap &renderings_map, const core::Tensor &intrinsic, const core::Tensor &extrinsic, index_t h, index_t w, index_t block_resolution, float voxel_size, float depth_scale, float depth_min, float depth_max, float weight_threshold, float trunc_voxel_multiplier, int range_map_down_factor)
void ExtractPointCloudCPU(const core::Tensor &block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const TensorMap &block_value_map, core::Tensor &points, core::Tensor &normals, core::Tensor &colors, index_t block_resolution, float voxel_size, float weight_threshold, index_t &valid_size)
void ExtractTriangleMeshCPU(const core::Tensor &block_indices, const core::Tensor &inv_block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const TensorMap &block_value_map, core::Tensor &vertices, core::Tensor &triangles, core::Tensor &vertex_normals, core::Tensor &vertex_colors, index_t block_resolution, float voxel_size, float weight_threshold, index_t &vertex_count)
void IntegrateCPU(const core::Tensor &depth, const core::Tensor &color, const core::Tensor &block_indices, const core::Tensor &block_keys, TensorMap &block_value_map, const core::Tensor &depth_intrinsic, const core::Tensor &color_intrinsic, const core::Tensor &extrinsic, index_t resolution, float voxel_size, float sdf_trunc, float depth_scale, float depth_max)
void GetVoxelCoordinatesAndFlattenedIndicesCPU(const core::Tensor &buf_indices, const core::Tensor &block_keys, core::Tensor &voxel_coords, core::Tensor &flattened_indices, index_t block_resolution, float voxel_size)
CLOUDVIEWER_DEVICE index_t DeviceGetLinearIdx(index_t xo, index_t yo, index_t zo, index_t curr_block_idx, index_t resolution, const ArrayIndexer &nb_block_masks_indexer, const ArrayIndexer &nb_block_indices_indexer)
core::Tensor InverseTransformation(const core::Tensor &T)
TODO(wei): find a proper place for such functionalities.
Generic file read and write utility for python interface.
index_t CLOUDVIEWER_DEVICE Check(index_t xin, index_t yin, index_t zin)
void CLOUDVIEWER_DEVICE Update(index_t xin, index_t yin, index_t zin, index_t block_idx_in)