1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
10 #include "cloudViewer/core/Dispatch.h"
11 #include "cloudViewer/core/Dtype.h"
12 #include "cloudViewer/core/MemoryManager.h"
13 #include "cloudViewer/core/ParallelFor.h"
14 #include "cloudViewer/core/SizeVector.h"
15 #include "cloudViewer/core/Tensor.h"
16 #include "cloudViewer/core/hashmap/CUDA/StdGPUHashBackend.h"
17 #include "cloudViewer/core/hashmap/DeviceHashBackend.h"
18 #include "cloudViewer/core/hashmap/Dispatch.h"
19 #include "cloudViewer/core/hashmap/HashMap.h"
20 #include "cloudViewer/t/geometry/kernel/GeometryIndexer.h"
21 #include "cloudViewer/t/geometry/kernel/GeometryMacros.h"
22 #include "cloudViewer/t/geometry/kernel/VoxelBlockGrid.h"
23 #include "cloudViewer/t/geometry/kernel/VoxelBlockGridImpl.h"
25 namespace cloudViewer {
29 namespace voxel_grid {
32 CLOUDVIEWER_HOST_DEVICE Coord3i(index_t x, index_t y, index_t z)
33 : x_(x), y_(y), z_(z) {}
34 CLOUDVIEWER_HOST_DEVICE bool operator==(const Coord3i &other) const {
35 return x_ == other.x_ && y_ == other.y_ && z_ == other.z_;
43 void PointCloudTouchCUDA(std::shared_ptr<core::HashMap> &hashmap,
44 const core::Tensor &points,
45 core::Tensor &voxel_block_coords,
46 index_t voxel_grid_resolution,
49 index_t resolution = voxel_grid_resolution;
50 float block_size = voxel_size * resolution;
52 index_t n = points.GetLength();
53 const float *pcd_ptr = static_cast<const float *>(points.GetDataPtr());
55 core::Device device = points.GetDevice();
56 core::Tensor block_coordi({8 * n, 3}, core::Int32, device);
57 index_t *block_coordi_ptr =
58 static_cast<index_t *>(block_coordi.GetDataPtr());
59 core::Tensor count(std::vector<index_t>{0}, {}, core::Int32, device);
60 index_t *count_ptr = static_cast<index_t *>(count.GetDataPtr());
62 core::ParallelFor(hashmap->GetDevice(), n,
63 [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
64 float x = pcd_ptr[3 * workload_idx + 0];
65 float y = pcd_ptr[3 * workload_idx + 1];
66 float z = pcd_ptr[3 * workload_idx + 2];
68 index_t xb_lo = static_cast<index_t>(
69 floorf((x - sdf_trunc) / block_size));
70 index_t xb_hi = static_cast<index_t>(
71 floorf((x + sdf_trunc) / block_size));
72 index_t yb_lo = static_cast<index_t>(
73 floorf((y - sdf_trunc) / block_size));
74 index_t yb_hi = static_cast<index_t>(
75 floorf((y + sdf_trunc) / block_size));
76 index_t zb_lo = static_cast<index_t>(
77 floorf((z - sdf_trunc) / block_size));
78 index_t zb_hi = static_cast<index_t>(
79 floorf((z + sdf_trunc) / block_size));
81 for (index_t xb = xb_lo; xb <= xb_hi; ++xb) {
82 for (index_t yb = yb_lo; yb <= yb_hi; ++yb) {
83 for (index_t zb = zb_lo; zb <= zb_hi; ++zb) {
84 index_t idx = atomicAdd(count_ptr, 1);
85 block_coordi_ptr[3 * idx + 0] = xb;
86 block_coordi_ptr[3 * idx + 1] = yb;
87 block_coordi_ptr[3 * idx + 2] = zb;
93 index_t total_block_count = count.Item<index_t>();
94 if (total_block_count == 0) {
96 "No block is touched in TSDF volume, abort integration. Please "
97 "check specified parameters, especially depth_scale and "
100 block_coordi = block_coordi.Slice(0, 0, total_block_count);
101 core::Tensor block_buf_indices, block_masks;
102 hashmap->Activate(block_coordi.Slice(0, 0, count.Item<index_t>()),
103 block_buf_indices, block_masks);
104 voxel_block_coords = block_coordi.IndexGet({block_masks});
107 void DepthTouchCUDA(std::shared_ptr<core::HashMap> &hashmap,
108 const core::Tensor &depth,
109 const core::Tensor &intrinsic,
110 const core::Tensor &extrinsic,
111 core::Tensor &voxel_block_coords,
112 index_t voxel_grid_resolution,
118 core::Device device = depth.GetDevice();
119 NDArrayIndexer depth_indexer(depth, 2);
120 core::Tensor pose = t::geometry::InverseTransformation(extrinsic);
121 TransformIndexer ti(intrinsic, pose, 1.0f);
124 index_t rows_strided = depth_indexer.GetShape(0) / stride;
125 index_t cols_strided = depth_indexer.GetShape(1) / stride;
126 index_t n = rows_strided * cols_strided;
128 const index_t step_size = 3;
129 const index_t est_multipler_factor = (step_size + 1);
131 static core::Tensor block_coordi;
132 if (block_coordi.GetLength() != est_multipler_factor * n) {
133 block_coordi = core::Tensor({est_multipler_factor * n, 3},
134 core::Dtype::Int32, device);
138 core::Tensor count(std::vector<index_t>{0}, {1}, core::Dtype::Int32,
140 index_t *count_ptr = count.GetDataPtr<index_t>();
141 index_t *block_coordi_ptr = block_coordi.GetDataPtr<index_t>();
143 index_t resolution = voxel_grid_resolution;
144 float block_size = voxel_size * resolution;
145 DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
147 device, n, [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
148 index_t y = (workload_idx / cols_strided) * stride;
149 index_t x = (workload_idx % cols_strided) * stride;
151 float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
153 if (d > 0 && d < depth_max) {
154 float x_c = 0, y_c = 0, z_c = 0;
155 ti.Unproject(static_cast<float>(x),
156 static_cast<float>(y), 1.0, &x_c, &y_c,
158 float x_g = 0, y_g = 0, z_g = 0;
159 ti.RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
162 float x_o = 0, y_o = 0, z_o = 0;
163 ti.GetCameraPosition(&x_o, &y_o, &z_o);
166 float x_d = x_g - x_o;
167 float y_d = y_g - y_o;
168 float z_d = z_g - z_o;
170 const float t_min = max(d - sdf_trunc, 0.0);
171 const float t_max = min(d + sdf_trunc, depth_max);
172 const float t_step = (t_max - t_min) / step_size;
176 OPEN3D_ATOMIC_ADD(count_ptr, (step_size + 1));
177 for (index_t step = 0; step <= step_size; ++step) {
178 index_t offset = (step + idx) * 3;
180 index_t xb = static_cast<index_t>(
181 floorf((x_o + t * x_d) / block_size));
182 index_t yb = static_cast<index_t>(
183 floorf((y_o + t * y_d) / block_size));
184 index_t zb = static_cast<index_t>(
185 floorf((z_o + t * z_d) / block_size));
187 block_coordi_ptr[offset + 0] = xb;
188 block_coordi_ptr[offset + 1] = yb;
189 block_coordi_ptr[offset + 2] = zb;
197 index_t total_block_count = static_cast<index_t>(count[0].Item<index_t>());
198 if (total_block_count == 0) {
200 "No block is touched in TSDF volume, abort integration. Please "
201 "check specified parameters, especially depth_scale and "
205 total_block_count = std::min(total_block_count,
206 static_cast<index_t>(hashmap->GetCapacity()));
207 block_coordi = block_coordi.Slice(0, 0, total_block_count);
208 core::Tensor block_addrs, block_masks;
209 hashmap->Activate(block_coordi, block_addrs, block_masks);
211 // Customized IndexGet (generic version too slow)
213 core::Tensor({hashmap->Size(), 3}, core::Int32, device);
214 index_t *voxel_block_coord_ptr = voxel_block_coords.GetDataPtr<index_t>();
215 bool *block_masks_ptr = block_masks.GetDataPtr<bool>();
217 core::ParallelFor(device, total_block_count,
218 [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
219 if (block_masks_ptr[workload_idx]) {
220 index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
221 index_t offset_lhs = 3 * idx;
222 index_t offset_rhs = 3 * workload_idx;
223 voxel_block_coord_ptr[offset_lhs + 0] =
224 block_coordi_ptr[offset_rhs + 0];
225 voxel_block_coord_ptr[offset_lhs + 1] =
226 block_coordi_ptr[offset_rhs + 1];
227 voxel_block_coord_ptr[offset_lhs + 2] =
228 block_coordi_ptr[offset_rhs + 2];
231 CLOUDVIEWER_CUDA_CHECK(cudaDeviceSynchronize());
234 #define FN_ARGUMENTS \
235 const core::Tensor &depth, const core::Tensor &color, \
236 const core::Tensor &indices, const core::Tensor &block_keys, \
237 TensorMap &block_values, const core::Tensor &depth_intrinsic, \
238 const core::Tensor &color_intrinsic, \
239 const core::Tensor &extrinsic, index_t resolution, \
240 float voxel_size, float sdf_trunc, float depth_scale, \
243 template void IntegrateCUDA<uint16_t, uint8_t, float, uint16_t, uint16_t>(
245 template void IntegrateCUDA<uint16_t, uint8_t, float, float, float>(
247 template void IntegrateCUDA<float, float, float, uint16_t, uint16_t>(
249 template void IntegrateCUDA<float, float, float, float, float>(FN_ARGUMENTS);
253 #define FN_ARGUMENTS \
254 std::shared_ptr<core::HashMap> &hashmap, const TensorMap &block_value_map, \
255 const core::Tensor &range_map, TensorMap &renderings_map, \
256 const core::Tensor &intrinsic, const core::Tensor &extrinsic, \
257 index_t h, index_t w, index_t block_resolution, float voxel_size, \
258 float depth_scale, float depth_min, float depth_max, \
259 float weight_threshold, float trunc_voxel_multiplier, \
260 int range_map_down_factor
262 template void RayCastCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
263 template void RayCastCUDA<float, float, float>(FN_ARGUMENTS);
267 #define FN_ARGUMENTS \
268 const core::Tensor &block_indices, const core::Tensor &nb_block_indices, \
269 const core::Tensor &nb_block_masks, \
270 const core::Tensor &block_keys, const TensorMap &block_value_map, \
271 core::Tensor &points, core::Tensor &normals, core::Tensor &colors, \
272 index_t block_resolution, float voxel_size, \
273 float weight_threshold, index_t &valid_size
275 template void ExtractPointCloudCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
276 template void ExtractPointCloudCUDA<float, float, float>(FN_ARGUMENTS);
280 void ExtractTriangleMeshCUDA(const core::Tensor &block_indices,
281 const core::Tensor &inv_block_indices,
282 const core::Tensor &nb_block_indices,
283 const core::Tensor &nb_block_masks,
284 const core::Tensor &block_keys,
285 const std::vector<core::Tensor> &block_values,
286 core::Tensor &vertices,
287 core::Tensor &triangles,
288 core::Tensor &vertex_normals,
289 core::Tensor &vertex_colors,
290 index_t block_resolution,
292 float weight_threshold,
293 index_t &vertex_count);
295 #define FN_ARGUMENTS \
296 const core::Tensor &block_indices, const core::Tensor &inv_block_indices, \
297 const core::Tensor &nb_block_indices, \
298 const core::Tensor &nb_block_masks, \
299 const core::Tensor &block_keys, const TensorMap &block_value_map, \
300 core::Tensor &vertices, core::Tensor &triangles, \
301 core::Tensor &vertex_normals, core::Tensor &vertex_colors, \
302 index_t block_resolution, float voxel_size, \
303 float weight_threshold, index_t &vertex_count
305 template void ExtractTriangleMeshCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
306 template void ExtractTriangleMeshCUDA<float, float, float>(FN_ARGUMENTS);
310 } // namespace voxel_grid
311 } // namespace kernel
312 } // namespace geometry
314 } // namespace cloudViewer