ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
VoxelBlockGridCUDA.cu
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <Logging.h>
9 
10 #include "cloudViewer/core/Dispatch.h"
11 #include "cloudViewer/core/Dtype.h"
12 #include "cloudViewer/core/MemoryManager.h"
13 #include "cloudViewer/core/ParallelFor.h"
14 #include "cloudViewer/core/SizeVector.h"
15 #include "cloudViewer/core/Tensor.h"
16 #include "cloudViewer/core/hashmap/CUDA/StdGPUHashBackend.h"
17 #include "cloudViewer/core/hashmap/DeviceHashBackend.h"
18 #include "cloudViewer/core/hashmap/Dispatch.h"
19 #include "cloudViewer/core/hashmap/HashMap.h"
20 #include "cloudViewer/t/geometry/kernel/GeometryIndexer.h"
21 #include "cloudViewer/t/geometry/kernel/GeometryMacros.h"
22 #include "cloudViewer/t/geometry/kernel/VoxelBlockGrid.h"
23 #include "cloudViewer/t/geometry/kernel/VoxelBlockGridImpl.h"
24 
25 namespace cloudViewer {
26 namespace t {
27 namespace geometry {
28 namespace kernel {
29 namespace voxel_grid {
30 
31 struct Coord3i {
32  CLOUDVIEWER_HOST_DEVICE Coord3i(index_t x, index_t y, index_t z)
33  : x_(x), y_(y), z_(z) {}
34  CLOUDVIEWER_HOST_DEVICE bool operator==(const Coord3i &other) const {
35  return x_ == other.x_ && y_ == other.y_ && z_ == other.z_;
36  }
37 
38  index_t x_;
39  index_t y_;
40  index_t z_;
41 };
42 
43 void PointCloudTouchCUDA(std::shared_ptr<core::HashMap> &hashmap,
44  const core::Tensor &points,
45  core::Tensor &voxel_block_coords,
46  index_t voxel_grid_resolution,
47  float voxel_size,
48  float sdf_trunc) {
49  index_t resolution = voxel_grid_resolution;
50  float block_size = voxel_size * resolution;
51 
52  index_t n = points.GetLength();
53  const float *pcd_ptr = static_cast<const float *>(points.GetDataPtr());
54 
55  core::Device device = points.GetDevice();
56  core::Tensor block_coordi({8 * n, 3}, core::Int32, device);
57  index_t *block_coordi_ptr =
58  static_cast<index_t *>(block_coordi.GetDataPtr());
59  core::Tensor count(std::vector<index_t>{0}, {}, core::Int32, device);
60  index_t *count_ptr = static_cast<index_t *>(count.GetDataPtr());
61 
62  core::ParallelFor(hashmap->GetDevice(), n,
63  [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
64  float x = pcd_ptr[3 * workload_idx + 0];
65  float y = pcd_ptr[3 * workload_idx + 1];
66  float z = pcd_ptr[3 * workload_idx + 2];
67 
68  index_t xb_lo = static_cast<index_t>(
69  floorf((x - sdf_trunc) / block_size));
70  index_t xb_hi = static_cast<index_t>(
71  floorf((x + sdf_trunc) / block_size));
72  index_t yb_lo = static_cast<index_t>(
73  floorf((y - sdf_trunc) / block_size));
74  index_t yb_hi = static_cast<index_t>(
75  floorf((y + sdf_trunc) / block_size));
76  index_t zb_lo = static_cast<index_t>(
77  floorf((z - sdf_trunc) / block_size));
78  index_t zb_hi = static_cast<index_t>(
79  floorf((z + sdf_trunc) / block_size));
80 
81  for (index_t xb = xb_lo; xb <= xb_hi; ++xb) {
82  for (index_t yb = yb_lo; yb <= yb_hi; ++yb) {
83  for (index_t zb = zb_lo; zb <= zb_hi; ++zb) {
84  index_t idx = atomicAdd(count_ptr, 1);
85  block_coordi_ptr[3 * idx + 0] = xb;
86  block_coordi_ptr[3 * idx + 1] = yb;
87  block_coordi_ptr[3 * idx + 2] = zb;
88  }
89  }
90  }
91  });
92 
93  index_t total_block_count = count.Item<index_t>();
94  if (total_block_count == 0) {
95  utility::LogError(
96  "No block is touched in TSDF volume, abort integration. Please "
97  "check specified parameters, especially depth_scale and "
98  "voxel_size");
99  }
100  block_coordi = block_coordi.Slice(0, 0, total_block_count);
101  core::Tensor block_buf_indices, block_masks;
102  hashmap->Activate(block_coordi.Slice(0, 0, count.Item<index_t>()),
103  block_buf_indices, block_masks);
104  voxel_block_coords = block_coordi.IndexGet({block_masks});
105 }
106 
107 void DepthTouchCUDA(std::shared_ptr<core::HashMap> &hashmap,
108  const core::Tensor &depth,
109  const core::Tensor &intrinsic,
110  const core::Tensor &extrinsic,
111  core::Tensor &voxel_block_coords,
112  index_t voxel_grid_resolution,
113  float voxel_size,
114  float sdf_trunc,
115  float depth_scale,
116  float depth_max,
117  index_t stride) {
118  core::Device device = depth.GetDevice();
119  NDArrayIndexer depth_indexer(depth, 2);
120  core::Tensor pose = t::geometry::InverseTransformation(extrinsic);
121  TransformIndexer ti(intrinsic, pose, 1.0f);
122 
123  // Output
124  index_t rows_strided = depth_indexer.GetShape(0) / stride;
125  index_t cols_strided = depth_indexer.GetShape(1) / stride;
126  index_t n = rows_strided * cols_strided;
127 
128  const index_t step_size = 3;
129  const index_t est_multipler_factor = (step_size + 1);
130 
131  static core::Tensor block_coordi;
132  if (block_coordi.GetLength() != est_multipler_factor * n) {
133  block_coordi = core::Tensor({est_multipler_factor * n, 3},
134  core::Dtype::Int32, device);
135  }
136 
137  // Counter
138  core::Tensor count(std::vector<index_t>{0}, {1}, core::Dtype::Int32,
139  device);
140  index_t *count_ptr = count.GetDataPtr<index_t>();
141  index_t *block_coordi_ptr = block_coordi.GetDataPtr<index_t>();
142 
143  index_t resolution = voxel_grid_resolution;
144  float block_size = voxel_size * resolution;
145  DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
146  core::ParallelFor(
147  device, n, [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
148  index_t y = (workload_idx / cols_strided) * stride;
149  index_t x = (workload_idx % cols_strided) * stride;
150 
151  float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
152  depth_scale;
153  if (d > 0 && d < depth_max) {
154  float x_c = 0, y_c = 0, z_c = 0;
155  ti.Unproject(static_cast<float>(x),
156  static_cast<float>(y), 1.0, &x_c, &y_c,
157  &z_c);
158  float x_g = 0, y_g = 0, z_g = 0;
159  ti.RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
160 
161  // Origin
162  float x_o = 0, y_o = 0, z_o = 0;
163  ti.GetCameraPosition(&x_o, &y_o, &z_o);
164 
165  // Direction
166  float x_d = x_g - x_o;
167  float y_d = y_g - y_o;
168  float z_d = z_g - z_o;
169 
170  const float t_min = max(d - sdf_trunc, 0.0);
171  const float t_max = min(d + sdf_trunc, depth_max);
172  const float t_step = (t_max - t_min) / step_size;
173 
174  float t = t_min;
175  index_t idx =
176  OPEN3D_ATOMIC_ADD(count_ptr, (step_size + 1));
177  for (index_t step = 0; step <= step_size; ++step) {
178  index_t offset = (step + idx) * 3;
179 
180  index_t xb = static_cast<index_t>(
181  floorf((x_o + t * x_d) / block_size));
182  index_t yb = static_cast<index_t>(
183  floorf((y_o + t * y_d) / block_size));
184  index_t zb = static_cast<index_t>(
185  floorf((z_o + t * z_d) / block_size));
186 
187  block_coordi_ptr[offset + 0] = xb;
188  block_coordi_ptr[offset + 1] = yb;
189  block_coordi_ptr[offset + 2] = zb;
190 
191  t += t_step;
192  }
193  }
194  });
195  });
196 
197  index_t total_block_count = static_cast<index_t>(count[0].Item<index_t>());
198  if (total_block_count == 0) {
199  utility::LogError(
200  "No block is touched in TSDF volume, abort integration. Please "
201  "check specified parameters, especially depth_scale and "
202  "voxel_size");
203  }
204 
205  total_block_count = std::min(total_block_count,
206  static_cast<index_t>(hashmap->GetCapacity()));
207  block_coordi = block_coordi.Slice(0, 0, total_block_count);
208  core::Tensor block_addrs, block_masks;
209  hashmap->Activate(block_coordi, block_addrs, block_masks);
210 
211  // Customized IndexGet (generic version too slow)
212  voxel_block_coords =
213  core::Tensor({hashmap->Size(), 3}, core::Int32, device);
214  index_t *voxel_block_coord_ptr = voxel_block_coords.GetDataPtr<index_t>();
215  bool *block_masks_ptr = block_masks.GetDataPtr<bool>();
216  count[0] = 0;
217  core::ParallelFor(device, total_block_count,
218  [=] CLOUDVIEWER_DEVICE(index_t workload_idx) {
219  if (block_masks_ptr[workload_idx]) {
220  index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
221  index_t offset_lhs = 3 * idx;
222  index_t offset_rhs = 3 * workload_idx;
223  voxel_block_coord_ptr[offset_lhs + 0] =
224  block_coordi_ptr[offset_rhs + 0];
225  voxel_block_coord_ptr[offset_lhs + 1] =
226  block_coordi_ptr[offset_rhs + 1];
227  voxel_block_coord_ptr[offset_lhs + 2] =
228  block_coordi_ptr[offset_rhs + 2];
229  }
230  });
231  CLOUDVIEWER_CUDA_CHECK(cudaDeviceSynchronize());
232 }
233 
234 #define FN_ARGUMENTS \
235  const core::Tensor &depth, const core::Tensor &color, \
236  const core::Tensor &indices, const core::Tensor &block_keys, \
237  TensorMap &block_values, const core::Tensor &depth_intrinsic, \
238  const core::Tensor &color_intrinsic, \
239  const core::Tensor &extrinsic, index_t resolution, \
240  float voxel_size, float sdf_trunc, float depth_scale, \
241  float depth_max
242 
243 template void IntegrateCUDA<uint16_t, uint8_t, float, uint16_t, uint16_t>(
244  FN_ARGUMENTS);
245 template void IntegrateCUDA<uint16_t, uint8_t, float, float, float>(
246  FN_ARGUMENTS);
247 template void IntegrateCUDA<float, float, float, uint16_t, uint16_t>(
248  FN_ARGUMENTS);
249 template void IntegrateCUDA<float, float, float, float, float>(FN_ARGUMENTS);
250 
251 #undef FN_ARGUMENTS
252 
253 #define FN_ARGUMENTS \
254  std::shared_ptr<core::HashMap> &hashmap, const TensorMap &block_value_map, \
255  const core::Tensor &range_map, TensorMap &renderings_map, \
256  const core::Tensor &intrinsic, const core::Tensor &extrinsic, \
257  index_t h, index_t w, index_t block_resolution, float voxel_size, \
258  float depth_scale, float depth_min, float depth_max, \
259  float weight_threshold, float trunc_voxel_multiplier, \
260  int range_map_down_factor
261 
262 template void RayCastCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
263 template void RayCastCUDA<float, float, float>(FN_ARGUMENTS);
264 
265 #undef FN_ARGUMENTS
266 
267 #define FN_ARGUMENTS \
268  const core::Tensor &block_indices, const core::Tensor &nb_block_indices, \
269  const core::Tensor &nb_block_masks, \
270  const core::Tensor &block_keys, const TensorMap &block_value_map, \
271  core::Tensor &points, core::Tensor &normals, core::Tensor &colors, \
272  index_t block_resolution, float voxel_size, \
273  float weight_threshold, index_t &valid_size
274 
275 template void ExtractPointCloudCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
276 template void ExtractPointCloudCUDA<float, float, float>(FN_ARGUMENTS);
277 
278 #undef FN_ARGUMENTS
279 
280 void ExtractTriangleMeshCUDA(const core::Tensor &block_indices,
281  const core::Tensor &inv_block_indices,
282  const core::Tensor &nb_block_indices,
283  const core::Tensor &nb_block_masks,
284  const core::Tensor &block_keys,
285  const std::vector<core::Tensor> &block_values,
286  core::Tensor &vertices,
287  core::Tensor &triangles,
288  core::Tensor &vertex_normals,
289  core::Tensor &vertex_colors,
290  index_t block_resolution,
291  float voxel_size,
292  float weight_threshold,
293  index_t &vertex_count);
294 
295 #define FN_ARGUMENTS \
296  const core::Tensor &block_indices, const core::Tensor &inv_block_indices, \
297  const core::Tensor &nb_block_indices, \
298  const core::Tensor &nb_block_masks, \
299  const core::Tensor &block_keys, const TensorMap &block_value_map, \
300  core::Tensor &vertices, core::Tensor &triangles, \
301  core::Tensor &vertex_normals, core::Tensor &vertex_colors, \
302  index_t block_resolution, float voxel_size, \
303  float weight_threshold, index_t &vertex_count
304 
305 template void ExtractTriangleMeshCUDA<float, uint16_t, uint16_t>(FN_ARGUMENTS);
306 template void ExtractTriangleMeshCUDA<float, float, float>(FN_ARGUMENTS);
307 
308 #undef FN_ARGUMENTS
309 
310 } // namespace voxel_grid
311 } // namespace kernel
312 } // namespace geometry
313 } // namespace t
314 } // namespace cloudViewer