ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
RaycastingScene.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #ifdef _MSC_VER
9 // embree header files in tutorials/common redefine some macros on win
10 #pragma warning(disable : 4005)
11 #endif
13 
14 #ifdef BUILD_SYCL_MODULE
15 #include <sycl/sycl.hpp>
16 #endif
17 // This header is in the embree src dir (embree/src/ext_embree/..).
18 #include <embree4/rtcore.h>
19 // Qt defines a macro named 'emit' which conflicts with oneTBB's profiling API.
20 // Undefine it before including TBB headers.
21 #ifdef emit
22 #undef emit
23 #endif
24 #include <Helper.h>
25 #include <Logging.h>
26 #include <tbb/parallel_for.h>
27 
28 #include <Eigen/Core>
29 #include <cstring>
30 #include <tuple>
31 #include <unsupported/Eigen/AlignedVector3>
32 #include <vector>
33 
35 
36 namespace callbacks {
37 
38 struct GeomPrimID {
39  uint32_t geomID;
40  uint32_t primID;
41  float ray_tfar;
42 };
43 
45  RTCRayQueryContext context;
48 };
49 
50 #ifdef BUILD_SYCL_MODULE
51 RTC_SYCL_INDIRECTLY_CALLABLE void CountIntersectionsFunc(
52  const RTCFilterFunctionNArguments* args) {
53 #else
54 void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) {
55 #endif
56  int* valid = args->valid;
58  reinterpret_cast<const CountIntersectionsContext*>(args->context);
59  struct RTCRayN* rayN = args->ray;
60  struct RTCHitN* hitN = args->hit;
61  const unsigned int N = args->N;
62 
63  // Avoid crashing when debug visualizations are used.
64  if (context == nullptr) return;
65 
66  GeomPrimID* previous_geom_prim_ID_tfar =
67  context->previous_geom_prim_ID_tfar;
68  int* intersections = context->intersections;
69 
70  // Iterate over all rays in ray packet.
71  for (unsigned int ui = 0; ui < N; ui += 1) {
72  // Calculate loop and execution mask
73  unsigned int vi = ui + 0;
74  if (vi >= N) continue;
75 
76  // Ignore inactive rays.
77  if (valid[vi] != -1) continue;
78 
79  // Read ray/hit from ray structure.
80  RTCRay ray = rtcGetRayFromRayN(rayN, N, ui);
81  RTCHit hit = rtcGetHitFromHitN(hitN, N, ui);
82 
83  unsigned int ray_id = ray.id;
84  GeomPrimID gpID = {hit.geomID, hit.primID, ray.tfar};
85  auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id];
86  if (prev_gpIDtfar.geomID != hit.geomID ||
87  (prev_gpIDtfar.primID != hit.primID &&
88  prev_gpIDtfar.ray_tfar != ray.tfar)) {
89  ++(intersections[ray_id]);
90  previous_geom_prim_ID_tfar[ray_id] = gpID;
91  }
92  // Always ignore hit
93  valid[ui] = 0;
94  }
95 }
96 
98  RTCRayQueryContext context;
100  unsigned int* ray_ids;
101  unsigned int* geometry_ids;
102  unsigned int* primitive_ids;
104  float* t_hit;
105  int* cumsum;
106  unsigned int* track_intersections;
107 };
108 
109 #ifdef BUILD_SYCL_MODULE
110 RTC_SYCL_INDIRECTLY_CALLABLE void ListIntersectionsFunc(
111  const RTCFilterFunctionNArguments* args) {
112 #else
113 void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) {
114 #endif
115  int* valid = args->valid;
117  reinterpret_cast<const ListIntersectionsContext*>(args->context);
118  struct RTCRayN* rayN = args->ray;
119  struct RTCHitN* hitN = args->hit;
120  const unsigned int N = args->N;
121 
122  // Avoid crashing when debug visualizations are used.
123  if (context == nullptr) return;
124 
125  GeomPrimID* previous_geom_prim_ID_tfar =
126  context->previous_geom_prim_ID_tfar;
127  unsigned int* ray_ids = context->ray_ids;
128  unsigned int* geometry_ids = context->geometry_ids;
129  unsigned int* primitive_ids = context->primitive_ids;
130  float* primitive_uvs = context->primitive_uvs;
131  float* t_hit = context->t_hit;
132  int* cumsum = context->cumsum;
133  unsigned int* track_intersections = context->track_intersections;
134 
135  // Iterate over all rays in ray packet.
136  for (unsigned int ui = 0; ui < N; ui += 1) {
137  // Calculate loop and execution mask
138  unsigned int vi = ui + 0;
139  if (vi >= N) continue;
140 
141  // Ignore inactive rays.
142  if (valid[vi] != -1) continue;
143 
144  // Read ray/hit from ray structure.
145  RTCRay ray = rtcGetRayFromRayN(rayN, N, ui);
146  RTCHit hit = rtcGetHitFromHitN(hitN, N, ui);
147 
148  unsigned int ray_id = ray.id;
149  GeomPrimID gpID = {hit.geomID, hit.primID, ray.tfar};
150  auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id];
151  if (prev_gpIDtfar.geomID != hit.geomID ||
152  (prev_gpIDtfar.primID != hit.primID &&
153  prev_gpIDtfar.ray_tfar != ray.tfar)) {
154  size_t idx = cumsum[ray_id] + track_intersections[ray_id];
155  ray_ids[idx] = ray_id;
156  geometry_ids[idx] = hit.geomID;
157  primitive_ids[idx] = hit.primID;
158  primitive_uvs[idx * 2 + 0] = hit.u;
159  primitive_uvs[idx * 2 + 1] = hit.v;
160  t_hit[idx] = ray.tfar;
161  previous_geom_prim_ID_tfar[ray_id] = gpID;
162  ++(track_intersections[ray_id]);
163  }
164  // Always ignore hit
165  valid[ui] = 0;
166  }
167 }
168 
169 } // namespace callbacks
170 
171 namespace {
172 
173 typedef Eigen::AlignedVector3<float> Vec3fa;
174 // Dont force alignment for Vec2f because we use it just for storing
175 typedef Eigen::Matrix<float, 2, 1, Eigen::DontAlign> Vec2f;
176 typedef Eigen::Vector3f Vec3f;
177 
178 // Error function called by embree.
179 void ErrorFunction(void* userPtr, enum RTCError error, const char* str) {
180  cloudViewer::utility::LogError("Embree error: {} {}",
181  rtcGetErrorString(error), str);
182 }
183 
184 // Checks the last dim, ensures that the number of dims is >= min_ndim, checks
185 // the device, and dtype.
186 template <class DTYPE>
187 void AssertTensorDtypeLastDimDeviceMinNDim(
188  const cloudViewer::core::Tensor& tensor,
189  const std::string& tensor_name,
190  int64_t last_dim,
191  const cloudViewer::core::Device& device,
192  int64_t min_ndim = 2) {
194  if (tensor.NumDims() < min_ndim) {
196  "{} Tensor ndim is {} but expected ndim >= {}", tensor_name,
197  tensor.NumDims(), min_ndim);
198  }
199  if (tensor.GetShape().back() != last_dim) {
201  "The last dimension of the {} Tensor must be {} but got "
202  "Tensor with shape {}",
203  tensor_name, last_dim, tensor.GetShape().ToString());
204  }
206  tensor, cloudViewer::core::Dtype::FromType<DTYPE>());
207 }
208 
209 // Adapted from common/math/closest_point.h
210 template <typename Vec3faType>
211 inline Vec3faType closestPointTriangle(Vec3faType const& p,
212  Vec3faType const& a,
213  Vec3faType const& b,
214  Vec3faType const& c,
215  float& tex_u,
216  float& tex_v) {
217  const Vec3faType ab = b - a;
218  const Vec3faType ac = c - a;
219  const Vec3faType ap = p - a;
220 
221  const float d1 = ab.dot(ap);
222  const float d2 = ac.dot(ap);
223  if (d1 <= 0.f && d2 <= 0.f) {
224  tex_u = 0;
225  tex_v = 0;
226  return a;
227  }
228 
229  const Vec3faType bp = p - b;
230  const float d3 = ab.dot(bp);
231  const float d4 = ac.dot(bp);
232  if (d3 >= 0.f && d4 <= d3) {
233  tex_u = 1;
234  tex_v = 0;
235  return b;
236  }
237 
238  const Vec3faType cp = p - c;
239  const float d5 = ab.dot(cp);
240  const float d6 = ac.dot(cp);
241  if (d6 >= 0.f && d5 <= d6) {
242  tex_u = 0;
243  tex_v = 1;
244  return c;
245  }
246 
247  const float vc = d1 * d4 - d3 * d2;
248  if (vc <= 0.f && d1 >= 0.f && d3 <= 0.f) {
249  const float v = d1 / (d1 - d3);
250  tex_u = v;
251  tex_v = 0;
252  return a + v * ab;
253  }
254 
255  const float vb = d5 * d2 - d1 * d6;
256  if (vb <= 0.f && d2 >= 0.f && d6 <= 0.f) {
257  const float v = d2 / (d2 - d6);
258  tex_u = 0;
259  tex_v = v;
260  return a + v * ac;
261  }
262 
263  const float va = d3 * d6 - d5 * d4;
264  if (va <= 0.f && (d4 - d3) >= 0.f && (d5 - d6) >= 0.f) {
265  const float v = (d4 - d3) / ((d4 - d3) + (d5 - d6));
266  tex_u = 1 - v;
267  tex_v = v;
268  return b + v * (c - b);
269  }
270 
271  const float denom = 1.f / (va + vb + vc);
272  const float v = vb * denom;
273  const float w = vc * denom;
274  tex_u = v;
275  tex_v = w;
276  return a + v * ab + w * ac;
277 }
278 
279 struct GeometryPtr {
280  RTCGeometryType geom_type;
281  const void* ptr1;
282  const void* ptr2;
283 };
284 
285 template <typename Vec3fType, typename Vec2fType>
286 struct ClosestPointResult {
287  ClosestPointResult()
288  : primID(RTC_INVALID_GEOMETRY_ID), geomID(RTC_INVALID_GEOMETRY_ID) {}
289 
290  Vec3fType p;
291  unsigned int primID;
292  unsigned int geomID;
293  Vec2fType uv;
294  Vec3fType n;
295  GeometryPtr* geometry_ptrs_ptr;
296 };
297 
298 // Code adapted from the embree closest_point tutorial.
299 template <typename Vec3fType, typename Vec3faType, typename Vec2fType>
300 bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) {
301  assert(args->userPtr);
302  const unsigned int geomID = args->geomID;
303  const unsigned int primID = args->primID;
304 
305  // query position in world space
306  Vec3faType q(args->query->x, args->query->y, args->query->z);
307 
308  ClosestPointResult<Vec3fType, Vec2fType>* result =
309  static_cast<ClosestPointResult<Vec3fType, Vec2fType>*>(
310  args->userPtr);
311  const RTCGeometryType geom_type =
312  result->geometry_ptrs_ptr[geomID].geom_type;
313  const void* ptr1 = result->geometry_ptrs_ptr[geomID].ptr1;
314  const void* ptr2 = result->geometry_ptrs_ptr[geomID].ptr2;
315 
316  if (RTC_GEOMETRY_TYPE_TRIANGLE == geom_type) {
317  const float* vertex_positions = (const float*)ptr1;
318  const uint32_t* triangle_indices = (const uint32_t*)ptr2;
319 
320  Vec3faType v0(
321  vertex_positions[3 * triangle_indices[3 * primID + 0] + 0],
322  vertex_positions[3 * triangle_indices[3 * primID + 0] + 1],
323  vertex_positions[3 * triangle_indices[3 * primID + 0] + 2]);
324  Vec3faType v1(
325  vertex_positions[3 * triangle_indices[3 * primID + 1] + 0],
326  vertex_positions[3 * triangle_indices[3 * primID + 1] + 1],
327  vertex_positions[3 * triangle_indices[3 * primID + 1] + 2]);
328  Vec3faType v2(
329  vertex_positions[3 * triangle_indices[3 * primID + 2] + 0],
330  vertex_positions[3 * triangle_indices[3 * primID + 2] + 1],
331  vertex_positions[3 * triangle_indices[3 * primID + 2] + 2]);
332 
333  // Determine distance to closest point on triangle
334  float u, v;
335  const Vec3faType p =
336  closestPointTriangle<Vec3faType>(q, v0, v1, v2, u, v);
337  float d = (q - p).norm();
338 
339  // Store result in userPtr and update the query radius if we found a
340  // point closer to the query position. This is optional but allows for
341  // faster traversal (due to better culling).
342  if (d < args->query->radius) {
343  args->query->radius = d;
344  result->p = p;
345  result->primID = primID;
346  result->geomID = geomID;
347  Vec3faType e1 = v1 - v0;
348  Vec3faType e2 = v2 - v0;
349  result->uv = Vec2fType(u, v);
350  result->n = (e1.cross(e2)).normalized();
351  return true; // Return true to indicate that the query radius
352  // changed.
353  }
354  }
355  return false;
356 }
357 
358 } // namespace
359 
360 namespace cloudViewer {
361 namespace t {
362 namespace geometry {
363 
365  RTCScene scene_;
366  bool scene_committed_; // true if the scene has been committed.
367  RTCDevice device_;
368  // Vector for storing some information about the added geometry.
369  std::vector<GeometryPtr> geometry_ptrs_;
370  core::Device tensor_device_; // cpu or sycl
371 
373 
374  virtual ~Impl() = default;
375 
376  void CommitScene() {
377  if (!scene_committed_) {
378  if (devprop_join_commit) {
379  rtcJoinCommitScene(scene_);
380  } else {
381  rtcCommitScene(scene_);
382  }
383  scene_committed_ = true;
384  }
385  }
386 
387  virtual void CastRays(const float* const rays,
388  const size_t num_rays,
389  float* t_hit,
390  unsigned int* geometry_ids,
391  unsigned int* primitive_ids,
392  float* primitive_uvs,
393  float* primitive_normals,
394  const int nthreads,
395  const bool line_intersection) = 0;
396 
397  virtual void TestOcclusions(const float* const rays,
398  const size_t num_rays,
399  const float tnear,
400  const float tfar,
401  int8_t* occluded,
402  const int nthreads) = 0;
403 
404  virtual void CountIntersections(const float* const rays,
405  const size_t num_rays,
406  int* intersections,
407  const int nthreads) = 0;
408 
409  virtual void ListIntersections(const float* const rays,
410  const size_t num_rays,
411  const size_t num_intersections,
412  int* cumsum,
413  unsigned int* track_intersections,
414  unsigned int* ray_ids,
415  unsigned int* geometry_ids,
416  unsigned int* primitive_ids,
417  float* primitive_uvs,
418  float* t_hit,
419  const int nthreads) = 0;
420 
421  virtual void ComputeClosestPoints(const float* const query_points,
422  const size_t num_query_points,
423  float* closest_points,
424  unsigned int* geometry_ids,
425  unsigned int* primitive_ids,
426  float* primitive_uvs,
427  float* primitive_normals,
428  const int nthreads) = 0;
429 
430  virtual void ArraySum(int* data_ptr,
431  size_t num_elements,
432  size_t& result) = 0;
433 
434  virtual void ArrayPartialSum(int* input,
435  int* output,
436  size_t num_elements) = 0;
437 
438  virtual void CopyArray(int* src, uint32_t* dst, size_t num_elements) = 0;
439 };
440 
441 #ifdef BUILD_SYCL_MODULE
442 struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl {
443  // SYCL variables
444  sycl::queue queue_;
445  sycl::context context_;
446  sycl::device sycl_device_;
447 
448  callbacks::GeomPrimID* li_previous_geom_prim_ID_tfar = nullptr;
449  callbacks::GeomPrimID* ci_previous_geom_prim_ID_tfar = nullptr;
450 
451  ~SYCLImpl() {
452  if (li_previous_geom_prim_ID_tfar) {
453  sycl::free(li_previous_geom_prim_ID_tfar, queue_);
454  }
455  if (ci_previous_geom_prim_ID_tfar) {
456  sycl::free(ci_previous_geom_prim_ID_tfar, queue_);
457  }
458  }
459 
460  void InitializeDevice() {
461  try {
462  sycl_device_ = sycl::device(rtcSYCLDeviceSelector);
463  } catch (std::exception& e) {
464  utility::LogError("Caught exception creating sycl::device: {}",
465  e.what());
466  return;
467  }
468 
469  queue_ = sycl::queue(sycl_device_);
470  context_ = sycl::context(sycl_device_);
471 
472  device_ = rtcNewSYCLDevice(context_, "");
473  rtcSetDeviceSYCLDevice(device_, sycl_device_);
474 
475  if (!device_) {
476  utility::LogError("Error %d: cannot create device\n",
477  rtcGetDeviceError(NULL));
478  }
479  }
480 
481  void CastRays(const float* const rays,
482  const size_t num_rays,
483  float* t_hit,
484  unsigned int* geometry_ids,
485  unsigned int* primitive_ids,
486  float* primitive_uvs,
487  float* primitive_normals,
488  const int nthreads,
489  const bool line_intersection) override {
490  CommitScene();
491 
492  auto scene = this->scene_;
493  queue_.submit([=](sycl::handler& cgh) {
494  cgh.parallel_for(
495  sycl::range<1>(num_rays),
496  [=](sycl::item<1> item, sycl::kernel_handler kh) {
497  const size_t i = item.get_id(0);
498 
499  struct RTCRayHit rh;
500  const float* r = &rays[i * 6];
501  rh.ray.org_x = r[0];
502  rh.ray.org_y = r[1];
503  rh.ray.org_z = r[2];
504  if (line_intersection) {
505  rh.ray.dir_x = r[3] - r[0];
506  rh.ray.dir_y = r[4] - r[1];
507  rh.ray.dir_z = r[5] - r[2];
508  } else {
509  rh.ray.dir_x = r[3];
510  rh.ray.dir_y = r[4];
511  rh.ray.dir_z = r[5];
512  }
513  rh.ray.tnear = 0;
514  if (line_intersection) {
515  rh.ray.tfar = 1.f;
516  } else {
517  rh.ray.tfar =
519  }
520  rh.ray.mask = -1;
521  rh.ray.id = i;
522  rh.ray.flags = 0;
523  rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
524  rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
525 
526  rtcIntersect1(scene, &rh);
527 
528  t_hit[i] = rh.ray.tfar;
529  if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) {
530  geometry_ids[i] = rh.hit.geomID;
531  primitive_ids[i] = rh.hit.primID;
532  primitive_uvs[i * 2 + 0] = rh.hit.u;
533  primitive_uvs[i * 2 + 1] = rh.hit.v;
534  float inv_norm =
535  1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x +
536  rh.hit.Ng_y * rh.hit.Ng_y +
537  rh.hit.Ng_z * rh.hit.Ng_z);
538  primitive_normals[i * 3 + 0] =
539  rh.hit.Ng_x * inv_norm;
540  primitive_normals[i * 3 + 1] =
541  rh.hit.Ng_y * inv_norm;
542  primitive_normals[i * 3 + 2] =
543  rh.hit.Ng_z * inv_norm;
544  } else {
545  geometry_ids[i] = RTC_INVALID_GEOMETRY_ID;
546  primitive_ids[i] = RTC_INVALID_GEOMETRY_ID;
547  primitive_uvs[i * 2 + 0] = 0;
548  primitive_uvs[i * 2 + 1] = 0;
549  primitive_normals[i * 3 + 0] = 0;
550  primitive_normals[i * 3 + 1] = 0;
551  primitive_normals[i * 3 + 2] = 0;
552  }
553  });
554  });
555  queue_.wait_and_throw();
556  }
557 
558  void TestOcclusions(const float* const rays,
559  const size_t num_rays,
560  const float tnear,
561  const float tfar,
562  int8_t* occluded,
563  const int nthreads) override {
564  CommitScene();
565 
566  auto scene = this->scene_;
567  queue_.submit([=](sycl::handler& cgh) {
568  cgh.parallel_for(
569  sycl::range<1>(num_rays),
570  [=](sycl::item<1> item, sycl::kernel_handler kh) {
571  struct RTCRayQueryContext context;
572  rtcInitRayQueryContext(&context);
573 
574  RTCOccludedArguments args;
575  rtcInitOccludedArguments(&args);
576  args.context = &context;
577 
578  const size_t i = item.get_id(0);
579 
580  struct RTCRay ray;
581  const float* r = &rays[i * 6];
582  ray.org_x = r[0];
583  ray.org_y = r[1];
584  ray.org_z = r[2];
585  ray.dir_x = r[3];
586  ray.dir_y = r[4];
587  ray.dir_z = r[5];
588  ray.tnear = tnear;
589  ray.tfar = tfar;
590  ray.mask = -1;
591  ray.id = i;
592  ray.flags = 0;
593 
594  rtcOccluded1(scene, &ray, &args);
595 
596  occluded[i] = int8_t(
598  ray.tfar);
599  });
600  });
601  queue_.wait_and_throw();
602  }
603 
604  void CountIntersections(const float* const rays,
605  const size_t num_rays,
606  int* intersections,
607  const int nthreads) override {
608  CommitScene();
609 
610  queue_.memset(intersections, 0, sizeof(int) * num_rays).wait();
611 
612  ci_previous_geom_prim_ID_tfar =
613  sycl::malloc_device<callbacks::GeomPrimID>(num_rays, queue_);
614 
615  // Check if allocation was successful
616  if (!ci_previous_geom_prim_ID_tfar) {
617  throw std::runtime_error("Failed to allocate device memory");
618  }
619 
620  auto host_previous_geom_prim_ID_tfar =
621  std::unique_ptr<callbacks::GeomPrimID[],
622  std::default_delete<callbacks::GeomPrimID[]>>(
623  new callbacks::GeomPrimID[num_rays]);
624  for (size_t i = 0; i < num_rays; ++i) {
625  host_previous_geom_prim_ID_tfar[i] = {
626  uint32_t(RTC_INVALID_GEOMETRY_ID),
627  uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f};
628  }
629 
630  // Copy the initialized data to the device
631  queue_.memcpy(ci_previous_geom_prim_ID_tfar,
632  host_previous_geom_prim_ID_tfar.get(),
633  num_rays * sizeof(callbacks::GeomPrimID))
634  .wait();
635 
636  auto scene = this->scene_;
637  auto ci_previous_geom_prim_ID_tfar_ = ci_previous_geom_prim_ID_tfar;
638  queue_.submit([=](sycl::handler& cgh) {
639  cgh.parallel_for(
640  sycl::range<1>(num_rays),
641  [=](sycl::item<1> item, sycl::kernel_handler kh) {
643  rtcInitRayQueryContext(&context.context);
644  context.previous_geom_prim_ID_tfar =
645  ci_previous_geom_prim_ID_tfar_;
646  context.intersections = intersections;
647 
648  RTCIntersectArguments args;
649  rtcInitIntersectArguments(&args);
650  args.filter = callbacks::CountIntersectionsFunc;
651  args.context = &context.context;
652 
653  const size_t i = item.get_id(0);
654 
655  struct RTCRayHit rh;
656  const float* r = &rays[i * 6];
657  rh.ray.org_x = r[0];
658  rh.ray.org_y = r[1];
659  rh.ray.org_z = r[2];
660  rh.ray.dir_x = r[3];
661  rh.ray.dir_y = r[4];
662  rh.ray.dir_z = r[5];
663  rh.ray.tnear = 0;
664  rh.ray.tfar = std::numeric_limits<float>::infinity();
665  rh.ray.mask = -1;
666  rh.ray.flags = 0;
667  rh.ray.id = i;
668  rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
669  rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
670 
671  rtcIntersect1(scene, &rh, &args);
672  });
673  });
674  queue_.wait_and_throw();
675 
676  // Free the allocated memory
677  sycl::free(ci_previous_geom_prim_ID_tfar, queue_);
678  ci_previous_geom_prim_ID_tfar = nullptr;
679  }
680 
681  void ListIntersections(const float* const rays,
682  const size_t num_rays,
683  const size_t num_intersections,
684  int* cumsum,
685  unsigned int* track_intersections,
686  unsigned int* ray_ids,
687  unsigned int* geometry_ids,
688  unsigned int* primitive_ids,
689  float* primitive_uvs,
690  float* t_hit,
691  const int nthreads) override {
692  CommitScene();
693 
694  queue_.memset(track_intersections, 0, sizeof(uint32_t) * num_rays)
695  .wait();
696  queue_.memset(ray_ids, 0, sizeof(uint32_t) * num_intersections).wait();
697  queue_.memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections)
698  .wait();
699  queue_.memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections)
700  .wait();
701  queue_.memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2)
702  .wait();
703  queue_.memset(t_hit, 0, sizeof(float) * num_intersections).wait();
704 
705  li_previous_geom_prim_ID_tfar =
706  sycl::malloc_device<callbacks::GeomPrimID>(num_rays, queue_);
707 
708  // Check if allocation was successful
709  if (!li_previous_geom_prim_ID_tfar) {
710  throw std::runtime_error("Failed to allocate device memory");
711  }
712 
713  auto host_previous_geom_prim_ID_tfar =
714  std::unique_ptr<callbacks::GeomPrimID[],
715  std::default_delete<callbacks::GeomPrimID[]>>(
716  new callbacks::GeomPrimID[num_rays]);
717  for (size_t i = 0; i < num_rays; ++i) {
718  host_previous_geom_prim_ID_tfar[i] = {
719  uint32_t(RTC_INVALID_GEOMETRY_ID),
720  uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f};
721  }
722 
723  // Copy the initialized data to the device
724  queue_.memcpy(li_previous_geom_prim_ID_tfar,
725  host_previous_geom_prim_ID_tfar.get(),
726  num_rays * sizeof(callbacks::GeomPrimID))
727  .wait();
728 
729  auto scene = this->scene_;
730  auto li_previous_geom_prim_ID_tfar_ = li_previous_geom_prim_ID_tfar;
731  queue_.submit([=](sycl::handler& cgh) {
732  cgh.parallel_for(
733  sycl::range<1>(num_rays),
734  [=](sycl::item<1> item, sycl::kernel_handler kh) {
736  rtcInitRayQueryContext(&context.context);
737  context.previous_geom_prim_ID_tfar =
738  li_previous_geom_prim_ID_tfar_;
739  context.ray_ids = ray_ids;
740  context.geometry_ids = geometry_ids;
741  context.primitive_ids = primitive_ids;
742  context.primitive_uvs = primitive_uvs;
743  context.t_hit = t_hit;
744  context.cumsum = cumsum;
745  context.track_intersections = track_intersections;
746 
747  RTCIntersectArguments args;
748  rtcInitIntersectArguments(&args);
749  args.filter = callbacks::ListIntersectionsFunc;
750  args.context = &context.context;
751 
752  const size_t i = item.get_id(0);
753 
754  struct RTCRayHit rh;
755  const float* r = &rays[i * 6];
756  rh.ray.org_x = r[0];
757  rh.ray.org_y = r[1];
758  rh.ray.org_z = r[2];
759  rh.ray.dir_x = r[3];
760  rh.ray.dir_y = r[4];
761  rh.ray.dir_z = r[5];
762  rh.ray.tnear = 0;
763  rh.ray.tfar = std::numeric_limits<float>::infinity();
764  rh.ray.mask = -1;
765  rh.ray.flags = 0;
766  rh.ray.id = i;
767  rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
768  rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
769 
770  rtcIntersect1(scene, &rh, &args);
771  });
772  });
773  queue_.wait_and_throw();
774 
775  // Free the allocated memory
776  sycl::free(li_previous_geom_prim_ID_tfar, queue_);
777  li_previous_geom_prim_ID_tfar = nullptr;
778  }
779 
780  void ComputeClosestPoints(const float* const query_points,
781  const size_t num_query_points,
782  float* closest_points,
783  unsigned int* geometry_ids,
784  unsigned int* primitive_ids,
785  float* primitive_uvs,
786  float* primitive_normals,
787  const int nthreads) override {
788  throw std::logic_error("Function not yet implemented");
789  }
790 
791  void ArraySum(int* data_ptr, size_t num_elements, size_t& result) override {
792  sycl::buffer<size_t, 1> result_buf(&result, sycl::range<1>(1));
793 
794  queue_.submit([&](sycl::handler& cgh) {
795  auto result_acc =
796  result_buf.get_access<sycl::access::mode::read_write>(cgh);
797  cgh.parallel_for(
798  sycl::range<1>(num_elements),
799  [=](sycl::item<1> item, sycl::kernel_handler kh) {
800  const size_t i = item.get_id(0);
801  sycl::atomic_ref<size_t, sycl::memory_order::relaxed,
802  sycl::memory_scope::device>
803  atomic_result_data(result_acc[0]);
804  atomic_result_data.fetch_add(data_ptr[i]);
805  });
806  });
807  queue_.wait_and_throw();
808  }
809 
810  void ArrayPartialSum(int* input,
811  int* output,
812  size_t num_elements) override {
813  queue_.submit([&](sycl::handler& cgh) {
814  cgh.single_task([=]() {
815  for (size_t idx = 1; idx < num_elements; ++idx) {
816  output[idx] = output[idx - 1] + input[idx - 1];
817  }
818  });
819  });
820 
821  queue_.wait_and_throw();
822  }
823 
824  void CopyArray(int* src, uint32_t* dst, size_t num_elements) override {
825  queue_.memcpy(dst, src, num_elements * sizeof(uint32_t)).wait();
826  }
827 };
828 #endif
829 
831  // The maximum number of rays used in calls to embree.
832  const size_t BATCH_SIZE = 1024;
833 
834  void CastRays(const float* const rays,
835  const size_t num_rays,
836  float* t_hit,
837  unsigned int* geometry_ids,
838  unsigned int* primitive_ids,
839  float* primitive_uvs,
840  float* primitive_normals,
841  const int nthreads,
842  const bool line_intersection) override {
843  CommitScene();
844 
845  auto LoopFn = [&](const tbb::blocked_range<size_t>& range) {
846  std::vector<RTCRayHit> rayhits(range.size());
847 
848  for (size_t i = range.begin(); i < range.end(); ++i) {
849  RTCRayHit& rh = rayhits[i - range.begin()];
850  const float* r = &rays[i * 6];
851  rh.ray.org_x = r[0];
852  rh.ray.org_y = r[1];
853  rh.ray.org_z = r[2];
854  if (line_intersection) {
855  rh.ray.dir_x = r[3] - r[0];
856  rh.ray.dir_y = r[4] - r[1];
857  rh.ray.dir_z = r[5] - r[2];
858  } else {
859  rh.ray.dir_x = r[3];
860  rh.ray.dir_y = r[4];
861  rh.ray.dir_z = r[5];
862  }
863  rh.ray.tnear = 0;
864  if (line_intersection) {
865  rh.ray.tfar = 1.f;
866  } else {
867  rh.ray.tfar = std::numeric_limits<float>::infinity();
868  }
869  rh.ray.mask = -1;
870  rh.ray.id = i - range.begin();
871  rh.ray.flags = 0;
872  rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
873  rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
874 
875  rtcIntersect1(scene_, &rh);
876  }
877 
878  for (size_t i = range.begin(); i < range.end(); ++i) {
879  RTCRayHit rh = rayhits[i - range.begin()];
880  size_t idx = rh.ray.id + range.begin();
881  t_hit[idx] = rh.ray.tfar;
882  if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) {
883  geometry_ids[idx] = rh.hit.geomID;
884  primitive_ids[idx] = rh.hit.primID;
885  primitive_uvs[idx * 2 + 0] = rh.hit.u;
886  primitive_uvs[idx * 2 + 1] = rh.hit.v;
887  float inv_norm = 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x +
888  rh.hit.Ng_y * rh.hit.Ng_y +
889  rh.hit.Ng_z * rh.hit.Ng_z);
890  primitive_normals[idx * 3 + 0] = rh.hit.Ng_x * inv_norm;
891  primitive_normals[idx * 3 + 1] = rh.hit.Ng_y * inv_norm;
892  primitive_normals[idx * 3 + 2] = rh.hit.Ng_z * inv_norm;
893  } else {
894  geometry_ids[idx] = RTC_INVALID_GEOMETRY_ID;
895  primitive_ids[idx] = RTC_INVALID_GEOMETRY_ID;
896  primitive_uvs[idx * 2 + 0] = 0;
897  primitive_uvs[idx * 2 + 1] = 0;
898  primitive_normals[idx * 3 + 0] = 0;
899  primitive_normals[idx * 3 + 1] = 0;
900  primitive_normals[idx * 3 + 2] = 0;
901  }
902  }
903  };
904 
905  if (nthreads > 0) {
906  tbb::task_arena arena(nthreads);
907  arena.execute([&]() {
908  tbb::parallel_for(
909  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
910  LoopFn);
911  });
912  } else {
913  tbb::parallel_for(
914  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
915  LoopFn);
916  }
917  }
918 
919  void TestOcclusions(const float* const rays,
920  const size_t num_rays,
921  const float tnear,
922  const float tfar,
923  int8_t* occluded,
924  const int nthreads) override {
925  CommitScene();
926 
927  struct RTCRayQueryContext context;
928  rtcInitRayQueryContext(&context);
929 
930  RTCOccludedArguments args;
931  rtcInitOccludedArguments(&args);
932  args.context = &context;
933 
934  auto LoopFn = [&](const tbb::blocked_range<size_t>& range) {
935  std::vector<RTCRay> rayvec(range.size());
936  for (size_t i = range.begin(); i < range.end(); ++i) {
937  RTCRay& ray = rayvec[i - range.begin()];
938  const float* r = &rays[i * 6];
939  ray.org_x = r[0];
940  ray.org_y = r[1];
941  ray.org_z = r[2];
942  ray.dir_x = r[3];
943  ray.dir_y = r[4];
944  ray.dir_z = r[5];
945  ray.tnear = tnear;
946  ray.tfar = tfar;
947  ray.mask = -1;
948  ray.id = i - range.begin();
949  ray.flags = 0;
950 
951  rtcOccluded1(scene_, &ray, &args);
952  }
953 
954  for (size_t i = range.begin(); i < range.end(); ++i) {
955  RTCRay ray = rayvec[i - range.begin()];
956  size_t idx = ray.id + range.begin();
957  occluded[idx] = int8_t(
958  -std::numeric_limits<float>::infinity() == ray.tfar);
959  }
960  };
961 
962  if (nthreads > 0) {
963  tbb::task_arena arena(nthreads);
964  arena.execute([&]() {
965  tbb::parallel_for(
966  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
967  LoopFn);
968  });
969  } else {
970  tbb::parallel_for(
971  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
972  LoopFn);
973  }
974  }
975 
976  void CountIntersections(const float* const rays,
977  const size_t num_rays,
978  int* intersections,
979  const int nthreads) override {
980  CommitScene();
981 
982  std::memset(intersections, 0, sizeof(int) * num_rays);
983 
984  auto previous_geom_prim_ID_tfar =
985  std::unique_ptr<callbacks::GeomPrimID[],
986  std::default_delete<callbacks::GeomPrimID[]>>(
987  new callbacks::GeomPrimID[num_rays]);
988  for (size_t i = 0; i < num_rays; ++i) {
989  previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID),
990  uint32_t(RTC_INVALID_GEOMETRY_ID),
991  0.f};
992  }
993 
995  rtcInitRayQueryContext(&context.context);
996  context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get();
997  context.intersections = intersections;
998 
999  RTCIntersectArguments args;
1000  rtcInitIntersectArguments(&args);
1001  args.filter = callbacks::CountIntersectionsFunc;
1002  args.context = &context.context;
1003 
1004  auto LoopFn = [&](const tbb::blocked_range<size_t>& range) {
1005  std::vector<RTCRayHit> rayhits(range.size());
1006 
1007  for (size_t i = range.begin(); i < range.end(); ++i) {
1008  RTCRayHit* rh = &rayhits[i - range.begin()];
1009  const float* r = &rays[i * 6];
1010  rh->ray.org_x = r[0];
1011  rh->ray.org_y = r[1];
1012  rh->ray.org_z = r[2];
1013  rh->ray.dir_x = r[3];
1014  rh->ray.dir_y = r[4];
1015  rh->ray.dir_z = r[5];
1016  rh->ray.tnear = 0;
1017  rh->ray.tfar = std::numeric_limits<float>::infinity();
1018  rh->ray.mask = -1;
1019  rh->ray.flags = 0;
1020  rh->ray.id = i;
1021  rh->hit.geomID = RTC_INVALID_GEOMETRY_ID;
1022  rh->hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
1023 
1024  rtcIntersect1(scene_, rh, &args);
1025  }
1026  };
1027 
1028  if (nthreads > 0) {
1029  tbb::task_arena arena(nthreads);
1030  arena.execute([&]() {
1031  tbb::parallel_for(
1032  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1033  LoopFn);
1034  });
1035  } else {
1036  tbb::parallel_for(
1037  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1038  LoopFn);
1039  }
1040  }
1041 
1042  void ListIntersections(const float* const rays,
1043  const size_t num_rays,
1044  const size_t num_intersections,
1045  int* cumsum,
1046  unsigned int* track_intersections,
1047  unsigned int* ray_ids,
1048  unsigned int* geometry_ids,
1049  unsigned int* primitive_ids,
1050  float* primitive_uvs,
1051  float* t_hit,
1052  const int nthreads) override {
1053  CommitScene();
1054 
1055  std::memset(track_intersections, 0, sizeof(uint32_t) * num_rays);
1056  std::memset(ray_ids, 0, sizeof(uint32_t) * num_intersections);
1057  std::memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections);
1058  std::memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections);
1059  std::memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2);
1060  std::memset(t_hit, 0, sizeof(float) * num_intersections);
1061 
1062  auto previous_geom_prim_ID_tfar =
1063  std::unique_ptr<callbacks::GeomPrimID[],
1064  std::default_delete<callbacks::GeomPrimID[]>>(
1065  new callbacks::GeomPrimID[num_rays]);
1066  for (size_t i = 0; i < num_rays; ++i) {
1067  previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID),
1068  uint32_t(RTC_INVALID_GEOMETRY_ID),
1069  0.f};
1070  }
1071 
1073  rtcInitRayQueryContext(&context.context);
1074  context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get();
1075  context.ray_ids = ray_ids;
1076  context.geometry_ids = geometry_ids;
1077  context.primitive_ids = primitive_ids;
1078  context.primitive_uvs = primitive_uvs;
1079  context.t_hit = t_hit;
1080  context.cumsum = cumsum;
1081  context.track_intersections = track_intersections;
1082 
1083  RTCIntersectArguments args;
1084  rtcInitIntersectArguments(&args);
1085  args.filter = callbacks::ListIntersectionsFunc;
1086  args.context = &context.context;
1087 
1088  auto LoopFn = [&](const tbb::blocked_range<size_t>& range) {
1089  std::vector<RTCRayHit> rayhits(range.size());
1090 
1091  for (size_t i = range.begin(); i < range.end(); ++i) {
1092  RTCRayHit* rh = &rayhits[i - range.begin()];
1093  const float* r = &rays[i * 6];
1094  rh->ray.org_x = r[0];
1095  rh->ray.org_y = r[1];
1096  rh->ray.org_z = r[2];
1097  rh->ray.dir_x = r[3];
1098  rh->ray.dir_y = r[4];
1099  rh->ray.dir_z = r[5];
1100  rh->ray.tnear = 0;
1101  rh->ray.tfar = std::numeric_limits<float>::infinity();
1102  rh->ray.mask = -1;
1103  rh->ray.flags = 0;
1104  rh->ray.id = i;
1105  rh->hit.geomID = RTC_INVALID_GEOMETRY_ID;
1106  rh->hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
1107 
1108  rtcIntersect1(scene_, rh, &args);
1109  }
1110  };
1111 
1112  if (nthreads > 0) {
1113  tbb::task_arena arena(nthreads);
1114  arena.execute([&]() {
1115  tbb::parallel_for(
1116  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1117  LoopFn);
1118  });
1119  } else {
1120  tbb::parallel_for(
1121  tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1122  LoopFn);
1123  }
1124  }
1125 
1126  void ComputeClosestPoints(const float* const query_points,
1127  const size_t num_query_points,
1128  float* closest_points,
1129  unsigned int* geometry_ids,
1130  unsigned int* primitive_ids,
1131  float* primitive_uvs,
1132  float* primitive_normals,
1133  const int nthreads) override {
1134  CommitScene();
1135 
1136  auto LoopFn = [&](const tbb::blocked_range<size_t>& range) {
1137  for (size_t i = range.begin(); i < range.end(); ++i) {
1138  RTCPointQuery query;
1139  query.x = query_points[i * 3 + 0];
1140  query.y = query_points[i * 3 + 1];
1141  query.z = query_points[i * 3 + 2];
1142  query.radius = std::numeric_limits<float>::infinity();
1143  query.time = 0.f;
1144 
1145  ClosestPointResult<Vec3f, Vec2f> result;
1146  result.geometry_ptrs_ptr = geometry_ptrs_.data();
1147 
1148  RTCPointQueryContext instStack;
1149  rtcInitPointQueryContext(&instStack);
1150  rtcPointQuery(scene_, &query, &instStack,
1151  &ClosestPointFunc<Vec3f, Vec3fa, Vec2f>,
1152  (void*)&result);
1153 
1154  closest_points[3 * i + 0] = result.p.x();
1155  closest_points[3 * i + 1] = result.p.y();
1156  closest_points[3 * i + 2] = result.p.z();
1157  geometry_ids[i] = result.geomID;
1158  primitive_ids[i] = result.primID;
1159  primitive_uvs[2 * i + 0] = result.uv.x();
1160  primitive_uvs[2 * i + 1] = result.uv.y();
1161  primitive_normals[3 * i + 0] = result.n.x();
1162  primitive_normals[3 * i + 1] = result.n.y();
1163  primitive_normals[3 * i + 2] = result.n.z();
1164  }
1165  };
1166 
1167  if (nthreads > 0) {
1168  tbb::task_arena arena(nthreads);
1169  arena.execute([&]() {
1170  tbb::parallel_for(tbb::blocked_range<size_t>(
1171  0, num_query_points, BATCH_SIZE),
1172  LoopFn);
1173  });
1174  } else {
1175  tbb::parallel_for(
1176  tbb::blocked_range<size_t>(0, num_query_points, BATCH_SIZE),
1177  LoopFn);
1178  }
1179  }
1180 
1181  void ArraySum(int* data_ptr, size_t num_elements, size_t& result) override {
1182  result = std::accumulate(data_ptr, data_ptr + num_elements, result);
1183  }
1184 
1185  void ArrayPartialSum(int* input,
1186  int* output,
1187  size_t num_elements) override {
1188  output[0] = 0;
1189  std::partial_sum(input, input + num_elements - 1, output + 1);
1190  }
1191 
1192  void CopyArray(int* src, uint32_t* dst, size_t num_elements) override {
1193  std::copy(src, src + num_elements, dst);
1194  }
1195 };
1196 
1197 RaycastingScene::RaycastingScene(int64_t nthreads, const core::Device& device) {
1198 #ifdef BUILD_SYCL_MODULE
1199  if (device.IsSYCL()) {
1200  impl_ = std::make_unique<SYCLImpl>();
1201  dynamic_cast<RaycastingScene::SYCLImpl*>(impl_.get())
1202  ->InitializeDevice();
1203  impl_->tensor_device_ = device;
1204  } else {
1205 #endif
1206  impl_ = std::make_unique<CPUImpl>();
1207 
1208  if (nthreads > 0) {
1209  std::string config("threads=" + std::to_string(nthreads));
1210  impl_->device_ = rtcNewDevice(config.c_str());
1211  } else {
1212  impl_->device_ = rtcNewDevice(NULL);
1213  }
1214 #ifdef BUILD_SYCL_MODULE
1215  }
1216 #endif
1217 
1218  rtcSetDeviceErrorFunction(impl_->device_, ErrorFunction, NULL);
1219 
1220  impl_->scene_ = rtcNewScene(impl_->device_);
1221  // set flag for better accuracy
1222  rtcSetSceneFlags(impl_->scene_,
1223  RTC_SCENE_FLAG_ROBUST |
1224  RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS);
1225 
1226  impl_->devprop_join_commit = rtcGetDeviceProperty(
1227  impl_->device_, RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED);
1228 
1229  impl_->scene_committed_ = false;
1230 }
1231 
1232 RaycastingScene::~RaycastingScene() {
1233  rtcReleaseScene(impl_->scene_);
1234  rtcReleaseDevice(impl_->device_);
1235 }
1236 
1237 uint32_t RaycastingScene::AddTriangles(const core::Tensor& vertex_positions,
1238  const core::Tensor& triangle_indices) {
1239  core::AssertTensorDevice(vertex_positions, impl_->tensor_device_);
1240  core::AssertTensorShape(vertex_positions, {utility::nullopt, 3});
1241  core::AssertTensorDtype(vertex_positions, core::Float32);
1242  core::AssertTensorDevice(triangle_indices, impl_->tensor_device_);
1243  core::AssertTensorShape(triangle_indices, {utility::nullopt, 3});
1244  core::AssertTensorDtype(triangle_indices, core::UInt32);
1245 
1246  const size_t num_vertices = vertex_positions.GetLength();
1247  const size_t num_triangles = triangle_indices.GetLength();
1248 
1249  // scene needs to be recommitted
1250  impl_->scene_committed_ = false;
1251  RTCGeometry geom =
1252  rtcNewGeometry(impl_->device_, RTC_GEOMETRY_TYPE_TRIANGLE);
1253 
1254  // rtcSetNewGeometryBuffer will take care of alignment and padding
1255  float* vertex_buffer = (float*)rtcSetNewGeometryBuffer(
1256  geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3,
1257  3 * sizeof(float), num_vertices);
1258 
1259  uint32_t* index_buffer = (uint32_t*)rtcSetNewGeometryBuffer(
1260  geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3,
1261  3 * sizeof(uint32_t), num_triangles);
1262 
1263  {
1264  auto data = vertex_positions.Contiguous();
1265 #ifdef BUILD_SYCL_MODULE
1266  if (impl_->tensor_device_.IsSYCL()) {
1267  dynamic_cast<RaycastingScene::SYCLImpl*>(impl_.get())
1268  ->queue_
1269  .memcpy(vertex_buffer, data.GetDataPtr(),
1270  sizeof(float) * 3 * num_vertices)
1271  .wait();
1272  } else {
1273 #endif
1274  std::memcpy(vertex_buffer, data.GetDataPtr(),
1275  sizeof(float) * 3 * num_vertices);
1276 #ifdef BUILD_SYCL_MODULE
1277  }
1278 #endif
1279  }
1280  {
1281  auto data = triangle_indices.Contiguous();
1282 #ifdef BUILD_SYCL_MODULE
1283  if (impl_->tensor_device_.IsSYCL()) {
1284  dynamic_cast<RaycastingScene::SYCLImpl*>(impl_.get())
1285  ->queue_
1286  .memcpy(index_buffer, data.GetDataPtr(),
1287  sizeof(uint32_t) * 3 * num_triangles)
1288  .wait();
1289  } else {
1290 #endif
1291  std::memcpy(index_buffer, data.GetDataPtr(),
1292  sizeof(uint32_t) * 3 * num_triangles);
1293 #ifdef BUILD_SYCL_MODULE
1294  }
1295 #endif
1296  }
1297  rtcSetGeometryEnableFilterFunctionFromArguments(geom, true);
1298  rtcCommitGeometry(geom);
1299 
1300  uint32_t geom_id = rtcAttachGeometry(impl_->scene_, geom);
1301  rtcReleaseGeometry(geom);
1302 
1303  GeometryPtr geometry_ptr = {RTC_GEOMETRY_TYPE_TRIANGLE,
1304  (const void*)vertex_buffer,
1305  (const void*)index_buffer};
1306  impl_->geometry_ptrs_.push_back(geometry_ptr);
1307  return geom_id;
1308 }
1309 
1310 uint32_t RaycastingScene::AddTriangles(const TriangleMesh& mesh) {
1311  size_t num_verts = mesh.GetVertexPositions().GetLength();
1312  if (num_verts > std::numeric_limits<uint32_t>::max()) {
1314  "Cannot add mesh with more than {} vertices to the scene",
1316  }
1317  return AddTriangles(mesh.GetVertexPositions(),
1319 }
1320 
1321 std::unordered_map<std::string, core::Tensor> RaycastingScene::CastRays(
1322  const core::Tensor& rays, const int nthreads) const {
1323  AssertTensorDtypeLastDimDeviceMinNDim<float>(rays, "rays", 6,
1324  impl_->tensor_device_);
1325  auto shape = rays.GetShape();
1326  shape.pop_back(); // Remove last dim, we want to use this shape for the
1327  // results.
1328  size_t num_rays = shape.NumElements();
1329 
1330  std::unordered_map<std::string, core::Tensor> result;
1331  result["t_hit"] = core::Tensor(shape, core::Float32, rays.GetDevice());
1332  result["geometry_ids"] =
1333  core::Tensor(shape, core::UInt32, rays.GetDevice());
1334  result["primitive_ids"] =
1335  core::Tensor(shape, core::UInt32, rays.GetDevice());
1336  shape.push_back(2);
1337  result["primitive_uvs"] =
1338  core::Tensor(shape, core::Float32, rays.GetDevice());
1339  shape.back() = 3;
1340  result["primitive_normals"] =
1341  core::Tensor(shape, core::Float32, rays.GetDevice());
1342 
1343  auto data = rays.Contiguous();
1344  impl_->CastRays(data.GetDataPtr<float>(), num_rays,
1345  result["t_hit"].GetDataPtr<float>(),
1346  result["geometry_ids"].GetDataPtr<uint32_t>(),
1347  result["primitive_ids"].GetDataPtr<uint32_t>(),
1348  result["primitive_uvs"].GetDataPtr<float>(),
1349  result["primitive_normals"].GetDataPtr<float>(), nthreads,
1350  false);
1351 
1352  return result;
1353 }
1354 
1355 core::Tensor RaycastingScene::TestOcclusions(const core::Tensor& rays,
1356  const float tnear,
1357  const float tfar,
1358  const int nthreads) {
1359  AssertTensorDtypeLastDimDeviceMinNDim<float>(rays, "rays", 6,
1360  impl_->tensor_device_);
1361  auto shape = rays.GetShape();
1362  shape.pop_back(); // Remove last dim, we want to use this shape for the
1363  // results.
1364  size_t num_rays = shape.NumElements();
1365 
1366  core::Tensor result(shape, core::Bool, rays.GetDevice());
1367 
1368  auto data = rays.Contiguous();
1369  impl_->TestOcclusions(data.GetDataPtr<float>(), num_rays, tnear, tfar,
1370  reinterpret_cast<int8_t*>(result.GetDataPtr<bool>()),
1371  nthreads);
1372 
1373  return result;
1374 }
1375 
1376 core::Tensor RaycastingScene::CountIntersections(const core::Tensor& rays,
1377  const int nthreads) {
1378  AssertTensorDtypeLastDimDeviceMinNDim<float>(rays, "rays", 6,
1379  impl_->tensor_device_);
1380  auto shape = rays.GetShape();
1381  shape.pop_back(); // Remove last dim, we want to use this shape for the
1382  // results.
1383  size_t num_rays = shape.NumElements();
1384 
1385  core::Tensor intersections(shape, core::Dtype::FromType<int>(),
1386  impl_->tensor_device_);
1387 
1388  auto data = rays.Contiguous();
1389 
1390  impl_->CountIntersections(data.GetDataPtr<float>(), num_rays,
1391  intersections.GetDataPtr<int>(), nthreads);
1392  return intersections;
1393 }
1394 
1395 std::unordered_map<std::string, core::Tensor>
1396 RaycastingScene::ListIntersections(const core::Tensor& rays,
1397  const int nthreads) {
1398  AssertTensorDtypeLastDimDeviceMinNDim<float>(rays, "rays", 6,
1399  impl_->tensor_device_);
1400 
1401  auto shape = rays.GetShape();
1402  shape.pop_back(); // Remove last dim, we want to use this shape for the
1403  // results.
1404  size_t num_rays = shape.NumElements();
1405 
1406  // determine total number of intersections
1407  core::Tensor intersections(shape, core::Dtype::FromType<int>(),
1408  impl_->tensor_device_);
1409  core::Tensor track_intersections(shape, core::Dtype::FromType<uint32_t>(),
1410  impl_->tensor_device_);
1411  auto data = rays.Contiguous();
1412  impl_->CountIntersections(data.GetDataPtr<float>(), num_rays,
1413  intersections.GetDataPtr<int>(), nthreads);
1414 
1415  // prepare shape with that number of elements
1416  int* data_ptr = intersections.GetDataPtr<int>();
1417  size_t num_intersections = 0;
1418  impl_->ArraySum(data_ptr, num_rays, num_intersections);
1419 
1420  // prepare ray allocations (cumsum)
1421  core::Tensor cumsum_tensor_cpu =
1422  core::Tensor::Zeros(shape, core::Dtype::FromType<int>());
1423  core::Tensor cumsum_tensor = cumsum_tensor_cpu.To(impl_->tensor_device_);
1424  int* cumsum_ptr = cumsum_tensor.GetDataPtr<int>();
1425  impl_->ArrayPartialSum(data_ptr, cumsum_ptr, num_rays);
1426 
1427  // generate results structure
1428  std::unordered_map<std::string, core::Tensor> result;
1429  shape.clear();
1430  shape.push_back(num_rays + 1);
1431  result["ray_splits"] = core::Tensor(shape, core::UInt32);
1432 
1433  uint32_t* ptr = result["ray_splits"].GetDataPtr<uint32_t>();
1434  impl_->CopyArray(cumsum_ptr, ptr, num_rays);
1435 
1436  ptr[num_rays] = num_intersections;
1437  shape[0] = num_intersections;
1438  result["ray_ids"] =
1439  core::Tensor(shape, core::UInt32, impl_->tensor_device_);
1440  result["geometry_ids"] =
1441  core::Tensor(shape, core::UInt32, impl_->tensor_device_);
1442  result["primitive_ids"] =
1443  core::Tensor(shape, core::UInt32, impl_->tensor_device_);
1444  result["t_hit"] = core::Tensor(shape, core::Float32, impl_->tensor_device_);
1445  shape.push_back(2);
1446  result["primitive_uvs"] =
1447  core::Tensor(shape, core::Float32, impl_->tensor_device_);
1448 
1449  impl_->ListIntersections(data.GetDataPtr<float>(), num_rays,
1450  num_intersections, cumsum_ptr,
1451  track_intersections.GetDataPtr<uint32_t>(),
1452  result["ray_ids"].GetDataPtr<uint32_t>(),
1453  result["geometry_ids"].GetDataPtr<uint32_t>(),
1454  result["primitive_ids"].GetDataPtr<uint32_t>(),
1455  result["primitive_uvs"].GetDataPtr<float>(),
1456  result["t_hit"].GetDataPtr<float>(), nthreads);
1457 
1458  return result;
1459 }
1460 
1461 std::unordered_map<std::string, core::Tensor>
1462 RaycastingScene::ComputeClosestPoints(const core::Tensor& query_points,
1463  const int nthreads) {
1464  AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points, "query_points",
1465  3, impl_->tensor_device_);
1466  auto shape = query_points.GetShape();
1467  shape.pop_back(); // Remove last dim, we want to use this shape for the
1468  // results.
1469  size_t num_query_points = shape.NumElements();
1470 
1471  std::unordered_map<std::string, core::Tensor> result;
1472  result["geometry_ids"] = core::Tensor(shape, core::UInt32);
1473  result["primitive_ids"] = core::Tensor(shape, core::UInt32);
1474  shape.push_back(3);
1475  result["points"] = core::Tensor(shape, core::Float32);
1476  result["primitive_normals"] = core::Tensor(shape, core::Float32);
1477  shape.back() = 2;
1478  result["primitive_uvs"] = core::Tensor(shape, core::Float32);
1479 
1480  auto data = query_points.Contiguous();
1481  impl_->ComputeClosestPoints(data.GetDataPtr<float>(), num_query_points,
1482  result["points"].GetDataPtr<float>(),
1483  result["geometry_ids"].GetDataPtr<uint32_t>(),
1484  result["primitive_ids"].GetDataPtr<uint32_t>(),
1485  result["primitive_uvs"].GetDataPtr<float>(),
1486  result["primitive_normals"].GetDataPtr<float>(),
1487  nthreads);
1488 
1489  return result;
1490 }
1491 
1492 core::Tensor RaycastingScene::ComputeDistance(const core::Tensor& query_points,
1493  const int nthreads) {
1494  AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points, "query_points",
1495  3, impl_->tensor_device_);
1496  auto shape = query_points.GetShape();
1497  shape.pop_back(); // Remove last dim, we want to use this shape for the
1498  // results.
1499 
1500  auto data = query_points.Contiguous();
1501  auto closest_points = ComputeClosestPoints(data, nthreads);
1502 
1503  size_t num_query_points = shape.NumElements();
1504  Eigen::Map<Eigen::MatrixXf> query_points_map(data.GetDataPtr<float>(), 3,
1505  num_query_points);
1506  Eigen::Map<Eigen::MatrixXf> closest_points_map(
1507  closest_points["points"].GetDataPtr<float>(), 3, num_query_points);
1508  core::Tensor distance(shape, core::Float32);
1509  Eigen::Map<Eigen::VectorXf> distance_map(distance.GetDataPtr<float>(),
1510  num_query_points);
1511 
1512  distance_map = (closest_points_map - query_points_map).colwise().norm();
1513  return distance;
1514 }
1515 
1516 namespace {
1517 // Helper function to determine the inside and outside with voting.
1518 core::Tensor VoteInsideOutside(RaycastingScene& scene,
1519  const core::Tensor& query_points,
1520  const int nthreads = 0,
1521  const int num_votes = 3,
1522  const int inside_val = 1,
1523  const int outside_val = 0) {
1524  auto shape = query_points.GetShape();
1525  shape.pop_back(); // Remove last dim, we want to use this shape for the
1526  // results.
1527  size_t num_query_points = shape.NumElements();
1528 
1529  // Use local RNG here to generate rays with a similar direction in a
1530  // deterministic manner.
1531  std::mt19937 gen(42);
1532  std::uniform_real_distribution<float> dist(-0.001, 0.001);
1533  Eigen::MatrixXf ray_dirs(3, num_votes);
1534  ray_dirs = ray_dirs.unaryExpr([&](float) { return 1 + dist(gen); });
1535 
1536  auto query_points_ = query_points.Contiguous();
1537  Eigen::Map<Eigen::MatrixXf> query_points_map(
1538  query_points_.GetDataPtr<float>(), 3, num_query_points);
1539 
1540  core::Tensor rays({int64_t(num_votes * num_query_points), 6},
1541  core::Float32);
1542  Eigen::Map<Eigen::MatrixXf> rays_map(rays.GetDataPtr<float>(), 6,
1543  num_votes * num_query_points);
1544  if (num_votes > 1) {
1545  for (size_t i = 0; i < num_query_points; ++i) {
1546  for (int j = 0; j < num_votes; ++j) {
1547  rays_map.col(i * num_votes + j).topRows<3>() =
1548  query_points_map.col(i);
1549  rays_map.col(i * num_votes + j).bottomRows<3>() =
1550  ray_dirs.col(j);
1551  }
1552  }
1553  } else {
1554  for (size_t i = 0; i < num_query_points; ++i) {
1555  rays_map.col(i).topRows<3>() = query_points_map.col(i);
1556  rays_map.col(i).bottomRows<3>() = ray_dirs;
1557  }
1558  }
1559 
1560  auto intersections = scene.CountIntersections(rays, nthreads);
1561  Eigen::Map<Eigen::MatrixXi> intersections_map(
1562  intersections.GetDataPtr<int>(), num_votes, num_query_points);
1563 
1564  if (num_votes > 1) {
1565  core::Tensor result({int64_t(num_query_points)}, core::Int32);
1566  Eigen::Map<Eigen::VectorXi> result_map(result.GetDataPtr<int>(),
1567  num_query_points);
1568  result_map =
1569  intersections_map.unaryExpr([&](const int x) { return x % 2; })
1570  .colwise()
1571  .sum()
1572  .unaryExpr([&](const int x) {
1573  return (x > num_votes / 2) ? inside_val
1574  : outside_val;
1575  });
1576  return result.Reshape(shape);
1577  } else {
1578  intersections_map = intersections_map.unaryExpr([&](const int x) {
1579  return (x % 2) ? inside_val : outside_val;
1580  });
1581  return intersections.Reshape(shape);
1582  }
1583 }
1584 } // namespace
1585 
1586 core::Tensor RaycastingScene::ComputeSignedDistance(
1587  const core::Tensor& query_points,
1588  const int nthreads,
1589  const int nsamples) {
1590  AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points, "query_points",
1591  3, impl_->tensor_device_);
1592 
1593  if (nsamples < 1 || (nsamples % 2) != 1) {
1595  "nsamples must be odd and >= 1 but is {}", nsamples);
1596  }
1597  auto shape = query_points.GetShape();
1598  shape.pop_back(); // Remove last dim, we want to use this shape for the
1599  // results.
1600  size_t num_query_points = shape.NumElements();
1601  auto data = query_points.Contiguous();
1602  auto distance = ComputeDistance(data, nthreads);
1603  Eigen::Map<Eigen::VectorXf> distance_map(distance.GetDataPtr<float>(),
1604  num_query_points);
1605 
1606  auto inside_outside =
1607  VoteInsideOutside(*this, data, nthreads, nsamples, -1, 1);
1608  Eigen::Map<Eigen::VectorXi> inside_outside_map(
1609  inside_outside.GetDataPtr<int>(), num_query_points);
1610  distance_map.array() *= inside_outside_map.array().cast<float>();
1611  return distance;
1612 }
1613 
1614 core::Tensor RaycastingScene::ComputeOccupancy(const core::Tensor& query_points,
1615  const int nthreads,
1616  const int nsamples) {
1617  AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points, "query_points",
1618  3, impl_->tensor_device_);
1619 
1620  if (nsamples < 1 || (nsamples % 2) != 1) {
1621  cloudViewer::utility::LogError("samples must be odd and >= 1 but is {}",
1622  nsamples);
1623  }
1624 
1625  auto result = VoteInsideOutside(*this, query_points, nthreads, nsamples);
1626  return result.To(core::Float32);
1627 }
1628 
1629 core::Tensor RaycastingScene::CreateRaysPinhole(
1630  const core::Tensor& intrinsic_matrix,
1631  const core::Tensor& extrinsic_matrix,
1632  int width_px,
1633  int height_px) {
1634  core::AssertTensorDevice(intrinsic_matrix, core::Device());
1635  core::AssertTensorShape(intrinsic_matrix, {3, 3});
1636  core::AssertTensorDevice(extrinsic_matrix, core::Device());
1637  core::AssertTensorShape(extrinsic_matrix, {4, 4});
1638 
1639  auto intrinsic_matrix_contig =
1640  intrinsic_matrix.To(core::Float64).Contiguous();
1641  auto extrinsic_matrix_contig =
1642  extrinsic_matrix.To(core::Float64).Contiguous();
1643  // Eigen is col major
1644  Eigen::Map<Eigen::MatrixXd> KT(intrinsic_matrix_contig.GetDataPtr<double>(),
1645  3, 3);
1646  Eigen::Map<Eigen::MatrixXd> TT(extrinsic_matrix_contig.GetDataPtr<double>(),
1647  4, 4);
1648 
1649  Eigen::Matrix3d invK = KT.transpose().inverse();
1650  Eigen::Matrix3d RT = TT.block(0, 0, 3, 3);
1651  Eigen::Vector3d t = TT.transpose().block(0, 3, 3, 1);
1652  Eigen::Vector3d C = -RT * t;
1653  Eigen::Matrix3f RT_invK = (RT * invK).cast<float>();
1654 
1655  core::Tensor rays({height_px, width_px, 6}, core::Float32);
1656  Eigen::Map<Eigen::MatrixXf> rays_map(rays.GetDataPtr<float>(), 6,
1657  height_px * width_px);
1658 
1659  Eigen::Matrix<float, 6, 1> r;
1660  r.topRows<3>() = C.cast<float>();
1661  int64_t linear_idx = 0;
1662  for (int y = 0; y < height_px; ++y) {
1663  for (int x = 0; x < width_px; ++x, ++linear_idx) {
1664  Eigen::Vector3f px(x + 0.5f, y + 0.5f, 1);
1665  Eigen::Vector3f ray_dir = RT_invK * px;
1666  r.bottomRows<3>() = ray_dir;
1667  rays_map.col(linear_idx) = r;
1668  }
1669  }
1670  return rays;
1671 }
1672 
1673 core::Tensor RaycastingScene::CreateRaysPinhole(double fov_deg,
1674  const core::Tensor& center,
1675  const core::Tensor& eye,
1676  const core::Tensor& up,
1677  int width_px,
1678  int height_px) {
1680  core::AssertTensorShape(center, {3});
1682  core::AssertTensorShape(eye, {3});
1684  core::AssertTensorShape(up, {3});
1685 
1686  double focal_length =
1687  0.5 * width_px / std::tan(0.5 * (M_PI / 180) * fov_deg);
1688 
1689  core::Tensor intrinsic_matrix =
1690  core::Tensor::Eye(3, core::Float64, core::Device());
1691  Eigen::Map<Eigen::MatrixXd> intrinsic_matrix_map(
1692  intrinsic_matrix.GetDataPtr<double>(), 3, 3);
1693  intrinsic_matrix_map(0, 0) = focal_length;
1694  intrinsic_matrix_map(1, 1) = focal_length;
1695  intrinsic_matrix_map(2, 0) = 0.5 * width_px;
1696  intrinsic_matrix_map(2, 1) = 0.5 * height_px;
1697 
1698  auto center_contig = center.To(core::Float64).Contiguous();
1699  auto eye_contig = eye.To(core::Float64).Contiguous();
1700  auto up_contig = up.To(core::Float64).Contiguous();
1701 
1702  Eigen::Map<const Eigen::Vector3d> center_map(
1703  center_contig.GetDataPtr<double>());
1704  Eigen::Map<const Eigen::Vector3d> eye_map(eye_contig.GetDataPtr<double>());
1705  Eigen::Map<const Eigen::Vector3d> up_map(up_contig.GetDataPtr<double>());
1706 
1707  Eigen::Matrix3d R = Eigen::Matrix3d::Identity();
1708  R.row(1) = up_map / up_map.norm();
1709  R.row(2) = center_map - eye_map;
1710  R.row(2) /= R.row(2).norm();
1711  R.row(0) = R.row(1).cross(R.row(2));
1712  R.row(0) /= R.row(0).norm();
1713  R.row(1) = R.row(2).cross(R.row(0));
1714  Eigen::Vector3d t = -R * eye_map;
1715 
1716  core::Tensor extrinsic_matrix =
1717  core::Tensor::Eye(4, core::Float64, core::Device());
1718  Eigen::Map<Eigen::MatrixXd> extrinsic_matrix_map(
1719  extrinsic_matrix.GetDataPtr<double>(), 4, 4);
1720  extrinsic_matrix_map.block(3, 0, 1, 3) = t.transpose();
1721  extrinsic_matrix_map.block(0, 0, 3, 3) = R.transpose();
1722 
1723  return CreateRaysPinhole(intrinsic_matrix, extrinsic_matrix, width_px,
1724  height_px);
1725 }
1726 
1727 uint32_t RaycastingScene::INVALID_ID() { return RTC_INVALID_GEOMETRY_ID; }
1728 
1729 } // namespace geometry
1730 } // namespace t
1731 } // namespace cloudViewer
1732 namespace fmt {
1733 template <>
1734 struct formatter<RTCError> {
1735  template <typename FormatContext>
1736  auto format(const RTCError& c, FormatContext& ctx) const {
1737  const char* name = rtcGetErrorString(c);
1738  return format_to(ctx.out(), name);
1739  }
1740 
1741  template <typename ParseContext>
1742  constexpr auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
1743  return ctx.begin();
1744  }
1745 };
1746 } // namespace fmt
constexpr double M_PI
Pi.
Definition: CVConst.h:19
std::string name
math::float2 uv
#define AssertTensorDevice(tensor,...)
Definition: TensorCheck.h:45
#define AssertTensorDtype(tensor,...)
Definition: TensorCheck.h:21
#define AssertTensorShape(tensor,...)
Definition: TensorCheck.h:61
std::vector< UVAtlasVertex > vb
#define NULL
core::Tensor result
Definition: VtkUtils.cpp:76
bool copy
Definition: VtkUtils.cpp:74
A class to compute the Error function (erf)
Definition: ErrorFunction.h:28
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
Definition: Device.h:52
std::string ToString() const
Definition: SizeVector.cpp:132
Tensor Contiguous() const
Definition: Tensor.cpp:772
int64_t NumDims() const
Definition: Tensor.h:1172
int64_t GetLength() const
Definition: Tensor.h:1125
Device GetDevice() const override
Definition: Tensor.cpp:1435
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739
A scene class with basic ray casting and closest point queries.
core::Tensor CountIntersections(const core::Tensor &rays, const int nthreads=0)
Computes the number of intersection of the rays with the scene.
A triangle mesh contains vertices and triangles.
Definition: TriangleMesh.h:98
#define LogError(...)
Definition: Logging.h:60
__device__ __forceinline__ float infinity()
Definition: result_set.h:36
int max(int a, int b)
Definition: cutil_math.h:48
Helper functions for the ml ops.
ImGuiContext * context
Definition: Window.cpp:76
static double dist(double x1, double y1, double x2, double y2)
Definition: lsd.c:207
static void error(char *msg)
Definition: lsd.c:159
void CountIntersectionsFunc(const RTCFilterFunctionNArguments *args)
void ListIntersectionsFunc(const RTCFilterFunctionNArguments *args)
const Dtype UInt32
Definition: Dtype.cpp:50
void Zeros(benchmark::State &state, const Device &device)
Definition: Zeros.cpp:16
const Dtype Float64
Definition: Dtype.cpp:43
const Dtype Int32
Definition: Dtype.cpp:46
const Dtype Float32
Definition: Dtype.cpp:42
constexpr nullopt_t nullopt
Definition: Optional.h:136
Generic file read and write utility for python interface.
std::string to_string(const T &n)
Definition: Common.h:20
unsigned Bool
Definition: sqlite3.c:20710
void ComputeClosestPoints(const float *const query_points, const size_t num_query_points, float *closest_points, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads) override
void ListIntersections(const float *const rays, const size_t num_rays, const size_t num_intersections, int *cumsum, unsigned int *track_intersections, unsigned int *ray_ids, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *t_hit, const int nthreads) override
void ArraySum(int *data_ptr, size_t num_elements, size_t &result) override
void CopyArray(int *src, uint32_t *dst, size_t num_elements) override
void ArrayPartialSum(int *input, int *output, size_t num_elements) override
void CastRays(const float *const rays, const size_t num_rays, float *t_hit, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads, const bool line_intersection) override
void TestOcclusions(const float *const rays, const size_t num_rays, const float tnear, const float tfar, int8_t *occluded, const int nthreads) override
void CountIntersections(const float *const rays, const size_t num_rays, int *intersections, const int nthreads) override
virtual void ListIntersections(const float *const rays, const size_t num_rays, const size_t num_intersections, int *cumsum, unsigned int *track_intersections, unsigned int *ray_ids, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *t_hit, const int nthreads)=0
virtual void TestOcclusions(const float *const rays, const size_t num_rays, const float tnear, const float tfar, int8_t *occluded, const int nthreads)=0
virtual void CopyArray(int *src, uint32_t *dst, size_t num_elements)=0
virtual void ArraySum(int *data_ptr, size_t num_elements, size_t &result)=0
virtual void CastRays(const float *const rays, const size_t num_rays, float *t_hit, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads, const bool line_intersection)=0
virtual void ComputeClosestPoints(const float *const query_points, const size_t num_query_points, float *closest_points, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads)=0
virtual void ArrayPartialSum(int *input, int *output, size_t num_elements)=0
virtual void CountIntersections(const float *const rays, const size_t num_rays, int *intersections, const int nthreads)=0
auto format(const RTCError &c, FormatContext &ctx) const
constexpr auto parse(ParseContext &ctx) -> decltype(ctx.begin())