10 #pragma warning(disable : 4005)
14 #ifdef BUILD_SYCL_MODULE
15 #include <sycl/sycl.hpp>
18 #include <embree4/rtcore.h>
26 #include <tbb/parallel_for.h>
31 #include <unsupported/Eigen/AlignedVector3>
50 #ifdef BUILD_SYCL_MODULE
52 const RTCFilterFunctionNArguments* args) {
56 int* valid = args->valid;
59 struct RTCRayN* rayN = args->ray;
60 struct RTCHitN* hitN = args->hit;
61 const unsigned int N = args->N;
67 context->previous_geom_prim_ID_tfar;
68 int* intersections =
context->intersections;
71 for (
unsigned int ui = 0; ui < N; ui += 1) {
73 unsigned int vi = ui + 0;
74 if (vi >= N)
continue;
77 if (valid[vi] != -1)
continue;
80 RTCRay ray = rtcGetRayFromRayN(rayN, N, ui);
81 RTCHit hit = rtcGetHitFromHitN(hitN, N, ui);
83 unsigned int ray_id = ray.id;
85 auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id];
86 if (prev_gpIDtfar.geomID != hit.geomID ||
87 (prev_gpIDtfar.primID != hit.primID &&
88 prev_gpIDtfar.ray_tfar != ray.tfar)) {
89 ++(intersections[ray_id]);
90 previous_geom_prim_ID_tfar[ray_id] = gpID;
109 #ifdef BUILD_SYCL_MODULE
111 const RTCFilterFunctionNArguments* args) {
115 int* valid = args->valid;
118 struct RTCRayN* rayN = args->ray;
119 struct RTCHitN* hitN = args->hit;
120 const unsigned int N = args->N;
123 if (
context ==
nullptr)
return;
126 context->previous_geom_prim_ID_tfar;
127 unsigned int* ray_ids =
context->ray_ids;
128 unsigned int* geometry_ids =
context->geometry_ids;
129 unsigned int* primitive_ids =
context->primitive_ids;
130 float* primitive_uvs =
context->primitive_uvs;
133 unsigned int* track_intersections =
context->track_intersections;
136 for (
unsigned int ui = 0; ui < N; ui += 1) {
138 unsigned int vi = ui + 0;
139 if (vi >= N)
continue;
142 if (valid[vi] != -1)
continue;
145 RTCRay ray = rtcGetRayFromRayN(rayN, N, ui);
146 RTCHit hit = rtcGetHitFromHitN(hitN, N, ui);
148 unsigned int ray_id = ray.id;
150 auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id];
151 if (prev_gpIDtfar.geomID != hit.geomID ||
152 (prev_gpIDtfar.primID != hit.primID &&
153 prev_gpIDtfar.ray_tfar != ray.tfar)) {
154 size_t idx = cumsum[ray_id] + track_intersections[ray_id];
155 ray_ids[idx] = ray_id;
156 geometry_ids[idx] = hit.
geomID;
157 primitive_ids[idx] = hit.primID;
158 primitive_uvs[idx * 2 + 0] = hit.u;
159 primitive_uvs[idx * 2 + 1] = hit.v;
160 t_hit[idx] = ray.tfar;
161 previous_geom_prim_ID_tfar[ray_id] = gpID;
162 ++(track_intersections[ray_id]);
173 typedef Eigen::AlignedVector3<float> Vec3fa;
175 typedef Eigen::Matrix<float, 2, 1, Eigen::DontAlign> Vec2f;
176 typedef Eigen::Vector3f Vec3f;
179 void ErrorFunction(
void* userPtr,
enum RTCError
error,
const char* str) {
181 rtcGetErrorString(
error), str);
186 template <
class DTYPE>
187 void AssertTensorDtypeLastDimDeviceMinNDim(
189 const std::string& tensor_name,
192 int64_t min_ndim = 2) {
194 if (tensor.
NumDims() < min_ndim) {
196 "{} Tensor ndim is {} but expected ndim >= {}", tensor_name,
201 "The last dimension of the {} Tensor must be {} but got "
202 "Tensor with shape {}",
206 tensor, cloudViewer::core::Dtype::FromType<DTYPE>());
210 template <
typename Vec3faType>
211 inline Vec3faType closestPointTriangle(Vec3faType
const& p,
217 const Vec3faType ab = b - a;
218 const Vec3faType ac = c - a;
219 const Vec3faType ap = p - a;
221 const float d1 = ab.dot(ap);
222 const float d2 = ac.dot(ap);
223 if (d1 <= 0.f && d2 <= 0.f) {
229 const Vec3faType bp = p - b;
230 const float d3 = ab.dot(bp);
231 const float d4 = ac.dot(bp);
232 if (d3 >= 0.f && d4 <= d3) {
238 const Vec3faType cp = p - c;
239 const float d5 = ab.dot(cp);
240 const float d6 = ac.dot(cp);
241 if (d6 >= 0.f && d5 <= d6) {
247 const float vc = d1 * d4 - d3 * d2;
248 if (vc <= 0.f && d1 >= 0.f && d3 <= 0.f) {
249 const float v = d1 / (d1 - d3);
255 const float vb = d5 * d2 - d1 * d6;
256 if (vb <= 0.f && d2 >= 0.f && d6 <= 0.f) {
257 const float v = d2 / (d2 - d6);
263 const float va = d3 * d6 - d5 * d4;
264 if (va <= 0.f && (d4 - d3) >= 0.f && (d5 - d6) >= 0.f) {
265 const float v = (d4 - d3) / ((d4 - d3) + (d5 - d6));
268 return b + v * (c - b);
271 const float denom = 1.f / (va +
vb + vc);
272 const float v =
vb * denom;
273 const float w = vc * denom;
276 return a + v * ab + w * ac;
280 RTCGeometryType geom_type;
285 template <
typename Vec3fType,
typename Vec2fType>
286 struct ClosestPointResult {
288 : primID(RTC_INVALID_GEOMETRY_ID), geomID(RTC_INVALID_GEOMETRY_ID) {}
295 GeometryPtr* geometry_ptrs_ptr;
299 template <
typename Vec3fType,
typename Vec3faType,
typename Vec2fType>
300 bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) {
301 assert(args->userPtr);
302 const unsigned int geomID = args->geomID;
303 const unsigned int primID = args->primID;
306 Vec3faType q(args->query->x, args->query->y, args->query->z);
308 ClosestPointResult<Vec3fType, Vec2fType>*
result =
309 static_cast<ClosestPointResult<Vec3fType, Vec2fType>*
>(
311 const RTCGeometryType geom_type =
312 result->geometry_ptrs_ptr[geomID].geom_type;
313 const void* ptr1 =
result->geometry_ptrs_ptr[geomID].ptr1;
314 const void* ptr2 =
result->geometry_ptrs_ptr[geomID].ptr2;
316 if (RTC_GEOMETRY_TYPE_TRIANGLE == geom_type) {
317 const float* vertex_positions = (
const float*)ptr1;
318 const uint32_t* triangle_indices = (
const uint32_t*)ptr2;
321 vertex_positions[3 * triangle_indices[3 * primID + 0] + 0],
322 vertex_positions[3 * triangle_indices[3 * primID + 0] + 1],
323 vertex_positions[3 * triangle_indices[3 * primID + 0] + 2]);
325 vertex_positions[3 * triangle_indices[3 * primID + 1] + 0],
326 vertex_positions[3 * triangle_indices[3 * primID + 1] + 1],
327 vertex_positions[3 * triangle_indices[3 * primID + 1] + 2]);
329 vertex_positions[3 * triangle_indices[3 * primID + 2] + 0],
330 vertex_positions[3 * triangle_indices[3 * primID + 2] + 1],
331 vertex_positions[3 * triangle_indices[3 * primID + 2] + 2]);
336 closestPointTriangle<Vec3faType>(q, v0, v1, v2, u, v);
337 float d = (q - p).norm();
342 if (d < args->query->radius) {
343 args->query->radius = d;
347 Vec3faType e1 = v1 - v0;
348 Vec3faType e2 = v2 - v0;
349 result->uv = Vec2fType(u, v);
350 result->n = (e1.cross(e2)).normalized();
377 if (!scene_committed_) {
378 if (devprop_join_commit) {
379 rtcJoinCommitScene(scene_);
381 rtcCommitScene(scene_);
383 scene_committed_ =
true;
388 const size_t num_rays,
390 unsigned int* geometry_ids,
391 unsigned int* primitive_ids,
392 float* primitive_uvs,
393 float* primitive_normals,
395 const bool line_intersection) = 0;
398 const size_t num_rays,
402 const int nthreads) = 0;
405 const size_t num_rays,
407 const int nthreads) = 0;
410 const size_t num_rays,
411 const size_t num_intersections,
413 unsigned int* track_intersections,
414 unsigned int* ray_ids,
415 unsigned int* geometry_ids,
416 unsigned int* primitive_ids,
417 float* primitive_uvs,
419 const int nthreads) = 0;
422 const size_t num_query_points,
423 float* closest_points,
424 unsigned int* geometry_ids,
425 unsigned int* primitive_ids,
426 float* primitive_uvs,
427 float* primitive_normals,
428 const int nthreads) = 0;
436 size_t num_elements) = 0;
438 virtual void CopyArray(
int* src, uint32_t* dst,
size_t num_elements) = 0;
441 #ifdef BUILD_SYCL_MODULE
446 sycl::device sycl_device_;
452 if (li_previous_geom_prim_ID_tfar) {
453 sycl::free(li_previous_geom_prim_ID_tfar, queue_);
455 if (ci_previous_geom_prim_ID_tfar) {
456 sycl::free(ci_previous_geom_prim_ID_tfar, queue_);
460 void InitializeDevice() {
462 sycl_device_ = sycl::device(rtcSYCLDeviceSelector);
463 }
catch (std::exception& e) {
469 queue_ = sycl::queue(sycl_device_);
472 device_ = rtcNewSYCLDevice(context_,
"");
473 rtcSetDeviceSYCLDevice(device_, sycl_device_);
477 rtcGetDeviceError(
NULL));
481 void CastRays(
const float*
const rays,
482 const size_t num_rays,
484 unsigned int* geometry_ids,
485 unsigned int* primitive_ids,
486 float* primitive_uvs,
487 float* primitive_normals,
489 const bool line_intersection)
override {
492 auto scene = this->scene_;
493 queue_.submit([=](sycl::handler& cgh) {
495 sycl::range<1>(num_rays),
496 [=](sycl::item<1> item, sycl::kernel_handler kh) {
497 const size_t i = item.get_id(0);
500 const float* r = &rays[i * 6];
504 if (line_intersection) {
505 rh.ray.dir_x = r[3] - r[0];
506 rh.ray.dir_y = r[4] - r[1];
507 rh.ray.dir_z = r[5] - r[2];
514 if (line_intersection) {
523 rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
524 rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
526 rtcIntersect1(scene, &rh);
528 t_hit[i] = rh.ray.tfar;
529 if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) {
530 geometry_ids[i] = rh.hit.geomID;
531 primitive_ids[i] = rh.hit.primID;
532 primitive_uvs[i * 2 + 0] = rh.hit.u;
533 primitive_uvs[i * 2 + 1] = rh.hit.v;
535 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x +
536 rh.hit.Ng_y * rh.hit.Ng_y +
537 rh.hit.Ng_z * rh.hit.Ng_z);
538 primitive_normals[i * 3 + 0] =
539 rh.hit.Ng_x * inv_norm;
540 primitive_normals[i * 3 + 1] =
541 rh.hit.Ng_y * inv_norm;
542 primitive_normals[i * 3 + 2] =
543 rh.hit.Ng_z * inv_norm;
545 geometry_ids[i] = RTC_INVALID_GEOMETRY_ID;
546 primitive_ids[i] = RTC_INVALID_GEOMETRY_ID;
547 primitive_uvs[i * 2 + 0] = 0;
548 primitive_uvs[i * 2 + 1] = 0;
549 primitive_normals[i * 3 + 0] = 0;
550 primitive_normals[i * 3 + 1] = 0;
551 primitive_normals[i * 3 + 2] = 0;
555 queue_.wait_and_throw();
558 void TestOcclusions(
const float*
const rays,
559 const size_t num_rays,
563 const int nthreads)
override {
566 auto scene = this->scene_;
567 queue_.submit([=](sycl::handler& cgh) {
569 sycl::range<1>(num_rays),
570 [=](sycl::item<1> item, sycl::kernel_handler kh) {
571 struct RTCRayQueryContext
context;
572 rtcInitRayQueryContext(&
context);
574 RTCOccludedArguments args;
575 rtcInitOccludedArguments(&args);
578 const size_t i = item.get_id(0);
581 const float* r = &rays[i * 6];
594 rtcOccluded1(scene, &ray, &args);
596 occluded[i] = int8_t(
601 queue_.wait_and_throw();
604 void CountIntersections(
const float*
const rays,
605 const size_t num_rays,
607 const int nthreads)
override {
610 queue_.memset(intersections, 0,
sizeof(
int) * num_rays).wait();
612 ci_previous_geom_prim_ID_tfar =
613 sycl::malloc_device<callbacks::GeomPrimID>(num_rays, queue_);
616 if (!ci_previous_geom_prim_ID_tfar) {
617 throw std::runtime_error(
"Failed to allocate device memory");
620 auto host_previous_geom_prim_ID_tfar =
622 std::default_delete<callbacks::GeomPrimID[]>>(
624 for (
size_t i = 0; i < num_rays; ++i) {
625 host_previous_geom_prim_ID_tfar[i] = {
626 uint32_t(RTC_INVALID_GEOMETRY_ID),
627 uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f};
631 queue_.memcpy(ci_previous_geom_prim_ID_tfar,
632 host_previous_geom_prim_ID_tfar.get(),
636 auto scene = this->scene_;
637 auto ci_previous_geom_prim_ID_tfar_ = ci_previous_geom_prim_ID_tfar;
638 queue_.submit([=](sycl::handler& cgh) {
640 sycl::range<1>(num_rays),
641 [=](sycl::item<1> item, sycl::kernel_handler kh) {
643 rtcInitRayQueryContext(&
context.context);
644 context.previous_geom_prim_ID_tfar =
645 ci_previous_geom_prim_ID_tfar_;
646 context.intersections = intersections;
648 RTCIntersectArguments args;
649 rtcInitIntersectArguments(&args);
651 args.context = &
context.context;
653 const size_t i = item.get_id(0);
656 const float* r = &rays[i * 6];
668 rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
669 rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
671 rtcIntersect1(scene, &rh, &args);
674 queue_.wait_and_throw();
677 sycl::free(ci_previous_geom_prim_ID_tfar, queue_);
678 ci_previous_geom_prim_ID_tfar =
nullptr;
681 void ListIntersections(
const float*
const rays,
682 const size_t num_rays,
683 const size_t num_intersections,
685 unsigned int* track_intersections,
686 unsigned int* ray_ids,
687 unsigned int* geometry_ids,
688 unsigned int* primitive_ids,
689 float* primitive_uvs,
691 const int nthreads)
override {
694 queue_.memset(track_intersections, 0,
sizeof(uint32_t) * num_rays)
696 queue_.memset(ray_ids, 0,
sizeof(uint32_t) * num_intersections).wait();
697 queue_.memset(geometry_ids, 0,
sizeof(uint32_t) * num_intersections)
699 queue_.memset(primitive_ids, 0,
sizeof(uint32_t) * num_intersections)
701 queue_.memset(primitive_uvs, 0,
sizeof(
float) * num_intersections * 2)
703 queue_.memset(t_hit, 0,
sizeof(
float) * num_intersections).wait();
705 li_previous_geom_prim_ID_tfar =
706 sycl::malloc_device<callbacks::GeomPrimID>(num_rays, queue_);
709 if (!li_previous_geom_prim_ID_tfar) {
710 throw std::runtime_error(
"Failed to allocate device memory");
713 auto host_previous_geom_prim_ID_tfar =
715 std::default_delete<callbacks::GeomPrimID[]>>(
717 for (
size_t i = 0; i < num_rays; ++i) {
718 host_previous_geom_prim_ID_tfar[i] = {
719 uint32_t(RTC_INVALID_GEOMETRY_ID),
720 uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f};
724 queue_.memcpy(li_previous_geom_prim_ID_tfar,
725 host_previous_geom_prim_ID_tfar.get(),
729 auto scene = this->scene_;
730 auto li_previous_geom_prim_ID_tfar_ = li_previous_geom_prim_ID_tfar;
731 queue_.submit([=](sycl::handler& cgh) {
733 sycl::range<1>(num_rays),
734 [=](sycl::item<1> item, sycl::kernel_handler kh) {
736 rtcInitRayQueryContext(&
context.context);
737 context.previous_geom_prim_ID_tfar =
738 li_previous_geom_prim_ID_tfar_;
740 context.geometry_ids = geometry_ids;
741 context.primitive_ids = primitive_ids;
742 context.primitive_uvs = primitive_uvs;
745 context.track_intersections = track_intersections;
747 RTCIntersectArguments args;
748 rtcInitIntersectArguments(&args);
750 args.context = &
context.context;
752 const size_t i = item.get_id(0);
755 const float* r = &rays[i * 6];
767 rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
768 rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
770 rtcIntersect1(scene, &rh, &args);
773 queue_.wait_and_throw();
776 sycl::free(li_previous_geom_prim_ID_tfar, queue_);
777 li_previous_geom_prim_ID_tfar =
nullptr;
780 void ComputeClosestPoints(
const float*
const query_points,
781 const size_t num_query_points,
782 float* closest_points,
783 unsigned int* geometry_ids,
784 unsigned int* primitive_ids,
785 float* primitive_uvs,
786 float* primitive_normals,
787 const int nthreads)
override {
788 throw std::logic_error(
"Function not yet implemented");
791 void ArraySum(
int* data_ptr,
size_t num_elements,
size_t&
result)
override {
792 sycl::buffer<size_t, 1> result_buf(&
result, sycl::range<1>(1));
794 queue_.submit([&](sycl::handler& cgh) {
796 result_buf.get_access<sycl::access::mode::read_write>(cgh);
798 sycl::range<1>(num_elements),
799 [=](sycl::item<1> item, sycl::kernel_handler kh) {
800 const size_t i = item.get_id(0);
801 sycl::atomic_ref<size_t, sycl::memory_order::relaxed,
802 sycl::memory_scope::device>
803 atomic_result_data(result_acc[0]);
804 atomic_result_data.fetch_add(data_ptr[i]);
807 queue_.wait_and_throw();
810 void ArrayPartialSum(
int* input,
812 size_t num_elements)
override {
813 queue_.submit([&](sycl::handler& cgh) {
814 cgh.single_task([=]() {
815 for (
size_t idx = 1; idx < num_elements; ++idx) {
816 output[idx] = output[idx - 1] + input[idx - 1];
821 queue_.wait_and_throw();
824 void CopyArray(
int* src, uint32_t* dst,
size_t num_elements)
override {
825 queue_.memcpy(dst, src, num_elements *
sizeof(uint32_t)).wait();
832 const size_t BATCH_SIZE = 1024;
835 const size_t num_rays,
837 unsigned int* geometry_ids,
838 unsigned int* primitive_ids,
839 float* primitive_uvs,
840 float* primitive_normals,
842 const bool line_intersection)
override {
845 auto LoopFn = [&](
const tbb::blocked_range<size_t>& range) {
846 std::vector<RTCRayHit> rayhits(range.size());
848 for (
size_t i = range.begin(); i < range.end(); ++i) {
849 RTCRayHit& rh = rayhits[i - range.begin()];
850 const float* r = &rays[i * 6];
854 if (line_intersection) {
855 rh.ray.dir_x = r[3] - r[0];
856 rh.ray.dir_y = r[4] - r[1];
857 rh.ray.dir_z = r[5] - r[2];
864 if (line_intersection) {
870 rh.ray.id = i - range.begin();
872 rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
873 rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
875 rtcIntersect1(scene_, &rh);
878 for (
size_t i = range.begin(); i < range.end(); ++i) {
879 RTCRayHit rh = rayhits[i - range.begin()];
880 size_t idx = rh.ray.id + range.begin();
881 t_hit[idx] = rh.ray.tfar;
882 if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) {
883 geometry_ids[idx] = rh.hit.geomID;
884 primitive_ids[idx] = rh.hit.primID;
885 primitive_uvs[idx * 2 + 0] = rh.hit.u;
886 primitive_uvs[idx * 2 + 1] = rh.hit.v;
887 float inv_norm = 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x +
888 rh.hit.Ng_y * rh.hit.Ng_y +
889 rh.hit.Ng_z * rh.hit.Ng_z);
890 primitive_normals[idx * 3 + 0] = rh.hit.Ng_x * inv_norm;
891 primitive_normals[idx * 3 + 1] = rh.hit.Ng_y * inv_norm;
892 primitive_normals[idx * 3 + 2] = rh.hit.Ng_z * inv_norm;
894 geometry_ids[idx] = RTC_INVALID_GEOMETRY_ID;
895 primitive_ids[idx] = RTC_INVALID_GEOMETRY_ID;
896 primitive_uvs[idx * 2 + 0] = 0;
897 primitive_uvs[idx * 2 + 1] = 0;
898 primitive_normals[idx * 3 + 0] = 0;
899 primitive_normals[idx * 3 + 1] = 0;
900 primitive_normals[idx * 3 + 2] = 0;
906 tbb::task_arena arena(nthreads);
907 arena.execute([&]() {
909 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
914 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
920 const size_t num_rays,
924 const int nthreads)
override {
927 struct RTCRayQueryContext
context;
928 rtcInitRayQueryContext(&
context);
930 RTCOccludedArguments args;
931 rtcInitOccludedArguments(&args);
934 auto LoopFn = [&](
const tbb::blocked_range<size_t>& range) {
935 std::vector<RTCRay> rayvec(range.size());
936 for (
size_t i = range.begin(); i < range.end(); ++i) {
937 RTCRay& ray = rayvec[i - range.begin()];
938 const float* r = &rays[i * 6];
948 ray.id = i - range.begin();
951 rtcOccluded1(scene_, &ray, &args);
954 for (
size_t i = range.begin(); i < range.end(); ++i) {
955 RTCRay ray = rayvec[i - range.begin()];
956 size_t idx = ray.id + range.begin();
957 occluded[idx] = int8_t(
963 tbb::task_arena arena(nthreads);
964 arena.execute([&]() {
966 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
971 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
977 const size_t num_rays,
979 const int nthreads)
override {
982 std::memset(intersections, 0,
sizeof(
int) * num_rays);
984 auto previous_geom_prim_ID_tfar =
986 std::default_delete<callbacks::GeomPrimID[]>>(
988 for (
size_t i = 0; i < num_rays; ++i) {
989 previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID),
990 uint32_t(RTC_INVALID_GEOMETRY_ID),
995 rtcInitRayQueryContext(&
context.context);
996 context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get();
997 context.intersections = intersections;
999 RTCIntersectArguments args;
1000 rtcInitIntersectArguments(&args);
1002 args.context = &
context.context;
1004 auto LoopFn = [&](
const tbb::blocked_range<size_t>& range) {
1005 std::vector<RTCRayHit> rayhits(range.size());
1007 for (
size_t i = range.begin(); i < range.end(); ++i) {
1008 RTCRayHit* rh = &rayhits[i - range.begin()];
1009 const float* r = &rays[i * 6];
1010 rh->ray.org_x = r[0];
1011 rh->ray.org_y = r[1];
1012 rh->ray.org_z = r[2];
1013 rh->ray.dir_x = r[3];
1014 rh->ray.dir_y = r[4];
1015 rh->ray.dir_z = r[5];
1021 rh->hit.geomID = RTC_INVALID_GEOMETRY_ID;
1022 rh->hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
1024 rtcIntersect1(scene_, rh, &args);
1029 tbb::task_arena arena(nthreads);
1030 arena.execute([&]() {
1032 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1037 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1043 const size_t num_rays,
1044 const size_t num_intersections,
1046 unsigned int* track_intersections,
1047 unsigned int* ray_ids,
1048 unsigned int* geometry_ids,
1049 unsigned int* primitive_ids,
1050 float* primitive_uvs,
1052 const int nthreads)
override {
1055 std::memset(track_intersections, 0,
sizeof(uint32_t) * num_rays);
1056 std::memset(ray_ids, 0,
sizeof(uint32_t) * num_intersections);
1057 std::memset(geometry_ids, 0,
sizeof(uint32_t) * num_intersections);
1058 std::memset(primitive_ids, 0,
sizeof(uint32_t) * num_intersections);
1059 std::memset(primitive_uvs, 0,
sizeof(
float) * num_intersections * 2);
1060 std::memset(t_hit, 0,
sizeof(
float) * num_intersections);
1062 auto previous_geom_prim_ID_tfar =
1064 std::default_delete<callbacks::GeomPrimID[]>>(
1066 for (
size_t i = 0; i < num_rays; ++i) {
1067 previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID),
1068 uint32_t(RTC_INVALID_GEOMETRY_ID),
1073 rtcInitRayQueryContext(&
context.context);
1074 context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get();
1076 context.geometry_ids = geometry_ids;
1077 context.primitive_ids = primitive_ids;
1078 context.primitive_uvs = primitive_uvs;
1081 context.track_intersections = track_intersections;
1083 RTCIntersectArguments args;
1084 rtcInitIntersectArguments(&args);
1086 args.context = &
context.context;
1088 auto LoopFn = [&](
const tbb::blocked_range<size_t>& range) {
1089 std::vector<RTCRayHit> rayhits(range.size());
1091 for (
size_t i = range.begin(); i < range.end(); ++i) {
1092 RTCRayHit* rh = &rayhits[i - range.begin()];
1093 const float* r = &rays[i * 6];
1094 rh->ray.org_x = r[0];
1095 rh->ray.org_y = r[1];
1096 rh->ray.org_z = r[2];
1097 rh->ray.dir_x = r[3];
1098 rh->ray.dir_y = r[4];
1099 rh->ray.dir_z = r[5];
1105 rh->hit.geomID = RTC_INVALID_GEOMETRY_ID;
1106 rh->hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
1108 rtcIntersect1(scene_, rh, &args);
1113 tbb::task_arena arena(nthreads);
1114 arena.execute([&]() {
1116 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1121 tbb::blocked_range<size_t>(0, num_rays, BATCH_SIZE),
1127 const size_t num_query_points,
1128 float* closest_points,
1129 unsigned int* geometry_ids,
1130 unsigned int* primitive_ids,
1131 float* primitive_uvs,
1132 float* primitive_normals,
1133 const int nthreads)
override {
1136 auto LoopFn = [&](
const tbb::blocked_range<size_t>& range) {
1137 for (
size_t i = range.begin(); i < range.end(); ++i) {
1138 RTCPointQuery query;
1139 query.x = query_points[i * 3 + 0];
1140 query.y = query_points[i * 3 + 1];
1141 query.z = query_points[i * 3 + 2];
1145 ClosestPointResult<Vec3f, Vec2f>
result;
1146 result.geometry_ptrs_ptr = geometry_ptrs_.data();
1148 RTCPointQueryContext instStack;
1149 rtcInitPointQueryContext(&instStack);
1150 rtcPointQuery(scene_, &query, &instStack,
1151 &ClosestPointFunc<Vec3f, Vec3fa, Vec2f>,
1154 closest_points[3 * i + 0] =
result.p.x();
1155 closest_points[3 * i + 1] =
result.p.y();
1156 closest_points[3 * i + 2] =
result.p.z();
1157 geometry_ids[i] =
result.geomID;
1158 primitive_ids[i] =
result.primID;
1159 primitive_uvs[2 * i + 0] =
result.uv.x();
1160 primitive_uvs[2 * i + 1] =
result.uv.y();
1161 primitive_normals[3 * i + 0] =
result.n.x();
1162 primitive_normals[3 * i + 1] =
result.n.y();
1163 primitive_normals[3 * i + 2] =
result.n.z();
1168 tbb::task_arena arena(nthreads);
1169 arena.execute([&]() {
1170 tbb::parallel_for(tbb::blocked_range<size_t>(
1171 0, num_query_points, BATCH_SIZE),
1176 tbb::blocked_range<size_t>(0, num_query_points, BATCH_SIZE),
1182 result = std::accumulate(data_ptr, data_ptr + num_elements,
result);
1187 size_t num_elements)
override {
1189 std::partial_sum(input, input + num_elements - 1, output + 1);
1192 void CopyArray(
int* src, uint32_t* dst,
size_t num_elements)
override {
1193 std::copy(src, src + num_elements, dst);
1197 RaycastingScene::RaycastingScene(int64_t nthreads,
const core::Device& device) {
1198 #ifdef BUILD_SYCL_MODULE
1200 impl_ = std::make_unique<SYCLImpl>();
1201 dynamic_cast<RaycastingScene::SYCLImpl*
>(impl_.get())
1202 ->InitializeDevice();
1203 impl_->tensor_device_ = device;
1206 impl_ = std::make_unique<CPUImpl>();
1210 impl_->device_ = rtcNewDevice(config.c_str());
1212 impl_->device_ = rtcNewDevice(
NULL);
1214 #ifdef BUILD_SYCL_MODULE
1220 impl_->scene_ = rtcNewScene(impl_->device_);
1222 rtcSetSceneFlags(impl_->scene_,
1223 RTC_SCENE_FLAG_ROBUST |
1224 RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS);
1226 impl_->devprop_join_commit = rtcGetDeviceProperty(
1227 impl_->device_, RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED);
1229 impl_->scene_committed_ =
false;
1232 RaycastingScene::~RaycastingScene() {
1233 rtcReleaseScene(impl_->scene_);
1234 rtcReleaseDevice(impl_->device_);
1237 uint32_t RaycastingScene::AddTriangles(
const core::Tensor& vertex_positions,
1246 const size_t num_vertices = vertex_positions.
GetLength();
1247 const size_t num_triangles = triangle_indices.
GetLength();
1250 impl_->scene_committed_ =
false;
1252 rtcNewGeometry(impl_->device_, RTC_GEOMETRY_TYPE_TRIANGLE);
1255 float* vertex_buffer = (
float*)rtcSetNewGeometryBuffer(
1256 geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3,
1257 3 *
sizeof(
float), num_vertices);
1259 uint32_t* index_buffer = (uint32_t*)rtcSetNewGeometryBuffer(
1260 geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3,
1261 3 *
sizeof(uint32_t), num_triangles);
1265 #ifdef BUILD_SYCL_MODULE
1266 if (impl_->tensor_device_.IsSYCL()) {
1267 dynamic_cast<RaycastingScene::SYCLImpl*
>(impl_.get())
1269 .memcpy(vertex_buffer, data.GetDataPtr(),
1270 sizeof(float) * 3 * num_vertices)
1274 std::memcpy(vertex_buffer, data.GetDataPtr(),
1275 sizeof(
float) * 3 * num_vertices);
1276 #ifdef BUILD_SYCL_MODULE
1282 #ifdef BUILD_SYCL_MODULE
1283 if (impl_->tensor_device_.IsSYCL()) {
1284 dynamic_cast<RaycastingScene::SYCLImpl*
>(impl_.get())
1286 .memcpy(index_buffer, data.GetDataPtr(),
1287 sizeof(uint32_t) * 3 * num_triangles)
1291 std::memcpy(index_buffer, data.GetDataPtr(),
1292 sizeof(uint32_t) * 3 * num_triangles);
1293 #ifdef BUILD_SYCL_MODULE
1297 rtcSetGeometryEnableFilterFunctionFromArguments(geom,
true);
1298 rtcCommitGeometry(geom);
1300 uint32_t geom_id = rtcAttachGeometry(impl_->scene_, geom);
1301 rtcReleaseGeometry(geom);
1303 GeometryPtr geometry_ptr = {RTC_GEOMETRY_TYPE_TRIANGLE,
1304 (
const void*)vertex_buffer,
1305 (
const void*)index_buffer};
1306 impl_->geometry_ptrs_.push_back(geometry_ptr);
1314 "Cannot add mesh with more than {} vertices to the scene",
1321 std::unordered_map<std::string, core::Tensor> RaycastingScene::CastRays(
1323 AssertTensorDtypeLastDimDeviceMinNDim<float>(rays,
"rays", 6,
1324 impl_->tensor_device_);
1328 size_t num_rays = shape.NumElements();
1330 std::unordered_map<std::string, core::Tensor>
result;
1334 result[
"primitive_ids"] =
1337 result[
"primitive_uvs"] =
1340 result[
"primitive_normals"] =
1344 impl_->CastRays(data.GetDataPtr<
float>(), num_rays,
1345 result[
"t_hit"].GetDataPtr<
float>(),
1346 result[
"geometry_ids"].GetDataPtr<uint32_t>(),
1347 result[
"primitive_ids"].GetDataPtr<uint32_t>(),
1348 result[
"primitive_uvs"].GetDataPtr<
float>(),
1349 result[
"primitive_normals"].GetDataPtr<
float>(), nthreads,
1358 const int nthreads) {
1359 AssertTensorDtypeLastDimDeviceMinNDim<float>(rays,
"rays", 6,
1360 impl_->tensor_device_);
1364 size_t num_rays = shape.NumElements();
1369 impl_->TestOcclusions(data.GetDataPtr<
float>(), num_rays, tnear, tfar,
1370 reinterpret_cast<int8_t*
>(
result.GetDataPtr<
bool>()),
1377 const int nthreads) {
1378 AssertTensorDtypeLastDimDeviceMinNDim<float>(rays,
"rays", 6,
1379 impl_->tensor_device_);
1383 size_t num_rays = shape.NumElements();
1385 core::Tensor intersections(shape, core::Dtype::FromType<int>(),
1386 impl_->tensor_device_);
1390 impl_->CountIntersections(data.GetDataPtr<
float>(), num_rays,
1392 return intersections;
1395 std::unordered_map<std::string, core::Tensor>
1397 const int nthreads) {
1398 AssertTensorDtypeLastDimDeviceMinNDim<float>(rays,
"rays", 6,
1399 impl_->tensor_device_);
1404 size_t num_rays = shape.NumElements();
1407 core::Tensor intersections(shape, core::Dtype::FromType<int>(),
1408 impl_->tensor_device_);
1409 core::Tensor track_intersections(shape, core::Dtype::FromType<uint32_t>(),
1410 impl_->tensor_device_);
1412 impl_->CountIntersections(data.GetDataPtr<
float>(), num_rays,
1416 int* data_ptr = intersections.
GetDataPtr<
int>();
1417 size_t num_intersections = 0;
1418 impl_->ArraySum(data_ptr, num_rays, num_intersections);
1423 core::Tensor cumsum_tensor = cumsum_tensor_cpu.
To(impl_->tensor_device_);
1424 int* cumsum_ptr = cumsum_tensor.
GetDataPtr<
int>();
1425 impl_->ArrayPartialSum(data_ptr, cumsum_ptr, num_rays);
1428 std::unordered_map<std::string, core::Tensor>
result;
1430 shape.push_back(num_rays + 1);
1433 uint32_t* ptr =
result[
"ray_splits"].GetDataPtr<uint32_t>();
1434 impl_->CopyArray(cumsum_ptr, ptr, num_rays);
1436 ptr[num_rays] = num_intersections;
1437 shape[0] = num_intersections;
1442 result[
"primitive_ids"] =
1446 result[
"primitive_uvs"] =
1449 impl_->ListIntersections(data.GetDataPtr<
float>(), num_rays,
1450 num_intersections, cumsum_ptr,
1452 result[
"ray_ids"].GetDataPtr<uint32_t>(),
1453 result[
"geometry_ids"].GetDataPtr<uint32_t>(),
1454 result[
"primitive_ids"].GetDataPtr<uint32_t>(),
1455 result[
"primitive_uvs"].GetDataPtr<
float>(),
1456 result[
"t_hit"].GetDataPtr<
float>(), nthreads);
1461 std::unordered_map<std::string, core::Tensor>
1463 const int nthreads) {
1464 AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points,
"query_points",
1465 3, impl_->tensor_device_);
1466 auto shape = query_points.
GetShape();
1469 size_t num_query_points = shape.NumElements();
1471 std::unordered_map<std::string, core::Tensor>
result;
1481 impl_->ComputeClosestPoints(data.GetDataPtr<
float>(), num_query_points,
1482 result[
"points"].GetDataPtr<
float>(),
1483 result[
"geometry_ids"].GetDataPtr<uint32_t>(),
1484 result[
"primitive_ids"].GetDataPtr<uint32_t>(),
1485 result[
"primitive_uvs"].GetDataPtr<
float>(),
1486 result[
"primitive_normals"].GetDataPtr<
float>(),
1493 const int nthreads) {
1494 AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points,
"query_points",
1495 3, impl_->tensor_device_);
1496 auto shape = query_points.
GetShape();
1501 auto closest_points = ComputeClosestPoints(data, nthreads);
1503 size_t num_query_points = shape.NumElements();
1504 Eigen::Map<Eigen::MatrixXf> query_points_map(data.GetDataPtr<
float>(), 3,
1506 Eigen::Map<Eigen::MatrixXf> closest_points_map(
1507 closest_points[
"points"].GetDataPtr<float>(), 3, num_query_points);
1509 Eigen::Map<Eigen::VectorXf> distance_map(distance.
GetDataPtr<
float>(),
1512 distance_map = (closest_points_map - query_points_map).colwise().norm();
1520 const int nthreads = 0,
1521 const int num_votes = 3,
1522 const int inside_val = 1,
1523 const int outside_val = 0) {
1524 auto shape = query_points.
GetShape();
1527 size_t num_query_points = shape.NumElements();
1531 std::mt19937 gen(42);
1532 std::uniform_real_distribution<float>
dist(-0.001, 0.001);
1533 Eigen::MatrixXf ray_dirs(3, num_votes);
1534 ray_dirs = ray_dirs.unaryExpr([&](
float) {
return 1 +
dist(gen); });
1536 auto query_points_ = query_points.
Contiguous();
1537 Eigen::Map<Eigen::MatrixXf> query_points_map(
1538 query_points_.GetDataPtr<
float>(), 3, num_query_points);
1540 core::Tensor rays({int64_t(num_votes * num_query_points), 6},
1542 Eigen::Map<Eigen::MatrixXf> rays_map(rays.GetDataPtr<
float>(), 6,
1543 num_votes * num_query_points);
1544 if (num_votes > 1) {
1545 for (
size_t i = 0; i < num_query_points; ++i) {
1546 for (
int j = 0; j < num_votes; ++j) {
1547 rays_map.col(i * num_votes + j).topRows<3>() =
1548 query_points_map.col(i);
1549 rays_map.col(i * num_votes + j).bottomRows<3>() =
1554 for (
size_t i = 0; i < num_query_points; ++i) {
1555 rays_map.col(i).topRows<3>() = query_points_map.col(i);
1556 rays_map.col(i).bottomRows<3>() = ray_dirs;
1561 Eigen::Map<Eigen::MatrixXi> intersections_map(
1562 intersections.GetDataPtr<
int>(), num_votes, num_query_points);
1564 if (num_votes > 1) {
1566 Eigen::Map<Eigen::VectorXi> result_map(
result.GetDataPtr<
int>(),
1569 intersections_map.unaryExpr([&](
const int x) {
return x % 2; })
1572 .unaryExpr([&](
const int x) {
1573 return (x > num_votes / 2) ? inside_val
1576 return result.Reshape(shape);
1578 intersections_map = intersections_map.unaryExpr([&](
const int x) {
1579 return (x % 2) ? inside_val : outside_val;
1581 return intersections.Reshape(shape);
1589 const int nsamples) {
1590 AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points,
"query_points",
1591 3, impl_->tensor_device_);
1593 if (nsamples < 1 || (nsamples % 2) != 1) {
1595 "nsamples must be odd and >= 1 but is {}", nsamples);
1597 auto shape = query_points.
GetShape();
1600 size_t num_query_points = shape.NumElements();
1602 auto distance = ComputeDistance(data, nthreads);
1603 Eigen::Map<Eigen::VectorXf> distance_map(distance.GetDataPtr<
float>(),
1606 auto inside_outside =
1607 VoteInsideOutside(*
this, data, nthreads, nsamples, -1, 1);
1608 Eigen::Map<Eigen::VectorXi> inside_outside_map(
1609 inside_outside.GetDataPtr<
int>(), num_query_points);
1610 distance_map.array() *= inside_outside_map.array().cast<
float>();
1616 const int nsamples) {
1617 AssertTensorDtypeLastDimDeviceMinNDim<float>(query_points,
"query_points",
1618 3, impl_->tensor_device_);
1620 if (nsamples < 1 || (nsamples % 2) != 1) {
1625 auto result = VoteInsideOutside(*
this, query_points, nthreads, nsamples);
1639 auto intrinsic_matrix_contig =
1641 auto extrinsic_matrix_contig =
1644 Eigen::Map<Eigen::MatrixXd> KT(intrinsic_matrix_contig.GetDataPtr<
double>(),
1646 Eigen::Map<Eigen::MatrixXd> TT(extrinsic_matrix_contig.GetDataPtr<
double>(),
1649 Eigen::Matrix3d invK = KT.transpose().inverse();
1650 Eigen::Matrix3d RT = TT.block(0, 0, 3, 3);
1651 Eigen::Vector3d t = TT.transpose().block(0, 3, 3, 1);
1652 Eigen::Vector3d C = -RT * t;
1653 Eigen::Matrix3f RT_invK = (RT * invK).cast<float>();
1656 Eigen::Map<Eigen::MatrixXf> rays_map(rays.GetDataPtr<
float>(), 6,
1657 height_px * width_px);
1659 Eigen::Matrix<float, 6, 1> r;
1660 r.topRows<3>() = C.cast<
float>();
1661 int64_t linear_idx = 0;
1662 for (
int y = 0; y < height_px; ++y) {
1663 for (
int x = 0; x < width_px; ++x, ++linear_idx) {
1664 Eigen::Vector3f px(x + 0.5f, y + 0.5f, 1);
1665 Eigen::Vector3f ray_dir = RT_invK * px;
1666 r.bottomRows<3>() = ray_dir;
1667 rays_map.col(linear_idx) = r;
1686 double focal_length =
1687 0.5 * width_px / std::tan(0.5 * (
M_PI / 180) * fov_deg);
1691 Eigen::Map<Eigen::MatrixXd> intrinsic_matrix_map(
1692 intrinsic_matrix.
GetDataPtr<
double>(), 3, 3);
1693 intrinsic_matrix_map(0, 0) = focal_length;
1694 intrinsic_matrix_map(1, 1) = focal_length;
1695 intrinsic_matrix_map(2, 0) = 0.5 * width_px;
1696 intrinsic_matrix_map(2, 1) = 0.5 * height_px;
1702 Eigen::Map<const Eigen::Vector3d> center_map(
1703 center_contig.GetDataPtr<
double>());
1704 Eigen::Map<const Eigen::Vector3d> eye_map(eye_contig.GetDataPtr<
double>());
1705 Eigen::Map<const Eigen::Vector3d> up_map(up_contig.GetDataPtr<
double>());
1707 Eigen::Matrix3d R = Eigen::Matrix3d::Identity();
1708 R.row(1) = up_map / up_map.norm();
1709 R.row(2) = center_map - eye_map;
1710 R.row(2) /= R.row(2).norm();
1711 R.row(0) = R.row(1).cross(R.row(2));
1712 R.row(0) /= R.row(0).norm();
1713 R.row(1) = R.row(2).cross(R.row(0));
1714 Eigen::Vector3d t = -R * eye_map;
1718 Eigen::Map<Eigen::MatrixXd> extrinsic_matrix_map(
1719 extrinsic_matrix.
GetDataPtr<
double>(), 4, 4);
1720 extrinsic_matrix_map.block(3, 0, 1, 3) = t.transpose();
1721 extrinsic_matrix_map.block(0, 0, 3, 3) = R.transpose();
1723 return CreateRaysPinhole(intrinsic_matrix, extrinsic_matrix, width_px,
1727 uint32_t RaycastingScene::INVALID_ID() {
return RTC_INVALID_GEOMETRY_ID; }
1734 struct formatter<RTCError> {
1735 template <
typename FormatContext>
1736 auto format(
const RTCError& c, FormatContext& ctx)
const {
1737 const char*
name = rtcGetErrorString(c);
1738 return format_to(ctx.out(),
name);
1741 template <
typename ParseContext>
1742 constexpr
auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
#define AssertTensorDevice(tensor,...)
#define AssertTensorDtype(tensor,...)
#define AssertTensorShape(tensor,...)
std::vector< UVAtlasVertex > vb
A class to compute the Error function (erf)
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
std::string ToString() const
Tensor Contiguous() const
int64_t GetLength() const
Device GetDevice() const override
SizeVector GetShape() const
Tensor To(Dtype dtype, bool copy=false) const
A scene class with basic ray casting and closest point queries.
core::Tensor CountIntersections(const core::Tensor &rays, const int nthreads=0)
Computes the number of intersection of the rays with the scene.
A triangle mesh contains vertices and triangles.
core::Tensor & GetTriangleIndices()
core::Tensor & GetVertexPositions()
__device__ __forceinline__ float infinity()
Helper functions for the ml ops.
static double dist(double x1, double y1, double x2, double y2)
static void error(char *msg)
void CountIntersectionsFunc(const RTCFilterFunctionNArguments *args)
void ListIntersectionsFunc(const RTCFilterFunctionNArguments *args)
void Zeros(benchmark::State &state, const Device &device)
constexpr nullopt_t nullopt
Generic file read and write utility for python interface.
std::string to_string(const T &n)
RTCRayQueryContext context
GeomPrimID * previous_geom_prim_ID_tfar
RTCRayQueryContext context
unsigned int * geometry_ids
unsigned int * track_intersections
GeomPrimID * previous_geom_prim_ID_tfar
unsigned int * primitive_ids
void ComputeClosestPoints(const float *const query_points, const size_t num_query_points, float *closest_points, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads) override
void ListIntersections(const float *const rays, const size_t num_rays, const size_t num_intersections, int *cumsum, unsigned int *track_intersections, unsigned int *ray_ids, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *t_hit, const int nthreads) override
void ArraySum(int *data_ptr, size_t num_elements, size_t &result) override
void CopyArray(int *src, uint32_t *dst, size_t num_elements) override
void ArrayPartialSum(int *input, int *output, size_t num_elements) override
void CastRays(const float *const rays, const size_t num_rays, float *t_hit, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads, const bool line_intersection) override
void TestOcclusions(const float *const rays, const size_t num_rays, const float tnear, const float tfar, int8_t *occluded, const int nthreads) override
void CountIntersections(const float *const rays, const size_t num_rays, int *intersections, const int nthreads) override
virtual void ListIntersections(const float *const rays, const size_t num_rays, const size_t num_intersections, int *cumsum, unsigned int *track_intersections, unsigned int *ray_ids, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *t_hit, const int nthreads)=0
virtual void TestOcclusions(const float *const rays, const size_t num_rays, const float tnear, const float tfar, int8_t *occluded, const int nthreads)=0
virtual void CopyArray(int *src, uint32_t *dst, size_t num_elements)=0
virtual void ArraySum(int *data_ptr, size_t num_elements, size_t &result)=0
std::vector< GeometryPtr > geometry_ptrs_
core::Device tensor_device_
virtual void CastRays(const float *const rays, const size_t num_rays, float *t_hit, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads, const bool line_intersection)=0
virtual void ComputeClosestPoints(const float *const query_points, const size_t num_query_points, float *closest_points, unsigned int *geometry_ids, unsigned int *primitive_ids, float *primitive_uvs, float *primitive_normals, const int nthreads)=0
virtual void ArrayPartialSum(int *input, int *output, size_t num_elements)=0
virtual void CountIntersections(const float *const rays, const size_t num_rays, int *intersections, const int nthreads)=0