30 #ifndef FLANN_CUDA_KD_TREE_BUILDER_H_
31 #define FLANN_CUDA_KD_TREE_BUILDER_H_
32 #include <thrust/host_vector.h>
33 #include <thrust/device_vector.h>
34 #include <thrust/sort.h>
35 #include <thrust/partition.h>
36 #include <thrust/unique.h>
37 #include <thrust/scan.h>
98 namespace kd_tree_builder_detail
136 return (thrust::get<1>(i)& 1)==0;
160 MovePointsToChildNodes(
int* child1,
SplitInfo* splits,
float* x,
float* y,
float* z,
int* ox,
int* oy,
int* oz,
int* lrx,
int* lry,
int* lrz )
161 :
child1_(child1),
splits_(splits),
x_(x),
y_(y),
z_(z),
ox_(ox),
oy_(oy),
oz_(oz),
lrx_(lrx),
lry_(lry),
lrz_(lrz){}
175 void operator()(
const thrust::tuple<int, int, int, int>& data )
177 int index = thrust::get<0>(data);
178 int owner =
ox_[index];
179 int point_ind1=thrust::get<1>(data);
180 int point_ind2=thrust::get<2>(data);
181 int point_ind3=thrust::get<3>(data);
184 float dim_val1, dim_val2, dim_val3;
190 if( leftChild==-1 ) {
196 switch( split_dim ) {
198 dim_val1=
x_[point_ind1];
199 dim_val2=
x_[point_ind2];
200 dim_val3=
x_[point_ind3];
203 dim_val1=
y_[point_ind1];
204 dim_val2=
y_[point_ind2];
205 dim_val3=
y_[point_ind3];
208 dim_val1=
z_[point_ind1];
209 dim_val2=
z_[point_ind2];
210 dim_val3=
z_[point_ind3];
216 int r1=leftChild +(dim_val1 > split.
split_val);
218 int r2=leftChild+(dim_val2 > split.
split_val);
282 void operator()( thrust::tuple<int&, int&,SplitInfo&,float4&,float4&, int> node )
284 int& parent=thrust::get<0>(node);
285 int& child1=thrust::get<1>(node);
287 const float4& aabbMin=thrust::get<3>(node);
288 const float4& aabbMax=thrust::get<4>(node);
289 int my_index = thrust::get<5>(node);
290 bool split_node=
false;
292 __shared__
int block_nodes_to_allocate;
293 if( threadIdx.x== 0 ) block_nodes_to_allocate=0;
298 bool all_points_in_node_are_equal=aabbMin.x == aabbMax.x && aabbMin.y==aabbMax.y && aabbMin.z==aabbMax.z;
300 int offset_to_global=0;
305 offset_to_global = atomicAdd( &block_nodes_to_allocate,2 );
309 __shared__
int block_left;
310 __shared__
bool enough_space;
312 if( threadIdx.x==0) {
313 block_left = atomicAdd(
node_count, block_nodes_to_allocate );
316 if( !enough_space ) {
317 atomicAdd(
node_count, -block_nodes_to_allocate );
328 if( split_node && enough_space ) {
329 int left = block_left + offset_to_global;
337 float4 aabbDim=aabbMax-aabbMin;
339 float maxDimLength=aabbDim.x;
340 float4 splitVal=(aabbMax+aabbMin);
342 for(
int i=1; i<=2; i++ ) {
344 if( val > maxDimLength ) {
377 return val_[id] ? f : t;
386 thrust::tuple<float,float,float>
operator()(
const float4& val )
388 return thrust::make_tuple(val.x, val.y, val.z);
415 child1_=
new thrust::device_vector<int>(prealloc,-1);
416 parent_=
new thrust::device_vector<int>(prealloc,-1);
420 splits_=
new thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>(prealloc,s);
424 aabb_min_=
new thrust::device_vector<float4>(prealloc);
425 aabb_max_=
new thrust::device_vector<float4>(prealloc);
491 thrust::counting_iterator<int> it(0);
497 thrust::device_vector<float> tmpv(
points_->size());
501 thrust::sort_by_key( tmpv.begin(), tmpv.end(),
index_x_->begin() );
503 thrust::sort_by_key( tmpv.begin(), tmpv.end(),
index_y_->begin() );
505 thrust::sort_by_key( tmpv.begin(), tmpv.end(),
index_z_->begin() );
511 #ifdef PRINT_DEBUG_TIMING
512 cudaDeviceSynchronize();
513 std::cout<<
" initial stuff:"<<stepTimer.elapsed()<<
std::endl;
516 int last_node_count=0;
517 for(
int i=0;; i++ ) {
528 thrust::counting_iterator<int> cit(0);
530 thrust::make_zip_iterator(thrust::make_tuple(
parent_->begin()+last_node_count,
child1_->begin()+last_node_count,
splits_->begin()+last_node_count,
aabb_min_->begin()+last_node_count,
aabb_max_->begin()+last_node_count,cit+last_node_count )),
535 if( last_node_count == alloc_info[
NodeCount] ) {
547 #ifdef PRINT_DEBUG_TIMING
548 cudaDeviceSynchronize();
549 std::cout<<
" node split:"<<stepTimer.elapsed()<<
std::endl;
555 thrust::raw_pointer_cast(&(*
splits_)[0]),
556 thrust::raw_pointer_cast(&(*
points_x_)[0]),
557 thrust::raw_pointer_cast(&(*
points_y_)[0]),
558 thrust::raw_pointer_cast(&(*
points_z_)[0]),
559 thrust::raw_pointer_cast(&(*
owners_x_)[0]),
560 thrust::raw_pointer_cast(&(*
owners_y_)[0]),
561 thrust::raw_pointer_cast(&(*
owners_z_)[0]),
566 thrust::counting_iterator<int> ci0(0);
567 thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( ci0,
index_x_->begin(),
index_y_->begin(),
index_z_->begin()) ),
570 #ifdef PRINT_DEBUG_TIMING
571 cudaDeviceSynchronize();
572 std::cout<<
" set new owners:"<<stepTimer.elapsed()<<
std::endl;
585 #ifdef PRINT_DEBUG_TIMING
586 cudaDeviceSynchronize();
587 std::cout<<
" split:"<<stepTimer.elapsed()<<
std::endl;
592 #ifdef PRINT_DEBUG_TIMING
593 cudaDeviceSynchronize();
594 std::cout<<
" update_leftright_and_aabb:"<<stepTimer.elapsed()<<
std::endl;
596 print_vector(node_count_);
602 template<
class Distance>
610 update_leftright_and_aabb(
const thrust::device_vector<float>& x,
const thrust::device_vector<float>& y,
const thrust::device_vector<float>& z,
611 const thrust::device_vector<int>& ix,
const thrust::device_vector<int>& iy,
const thrust::device_vector<int>& iz,
612 const thrust::device_vector<int>& owners,
613 thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>& splits, thrust::device_vector<float4>& aabbMin,thrust::device_vector<float4>& aabbMax)
615 thrust::device_vector<int>* labelsUnique=
tmp_owners_;
616 thrust::device_vector<int>* countsUnique=
tmp_index_;
620 int unique_labels = thrust::unique_by_key_copy( owners.begin(), owners.end(), thrust::counting_iterator<int>(0), labelsUnique->begin(), countsUnique->begin()).first - labelsUnique->begin();
626 s.
nodes=thrust::raw_pointer_cast(&(splits[0]));
627 s.
counts=thrust::raw_pointer_cast(&( (*countsUnique)[0]));
628 s.
labels=thrust::raw_pointer_cast(&( (*labelsUnique)[0]));
629 s.
x=thrust::raw_pointer_cast(&x[0]);
630 s.
y=thrust::raw_pointer_cast(&y[0]);
631 s.
z=thrust::raw_pointer_cast(&z[0]);
632 s.
ix=thrust::raw_pointer_cast(&ix[0]);
633 s.
iy=thrust::raw_pointer_cast(&iy[0]);
634 s.
iz=thrust::raw_pointer_cast(&iz[0]);
635 s.
aabbMin=thrust::raw_pointer_cast(&aabbMin[0]);
636 s.
aabbMax=thrust::raw_pointer_cast(&aabbMax[0]);
638 thrust::counting_iterator<int> it(0);
639 thrust::for_each(it, it+unique_labels, s);
649 void separate_left_and_right_children( thrust::device_vector<int>& key_in, thrust::device_vector<int>& val_in, thrust::device_vector<int>& key_out, thrust::device_vector<int>& val_out, thrust::device_vector<int>& left_right_marks,
bool scatter_val_out=
true )
651 thrust::device_vector<int>* f_tmp = &val_out;
652 thrust::device_vector<int>* addr_tmp =
tmp_misc_;
654 thrust::exclusive_scan( left_right_marks.begin()
655 , left_right_marks.end()
658 sa.
val_=thrust::raw_pointer_cast(&left_right_marks[0]);
659 sa.
f_=thrust::raw_pointer_cast(&(*f_tmp)[0]);
661 thrust::counting_iterator<int> it(0);
662 thrust::transform(it, it+val_in.size(), addr_tmp->begin(), sa);
664 thrust::scatter(key_in.begin(), key_in.end(), addr_tmp->begin(), key_out.begin());
665 if( scatter_val_out ) thrust::scatter(val_in.begin(), val_in.end(), addr_tmp->begin(), val_out.begin());
672 size_t add = new_size -
child1_->size();
695 thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>*
splits_;
void resize_node_vectors(size_t new_size)
thrust::device_vector< float4 > * aabb_max_
max aabb value of each node
thrust::device_vector< int > * index_x_
thrust::device_vector< int > * leftright_z_
thrust::device_vector< int > * owners_y_
thrust::device_vector< int > * tmp_index_
thrust::device_vector< int > * index_z_
void separate_left_and_right_children(thrust::device_vector< int > &key_in, thrust::device_vector< int > &val_in, thrust::device_vector< int > &key_out, thrust::device_vector< int > &val_out, thrust::device_vector< int > &left_right_marks, bool scatter_val_out=true)
thrust::device_vector< int > * index_y_
thrust::device_vector< int > * leftright_x_
thrust::device_vector< int > * owners_z_
thrust::device_vector< float > * points_x_
const thrust::device_vector< float4 > * points_
thrust::device_vector< float4 > * aabb_min_
min aabb value of each node
thrust::device_vector< int > allocation_info_
thrust::device_vector< float > * points_y_
thrust::device_vector< int > * tmp_owners_
thrust::device_vector< int > * tmp_misc_
CudaKdTreeBuilder(const thrust::device_vector< float4 > &points, int max_leaf_size)
thrust::device_vector< float > * points_z_
thrust::device_vector< int > * leftright_y_
void update_leftright_and_aabb(const thrust::device_vector< float > &x, const thrust::device_vector< float > &y, const thrust::device_vector< float > &z, const thrust::device_vector< int > &ix, const thrust::device_vector< int > &iy, const thrust::device_vector< int > &iz, const thrust::device_vector< int > &owners, thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > &splits, thrust::device_vector< float4 > &aabbMin, thrust::device_vector< float4 > &aabbMax)
takes the partitioned nodes, and sets the left-/right info of leaf nodes, as well as the AABBs
thrust::device_vector< int > * child1_
thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > * splits_
split info (dim/value or left/right pointers)
thrust::device_vector< int > * parent_
parent node of each node
thrust::device_vector< int > * owners_x_
__host__ __device__ float4 make_float4(float s)
QTextStream & endl(QTextStream &stream)
__host__ __device__ float get_value_by_index(const float4 &f, int i)
std::ostream & operator<<(std::ostream &stream, const cuda::kd_tree_builder_detail::SplitInfo &s)
void swap(cloudViewer::core::SmallVectorImpl< T > &LHS, cloudViewer::core::SmallVectorImpl< T > &RHS)
Implement std::swap in terms of SmallVector swap.
__device__ int operator()(int i)
MovePointsToChildNodes(int *child1, SplitInfo *splits, float *x, float *y, float *z, int *ox, int *oy, int *oz, int *lrx, int *lry, int *lrz)
__device__ void operator()(const thrust::tuple< int, int, int, int > &data)
__host__ __device__ bool operator()(const thrust::tuple< int, int > &i)
used to update the left/right pointers and aabb infos after the node splits
__host__ __device__ void operator()(int i)
__device__ void operator()(thrust::tuple< int &, int &, SplitInfo &, float4 &, float4 &, int > node)
__device__ thrust::tuple< float, float, float > operator()(const float4 &val)
__device__ int operator()(int id)