cpp_api/api/kdtree__cuda__builder_8h_source.html

 /***********************************************************************

  * Software License Agreement (BSD License)

  *

  * Copyright 2011       Andreas Muetzel (amuetzel@uni-koblenz.de). All rights reserved.

  *

  * THE BSD LICENSE

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions

  * are met:

  *

  * 1. Redistributions of source code must retain the above copyright

  *    notice, this list of conditions and the following disclaimer.

  * 2. Redistributions in binary form must reproduce the above copyright

  *    notice, this list of conditions and the following disclaimer in the

  *    documentation and/or other materials provided with the distribution.

  *

  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  *************************************************************************/


 #ifndef FLANN_CUDA_KD_TREE_BUILDER_H_

 #define FLANN_CUDA_KD_TREE_BUILDER_H_

 #include <thrust/host_vector.h>

 #include <thrust/device_vector.h>

 #include <thrust/sort.h>

 #include <thrust/partition.h>

 #include <thrust/unique.h>

 #include <thrust/scan.h>

 #include <FLANN/util/cutil_math.h>

 #include <stdlib.h>


 // #define PRINT_DEBUG_TIMING


 namespace flann

 {

 //      template< typename T >

 //      void print_vector(  const thrust::device_vector<T>& v )

 //      {

 //              for( int i=0; i< v.size(); i++ )

 //              {

 //                      std::cout<<v[i]<<std::endl;

 //              }

 //      }

 //

 //      template< typename T1, typename T2 >

 //      void print_vector(  const thrust::device_vector<T1>& v1, const thrust::device_vector<T2>& v2 )

 //      {

 //              for( int i=0; i< v1.size(); i++ )

 //              {

 //                      std::cout<<i<<": "<<v1[i]<<" "<<v2[i]<<std::endl;

 //              }

 //      }

 //

 //      template< typename T1, typename T2, typename T3 >

 //      void print_vector(  const thrust::device_vector<T1>& v1, const thrust::device_vector<T2>& v2, const thrust::device_vector<T3>& v3 )

 //      {

 //              for( int i=0; i< v1.size(); i++ )

 //              {

 //                      std::cout<<i<<": "<<v1[i]<<" "<<v2[i]<<" "<<v3[i]<<std::endl;

 //              }

 //      }

 //

 //      template< typename T >

 //      void print_vector_by_index(  const thrust::device_vector<T>& v,const thrust::device_vector<int>& ind )

 //      {

 //              for( int i=0; i< v.size(); i++ )

 //              {

 //                      std::cout<<v[ind[i]]<<std::endl;

 //              }

 //      }


 //      std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::SplitInfo& s) {

 //      stream<<"(split l/r: "<< s.left <<" "<< s.right<< "  split:"<<s.split_dim<<" "<<s.split_val<<")";

 //      return stream;

 // }

 //

 //

 // std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::NodeInfo& s) {

 //      stream<<"(node: "<<s.child1()<<" "<<s.parent()<<" "<<s.child2()<<")";

 //      return stream;

 // }

 //

 // std::ostream& operator <<(std::ostream& stream, const float4& s) {

 //      stream<<"("<<s.x<<","<<s.y<<","<<s.z<<","<<s.w<<")";

 //      return stream;

 // }

 namespace cuda

 {

 namespace kd_tree_builder_detail

 {

 struct SplitInfo

 {

     union {

         struct

         {

             // begin of child nodes

             int left;

             // end of child nodes

             int right;

         };

         struct

         {

             int split_dim;

             float split_val;

         };

     };


 };


 struct IsEven

 {

     typedef int result_type;

     __device__

     int operator()(int i )

     {

         return (i& 1)==0;

     }

 };


 struct SecondElementIsEven

 {

     __host__ __device__

     bool operator()( const thrust::tuple<int,int>& i )

     {

         return (thrust::get<1>(i)& 1)==0;

     }

 };


 __host__ __device__

 float get_value_by_index( const float4& f, int i )

 {

     switch(i) {

     case 0:

         return f.x;

     case 1:

         return f.y;

     default:

         return f.z;

     }


 }


 struct MovePointsToChildNodes

 {

     MovePointsToChildNodes( int* child1, SplitInfo* splits, float* x, float* y, float* z, int* ox, int* oy, int* oz, int* lrx, int* lry, int* lrz )

         : child1_(child1), splits_(splits), x_(x), y_(y), z_(z), ox_(ox), oy_(oy), oz_(oz), lrx_(lrx), lry_(lry), lrz_(lrz){}

     //  int dim;

     //  float threshold;

     int* child1_;

     SplitInfo* splits_;


     // coordinate values

     float* x_, * y_, * z_;

     // owner indices -> which node does the point belong to?

     int* ox_, * oy_, * oz_;

     // temp info: will be set to 1 of a point is moved to the right child node, 0 otherwise

     // (used later in the scan op to separate the points of the children into continuous ranges)

     int* lrx_, * lry_, * lrz_;

     __device__

     void operator()( const thrust::tuple<int, int, int, int>& data )

     {

         int index = thrust::get<0>(data);

         int owner = ox_[index]; // before a split, all points at the same position in the index array have the same owner

         int point_ind1=thrust::get<1>(data);

         int point_ind2=thrust::get<2>(data);

         int point_ind3=thrust::get<3>(data);

         int leftChild=child1_[owner];

         int split_dim;

         float dim_val1, dim_val2, dim_val3;

         SplitInfo split;

         lrx_[index]=0;

         lry_[index]=0;

         lrz_[index]=0;

         // this element already belongs to a leaf node -> everything alright, no need to change anything

         if( leftChild==-1 ) {

             return;

         }

         // otherwise: load split data, and assign this index to the new owner

         split = splits_[owner];

         split_dim=split.split_dim;

         switch( split_dim ) {

         case 0:

             dim_val1=x_[point_ind1];

             dim_val2=x_[point_ind2];

             dim_val3=x_[point_ind3];

             break;

         case 1:

             dim_val1=y_[point_ind1];

             dim_val2=y_[point_ind2];

             dim_val3=y_[point_ind3];

             break;

         default:

             dim_val1=z_[point_ind1];

             dim_val2=z_[point_ind2];

             dim_val3=z_[point_ind3];

             break;


         }


         int r1=leftChild +(dim_val1 > split.split_val);

         ox_[index]=r1;

         int r2=leftChild+(dim_val2 > split.split_val);

         oy_[index]=r2;

         oz_[index]=leftChild+(dim_val3 > split.split_val);


         lrx_[index] = (dim_val1 > split.split_val);

         lry_[index] = (dim_val2 > split.split_val);

         lrz_[index] = (dim_val3 > split.split_val);

         //                      return thrust::make_tuple( r1, r2, leftChild+(dim_val > split.split_val) );

     }

 };


 struct SetLeftAndRightAndAABB

 {

     int maxPoints;

     int nElements;


     SplitInfo* nodes;

     int* counts;

     int* labels;

     float4* aabbMin;

     float4* aabbMax;

     const float* x,* y,* z;

     const int* ix, * iy, * iz;


     __host__ __device__

     void operator()( int i )

     {

         int index=labels[i];

         int right;

         int left = counts[i];

         nodes[index].left=left;

         if( i < nElements-1 ) {

             right=counts[i+1];

         }

         else { // index==nNodes

             right=maxPoints;

         }

         nodes[index].right=right;

         aabbMin[index].x=x[ix[left]];

         aabbMin[index].y=y[iy[left]];

         aabbMin[index].z=z[iz[left]];

         aabbMax[index].x=x[ix[right-1]];

         aabbMax[index].y=y[iy[right-1]];

         aabbMax[index].z=z[iz[right-1]];

     }

 };


 struct SplitNodes

 {

     int maxPointsPerNode;

     int* node_count;

     int* nodes_allocated;

     int* out_of_space;

     int* child1_;

     int* parent_;

     SplitInfo* splits;


     __device__

     void operator()( thrust::tuple<int&, int&,SplitInfo&,float4&,float4&, int> node ) // float4: aabbMin, aabbMax

     {

         int& parent=thrust::get<0>(node);

         int& child1=thrust::get<1>(node);

         SplitInfo& s=thrust::get<2>(node);

         const float4& aabbMin=thrust::get<3>(node);

         const float4& aabbMax=thrust::get<4>(node);

         int my_index = thrust::get<5>(node);

         bool split_node=false;

         // first, each thread block counts the number of nodes that it needs to allocate...

         __shared__ int block_nodes_to_allocate;

         if( threadIdx.x== 0 ) block_nodes_to_allocate=0;

         __syncthreads();


         // don't split if all points are equal

         // (could lead to an infinite loop, and doesn't make any sense anyway)

         bool all_points_in_node_are_equal=aabbMin.x == aabbMax.x && aabbMin.y==aabbMax.y && aabbMin.z==aabbMax.z;


         int offset_to_global=0;


         // maybe this could be replaced with a reduction...

         if(( child1==-1) &&( s.right-s.left > maxPointsPerNode) && !all_points_in_node_are_equal ) { // leaf node

             split_node=true;

             offset_to_global = atomicAdd( &block_nodes_to_allocate,2 );

         }


         __syncthreads();

         __shared__ int block_left;

         __shared__ bool enough_space;

         // ... then the first thread tries to allocate this many nodes...

         if( threadIdx.x==0) {

             block_left = atomicAdd( node_count, block_nodes_to_allocate );

             enough_space = block_left+block_nodes_to_allocate < *nodes_allocated;

             // if it doesn't succeed, no nodes will be created by this block

             if( !enough_space ) {

                 atomicAdd( node_count, -block_nodes_to_allocate );

                 *out_of_space=1;

             }

         }


         __syncthreads();

         // this thread needs to split it's node && there was enough space for all the nodes

         // in this block.

         //(The whole "allocate-per-block-thing" is much faster than letting each element allocate

         // its space on its own, because shared memory atomics are A LOT faster than

         // global mem atomics!)

         if( split_node && enough_space ) {

             int left = block_left + offset_to_global;


             splits[left].left=s.left;

             splits[left].right=s.right;

             splits[left+1].left=0;

             splits[left+1].right=0;


             // split axis/position: middle of longest aabb extent

             float4 aabbDim=aabbMax-aabbMin;

             int maxDim=0;

             float maxDimLength=aabbDim.x;

             float4 splitVal=(aabbMax+aabbMin);

             splitVal*=0.5f;

             for( int i=1; i<=2; i++ ) {

                 float val = get_value_by_index(aabbDim,i);

                 if( val > maxDimLength ) {

                     maxDim=i;

                     maxDimLength=val;

                 }

             }

             s.split_dim=maxDim;

             s.split_val=get_value_by_index(splitVal,maxDim);


             child1_[my_index]=left;

             splits[my_index]=s;


             parent_[left]=my_index;

             parent_[left+1]=my_index;

             child1_[left]=-1;

             child1_[left+1]=-1;

         }

     }

 };


 struct set_addr3

 {

     const int* val_, * f_;


     int npoints_;

     __device__

     int operator()( int id )

     {

         int nf = f_[npoints_-1] + (val_[npoints_-1]);

         int f=f_[id];

         int t = id -f+nf;

         return val_[id] ? f : t;

     }

 };


 struct pointxyz_to_px_py_pz

 {

     __device__

     thrust::tuple<float,float,float> operator()( const float4& val )

     {

         return thrust::make_tuple(val.x, val.y, val.z);

     }

 };

 } // namespace kd_tree_builder_detail


 } // namespace cuda


 std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::SplitInfo& s)

 {

     stream<<"(split l/r: "<< s.left <<" "<< s.right<< "  split:"<<s.split_dim<<" "<<s.split_val<<")";

     return stream;

 }

 class CudaKdTreeBuilder

 {

 public:

     CudaKdTreeBuilder( const thrust::device_vector<float4>& points, int max_leaf_size ) : /*out_of_space_(1,0),node_count_(1,1),*/ max_leaf_size_(max_leaf_size)

     {

         points_=&points;

         int prealloc = points.size()/max_leaf_size_*16;

         allocation_info_.resize(3);

         allocation_info_[NodeCount]=1;

         allocation_info_[NodesAllocated]=prealloc;

         allocation_info_[OutOfSpace]=0;


         //              std::cout<<points_->size()<<std::endl;


         child1_=new thrust::device_vector<int>(prealloc,-1);

         parent_=new thrust::device_vector<int>(prealloc,-1);

         cuda::kd_tree_builder_detail::SplitInfo s;

         s.left=0;

         s.right=0;

         splits_=new thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>(prealloc,s);

         s.right=points.size();

         (*splits_)[0]=s;


         aabb_min_=new thrust::device_vector<float4>(prealloc);

         aabb_max_=new thrust::device_vector<float4>(prealloc);


         index_x_=new thrust::device_vector<int>(points_->size());

         index_y_=new thrust::device_vector<int>(points_->size());

         index_z_=new thrust::device_vector<int>(points_->size());


         owners_x_=new thrust::device_vector<int>(points_->size(),0);

         owners_y_=new thrust::device_vector<int>(points_->size(),0);

         owners_z_=new thrust::device_vector<int>(points_->size(),0);


         leftright_x_ = new thrust::device_vector<int>(points_->size(),0);

         leftright_y_ = new thrust::device_vector<int>(points_->size(),0);

         leftright_z_ = new thrust::device_vector<int>(points_->size(),0);


         tmp_index_=new thrust::device_vector<int>(points_->size());

         tmp_owners_=new thrust::device_vector<int>(points_->size());

         tmp_misc_=new thrust::device_vector<int>(points_->size());


         points_x_=new thrust::device_vector<float>(points_->size());

         points_y_=new thrust::device_vector<float>(points_->size());

         points_z_=new thrust::device_vector<float>(points_->size());

         delete_node_info_=false;

     }


     ~CudaKdTreeBuilder()

     {

         if( delete_node_info_ ) {

             delete child1_;

             delete parent_;

             delete splits_;

             delete aabb_min_;

             delete aabb_max_;

             delete index_x_;

         }


         delete index_y_;

         delete index_z_;

         delete owners_x_;

         delete owners_y_;

         delete owners_z_;

         delete points_x_;

         delete points_y_;

         delete points_z_;

         delete leftright_x_;

         delete leftright_y_;

         delete leftright_z_;

         delete tmp_index_;

         delete tmp_owners_;

         delete tmp_misc_;

     }


     void buildTree()

     {

         //              std::cout<<"buildTree()"<<std::endl;

         //              sleep(1);

         //              Util::Timer stepTimer;

         thrust::transform( points_->begin(), points_->end(), thrust::make_zip_iterator(thrust::make_tuple(points_x_->begin(), points_y_->begin(),points_z_->begin()) ), cuda::kd_tree_builder_detail::pointxyz_to_px_py_pz() );


         thrust::counting_iterator<int> it(0);

         thrust::copy( it, it+points_->size(), index_x_->begin() );


         thrust::copy( index_x_->begin(), index_x_->end(), index_y_->begin() );

         thrust::copy( index_x_->begin(), index_x_->end(), index_z_->begin() );


         thrust::device_vector<float> tmpv(points_->size());


         // create sorted index list -> can be used to compute AABBs in O(1)

         thrust::copy(points_x_->begin(), points_x_->end(), tmpv.begin());

         thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_x_->begin() );

         thrust::copy(points_y_->begin(), points_y_->end(), tmpv.begin());

         thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_y_->begin() );

         thrust::copy(points_z_->begin(), points_z_->end(), tmpv.begin());

         thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_z_->begin() );


         (*aabb_min_)[0]=make_float4((*points_x_)[(*index_x_)[0]],(*points_y_)[(*index_y_)[0]],(*points_z_)[(*index_z_)[0]],0);


         (*aabb_max_)[0]=make_float4((*points_x_)[(*index_x_)[points_->size()-1]],(*points_y_)[(*index_y_)[points_->size()-1]],(*points_z_)[(*index_z_)[points_->size()-1]],0);

         #ifdef PRINT_DEBUG_TIMING

         cudaDeviceSynchronize();

         std::cout<<" initial stuff:"<<stepTimer.elapsed()<<std::endl;

         stepTimer.restart();

         #endif

         int last_node_count=0;

         for( int i=0;; i++ ) {

             cuda::kd_tree_builder_detail::SplitNodes sn;


             sn.maxPointsPerNode=max_leaf_size_;

             sn.node_count=thrust::raw_pointer_cast(&allocation_info_[NodeCount]);

             sn.nodes_allocated=thrust::raw_pointer_cast(&allocation_info_[NodesAllocated]);

             sn.out_of_space=thrust::raw_pointer_cast(&allocation_info_[OutOfSpace]);

             sn.child1_=thrust::raw_pointer_cast(&(*child1_)[0]);

             sn.parent_=thrust::raw_pointer_cast(&(*parent_)[0]);

             sn.splits=thrust::raw_pointer_cast(&(*splits_)[0]);


             thrust::counting_iterator<int> cit(0);

             thrust::for_each( thrust::make_zip_iterator(thrust::make_tuple( parent_->begin(), child1_->begin(),  splits_->begin(), aabb_min_->begin(), aabb_max_->begin(), cit  )),

                               thrust::make_zip_iterator(thrust::make_tuple( parent_->begin()+last_node_count, child1_->begin()+last_node_count,splits_->begin()+last_node_count, aabb_min_->begin()+last_node_count, aabb_max_->begin()+last_node_count,cit+last_node_count  )),

                               sn   );

             // copy allocation info to host

             thrust::host_vector<int> alloc_info = allocation_info_;


             if( last_node_count == alloc_info[NodeCount] ) { // no more nodes were split -> done

                 break;

             }

             last_node_count=alloc_info[NodeCount];


             // a node was un-splittable due to a lack of space

             if( alloc_info[OutOfSpace]==1 ) {

                 resize_node_vectors(alloc_info[NodesAllocated]*2);

                 alloc_info[OutOfSpace]=0;

                 alloc_info[NodesAllocated]*=2;

                 allocation_info_=alloc_info;

             }

             #ifdef PRINT_DEBUG_TIMING

             cudaDeviceSynchronize();

             std::cout<<" node split:"<<stepTimer.elapsed()<<std::endl;

             stepTimer.restart();

             #endif


             // foreach point: point was in node that was split?move it to child (leaf) node : do nothing

             cuda::kd_tree_builder_detail::MovePointsToChildNodes sno( thrust::raw_pointer_cast(&(*child1_)[0]),

                                                                       thrust::raw_pointer_cast(&(*splits_)[0]),

                                                                       thrust::raw_pointer_cast(&(*points_x_)[0]),

                                                                       thrust::raw_pointer_cast(&(*points_y_)[0]),

                                                                       thrust::raw_pointer_cast(&(*points_z_)[0]),

                                                                       thrust::raw_pointer_cast(&(*owners_x_)[0]),

                                                                       thrust::raw_pointer_cast(&(*owners_y_)[0]),

                                                                       thrust::raw_pointer_cast(&(*owners_z_)[0]),

                                                                       thrust::raw_pointer_cast(&(*leftright_x_)[0]),

                                                                       thrust::raw_pointer_cast(&(*leftright_y_)[0]),

                                                                       thrust::raw_pointer_cast(&(*leftright_z_)[0])

                                                                       );

             thrust::counting_iterator<int> ci0(0);

             thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( ci0, index_x_->begin(), index_y_->begin(), index_z_->begin()) ),

                               thrust::make_zip_iterator( thrust::make_tuple( ci0+points_->size(), index_x_->end(), index_y_->end(), index_z_->end()) ),sno  );


             #ifdef PRINT_DEBUG_TIMING

             cudaDeviceSynchronize();

             std::cout<<" set new owners:"<<stepTimer.elapsed()<<std::endl;

             stepTimer.restart();

             #endif


             // move points around so that each leaf node's points are continuous

             separate_left_and_right_children(*index_x_,*owners_x_,*tmp_index_,*tmp_owners_, *leftright_x_);

             std::swap(tmp_index_, index_x_);

             std::swap(tmp_owners_, owners_x_);

             separate_left_and_right_children(*index_y_,*owners_y_,*tmp_index_,*tmp_owners_, *leftright_y_,false);

             std::swap(tmp_index_, index_y_);

             separate_left_and_right_children(*index_z_,*owners_z_,*tmp_index_,*tmp_owners_, *leftright_z_,false);

             std::swap(tmp_index_, index_z_);


             #ifdef PRINT_DEBUG_TIMING

             cudaDeviceSynchronize();

             std::cout<<" split:"<<stepTimer.elapsed()<<std::endl;

             stepTimer.restart();

             #endif

             // calculate new AABB etc

             update_leftright_and_aabb( *points_x_, *points_y_, *points_z_, *index_x_, *index_y_, *index_z_, *owners_x_, *splits_,*aabb_min_, *aabb_max_);

             #ifdef PRINT_DEBUG_TIMING

             cudaDeviceSynchronize();

             std::cout<<" update_leftright_and_aabb:"<<stepTimer.elapsed()<<std::endl;

             stepTimer.restart();

             print_vector(node_count_);

             #endif


         }

     }


     template<class Distance>

     friend class KDTreeCuda3dIndex;


 protected:


     void

     update_leftright_and_aabb( const thrust::device_vector<float>& x, const thrust::device_vector<float>& y,const thrust::device_vector<float>& z,

                                const thrust::device_vector<int>& ix, const thrust::device_vector<int>& iy,const thrust::device_vector<int>& iz,

                                const thrust::device_vector<int>& owners,

                                thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>& splits, thrust::device_vector<float4>& aabbMin,thrust::device_vector<float4>& aabbMax)

     {

         thrust::device_vector<int>* labelsUnique=tmp_owners_;

         thrust::device_vector<int>* countsUnique=tmp_index_;

         // assume: points of each node are continuous in the array


         // find which nodes are here, and where each node's points begin and end

         int unique_labels = thrust::unique_by_key_copy( owners.begin(), owners.end(), thrust::counting_iterator<int>(0), labelsUnique->begin(), countsUnique->begin()).first - labelsUnique->begin();


         // update the info

         cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB s;

         s.maxPoints=x.size();

         s.nElements=unique_labels;

         s.nodes=thrust::raw_pointer_cast(&(splits[0]));

         s.counts=thrust::raw_pointer_cast(&( (*countsUnique)[0]));

         s.labels=thrust::raw_pointer_cast(&( (*labelsUnique)[0]));

         s.x=thrust::raw_pointer_cast(&x[0]);

         s.y=thrust::raw_pointer_cast(&y[0]);

         s.z=thrust::raw_pointer_cast(&z[0]);

         s.ix=thrust::raw_pointer_cast(&ix[0]);

         s.iy=thrust::raw_pointer_cast(&iy[0]);

         s.iz=thrust::raw_pointer_cast(&iz[0]);

         s.aabbMin=thrust::raw_pointer_cast(&aabbMin[0]);

         s.aabbMax=thrust::raw_pointer_cast(&aabbMax[0]);


         thrust::counting_iterator<int> it(0);

         thrust::for_each(it, it+unique_labels, s);

     }


     void separate_left_and_right_children( thrust::device_vector<int>& key_in, thrust::device_vector<int>& val_in, thrust::device_vector<int>& key_out, thrust::device_vector<int>& val_out, thrust::device_vector<int>& left_right_marks, bool scatter_val_out=true )

     {

         thrust::device_vector<int>* f_tmp = &val_out;

         thrust::device_vector<int>* addr_tmp = tmp_misc_;


         thrust::exclusive_scan( /*thrust::make_transform_iterator(*/ left_right_marks.begin() /*,cuda::kd_tree_builder_detail::IsEven*/

                                                                      /*())*/, /*thrust::make_transform_iterator(*/ left_right_marks.end() /*,cuda::kd_tree_builder_detail::IsEven*/

                                                                      /*())*/,     f_tmp->begin() );

         cuda::kd_tree_builder_detail::set_addr3 sa;

         sa.val_=thrust::raw_pointer_cast(&left_right_marks[0]);

         sa.f_=thrust::raw_pointer_cast(&(*f_tmp)[0]);

         sa.npoints_=key_in.size();

         thrust::counting_iterator<int> it(0);

         thrust::transform(it, it+val_in.size(), addr_tmp->begin(), sa);


         thrust::scatter(key_in.begin(), key_in.end(), addr_tmp->begin(), key_out.begin());

         if( scatter_val_out ) thrust::scatter(val_in.begin(), val_in.end(), addr_tmp->begin(), val_out.begin());

     }


     void resize_node_vectors( size_t new_size )

     {

         size_t add = new_size - child1_->size();

         child1_->insert(child1_->end(), add, -1);

         parent_->insert(parent_->end(), add, -1);

         cuda::kd_tree_builder_detail::SplitInfo s;

         s.left=0;

         s.right=0;

         splits_->insert(splits_->end(), add, s);

         float4 f;

         aabb_min_->insert(aabb_min_->end(), add, f);

         aabb_max_->insert(aabb_max_->end(), add, f);

     }


     const thrust::device_vector<float4>* points_;


     // tree data, those are stored per-node


     thrust::device_vector<int>* child1_;

     thrust::device_vector<int>* parent_;

     thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>* splits_;

     thrust::device_vector<float4>* aabb_min_;

     thrust::device_vector<float4>* aabb_max_;


     enum AllocationInfo

     {

         NodeCount=0,

         NodesAllocated=1,

         OutOfSpace=2

     };

     // those were put into a single vector of 3 elements so that only one mem transfer will be needed for all three of them

     //  thrust::device_vector<int> out_of_space_;

     //  thrust::device_vector<int> node_count_;

     //  thrust::device_vector<int> nodes_allocated_;

     thrust::device_vector<int> allocation_info_;


     int max_leaf_size_;


     // coordinate values of the points

     thrust::device_vector<float>* points_x_, * points_y_, * points_z_;

     // indices

     thrust::device_vector<int>* index_x_,  * index_y_,  * index_z_;

     // owner node

     thrust::device_vector<int>* owners_x_, * owners_y_, * owners_z_;

     // contains info about whether a point was partitioned to the left or right child after a split

     thrust::device_vector<int>* leftright_x_, * leftright_y_, * leftright_z_;

     thrust::device_vector<int>* tmp_index_, * tmp_owners_, * tmp_misc_;

     bool delete_node_info_;

 };


 } // namespace flann

 #endif

points
int points
Definition: FileIOFactory.cpp:144

copy
bool copy
Definition: VtkUtils.cpp:74

flann::CudaKdTreeBuilder
Definition: kdtree_cuda_builder.h:402

flann::CudaKdTreeBuilder::resize_node_vectors
void resize_node_vectors(size_t new_size)
Definition: kdtree_cuda_builder.h:670

flann::CudaKdTreeBuilder::aabb_max_
thrust::device_vector< float4 > * aabb_max_
max aabb value of each node
Definition: kdtree_cuda_builder.h:699

flann::CudaKdTreeBuilder::index_x_
thrust::device_vector< int > * index_x_
Definition: kdtree_cuda_builder.h:718

flann::CudaKdTreeBuilder::buildTree
void buildTree()
Definition: kdtree_cuda_builder.h:484

flann::CudaKdTreeBuilder::leftright_z_
thrust::device_vector< int > * leftright_z_
Definition: kdtree_cuda_builder.h:722

flann::CudaKdTreeBuilder::owners_y_
thrust::device_vector< int > * owners_y_
Definition: kdtree_cuda_builder.h:720

flann::CudaKdTreeBuilder::tmp_index_
thrust::device_vector< int > * tmp_index_
Definition: kdtree_cuda_builder.h:723

flann::CudaKdTreeBuilder::max_leaf_size_
int max_leaf_size_
Definition: kdtree_cuda_builder.h:713

flann::CudaKdTreeBuilder::index_z_
thrust::device_vector< int > * index_z_
Definition: kdtree_cuda_builder.h:718

flann::CudaKdTreeBuilder::separate_left_and_right_children
void separate_left_and_right_children(thrust::device_vector< int > &key_in, thrust::device_vector< int > &val_in, thrust::device_vector< int > &key_out, thrust::device_vector< int > &val_out, thrust::device_vector< int > &left_right_marks, bool scatter_val_out=true)
Definition: kdtree_cuda_builder.h:649

flann::CudaKdTreeBuilder::index_y_
thrust::device_vector< int > * index_y_
Definition: kdtree_cuda_builder.h:718

flann::CudaKdTreeBuilder::AllocationInfo
AllocationInfo
Definition: kdtree_cuda_builder.h:702

flann::CudaKdTreeBuilder::NodesAllocated
@ NodesAllocated
Definition: kdtree_cuda_builder.h:704

flann::CudaKdTreeBuilder::OutOfSpace
@ OutOfSpace
Definition: kdtree_cuda_builder.h:705

flann::CudaKdTreeBuilder::NodeCount
@ NodeCount
Definition: kdtree_cuda_builder.h:703

flann::CudaKdTreeBuilder::leftright_x_
thrust::device_vector< int > * leftright_x_
Definition: kdtree_cuda_builder.h:722

flann::CudaKdTreeBuilder::owners_z_
thrust::device_vector< int > * owners_z_
Definition: kdtree_cuda_builder.h:720

flann::CudaKdTreeBuilder::points_x_
thrust::device_vector< float > * points_x_
Definition: kdtree_cuda_builder.h:716

flann::CudaKdTreeBuilder::~CudaKdTreeBuilder
~CudaKdTreeBuilder()
Definition: kdtree_cuda_builder.h:449

flann::CudaKdTreeBuilder::points_
const thrust::device_vector< float4 > * points_
Definition: kdtree_cuda_builder.h:685

flann::CudaKdTreeBuilder::aabb_min_
thrust::device_vector< float4 > * aabb_min_
min aabb value of each node
Definition: kdtree_cuda_builder.h:697

flann::CudaKdTreeBuilder::allocation_info_
thrust::device_vector< int > allocation_info_
Definition: kdtree_cuda_builder.h:711

flann::CudaKdTreeBuilder::points_y_
thrust::device_vector< float > * points_y_
Definition: kdtree_cuda_builder.h:716

flann::CudaKdTreeBuilder::tmp_owners_
thrust::device_vector< int > * tmp_owners_
Definition: kdtree_cuda_builder.h:723

flann::CudaKdTreeBuilder::tmp_misc_
thrust::device_vector< int > * tmp_misc_
Definition: kdtree_cuda_builder.h:723

flann::CudaKdTreeBuilder::CudaKdTreeBuilder
CudaKdTreeBuilder(const thrust::device_vector< float4 > &points, int max_leaf_size)
Definition: kdtree_cuda_builder.h:404

flann::CudaKdTreeBuilder::points_z_
thrust::device_vector< float > * points_z_
Definition: kdtree_cuda_builder.h:716

flann::CudaKdTreeBuilder::leftright_y_
thrust::device_vector< int > * leftright_y_
Definition: kdtree_cuda_builder.h:722

flann::CudaKdTreeBuilder::update_leftright_and_aabb
void update_leftright_and_aabb(const thrust::device_vector< float > &x, const thrust::device_vector< float > &y, const thrust::device_vector< float > &z, const thrust::device_vector< int > &ix, const thrust::device_vector< int > &iy, const thrust::device_vector< int > &iz, const thrust::device_vector< int > &owners, thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > &splits, thrust::device_vector< float4 > &aabbMin, thrust::device_vector< float4 > &aabbMax)
takes the partitioned nodes, and sets the left-/right info of leaf nodes, as well as the AABBs
Definition: kdtree_cuda_builder.h:610

flann::CudaKdTreeBuilder::child1_
thrust::device_vector< int > * child1_
Definition: kdtree_cuda_builder.h:691

flann::CudaKdTreeBuilder::splits_
thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > * splits_
split info (dim/value or left/right pointers)
Definition: kdtree_cuda_builder.h:695

flann::CudaKdTreeBuilder::parent_
thrust::device_vector< int > * parent_
parent node of each node
Definition: kdtree_cuda_builder.h:693

flann::CudaKdTreeBuilder::owners_x_
thrust::device_vector< int > * owners_x_
Definition: kdtree_cuda_builder.h:720

flann::CudaKdTreeBuilder::delete_node_info_
bool delete_node_info_
Definition: kdtree_cuda_builder.h:724

flann::KDTreeCuda3dIndex
Definition: kdtree_cuda_3d_index.h:71

cutil_math.h

make_float4
__host__ __device__ float4 make_float4(float s)
Definition: cutil_math.h:182

QtCompat::endl
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718

flann::cuda::kd_tree_builder_detail::get_value_by_index
__host__ __device__ float get_value_by_index(const float4 &f, int i)
Definition: kdtree_cuda_builder.h:143

flann
Definition: all_indices.h:50

flann::operator<<
std::ostream & operator<<(std::ostream &stream, const cuda::kd_tree_builder_detail::SplitInfo &s)
Definition: kdtree_cuda_builder.h:396

std::swap
void swap(cloudViewer::core::SmallVectorImpl< T > &LHS, cloudViewer::core::SmallVectorImpl< T > &RHS)
Implement std::swap in terms of SmallVector swap.
Definition: SmallVector.h:1370

flann::cuda::kd_tree_builder_detail::IsEven
Definition: kdtree_cuda_builder.h:122

flann::cuda::kd_tree_builder_detail::IsEven::result_type
int result_type
Definition: kdtree_cuda_builder.h:123

flann::cuda::kd_tree_builder_detail::IsEven::operator()
__device__ int operator()(int i)
Definition: kdtree_cuda_builder.h:125

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes
Definition: kdtree_cuda_builder.h:159

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::x_
float * x_
Definition: kdtree_cuda_builder.h:168

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::MovePointsToChildNodes
MovePointsToChildNodes(int *child1, SplitInfo *splits, float *x, float *y, float *z, int *ox, int *oy, int *oz, int *lrx, int *lry, int *lrz)
Definition: kdtree_cuda_builder.h:160

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::lrz_
int * lrz_
Definition: kdtree_cuda_builder.h:173

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::z_
float * z_
Definition: kdtree_cuda_builder.h:168

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::lry_
int * lry_
Definition: kdtree_cuda_builder.h:173

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::operator()
__device__ void operator()(const thrust::tuple< int, int, int, int > &data)
Definition: kdtree_cuda_builder.h:175

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::y_
float * y_
Definition: kdtree_cuda_builder.h:168

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::ox_
int * ox_
Definition: kdtree_cuda_builder.h:170

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::oy_
int * oy_
Definition: kdtree_cuda_builder.h:170

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::child1_
int * child1_
Definition: kdtree_cuda_builder.h:164

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::oz_
int * oz_
Definition: kdtree_cuda_builder.h:170

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::lrx_
int * lrx_
Definition: kdtree_cuda_builder.h:173

flann::cuda::kd_tree_builder_detail::MovePointsToChildNodes::splits_
SplitInfo * splits_
Definition: kdtree_cuda_builder.h:165

flann::cuda::kd_tree_builder_detail::SecondElementIsEven
Definition: kdtree_cuda_builder.h:132

flann::cuda::kd_tree_builder_detail::SecondElementIsEven::operator()
__host__ __device__ bool operator()(const thrust::tuple< int, int > &i)
Definition: kdtree_cuda_builder.h:134

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB
used to update the left/right pointers and aabb infos after the node splits
Definition: kdtree_cuda_builder.h:231

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::nodes
SplitInfo * nodes
Definition: kdtree_cuda_builder.h:235

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::x
const float * x
Definition: kdtree_cuda_builder.h:240

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::iy
const int * iy
Definition: kdtree_cuda_builder.h:241

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::aabbMax
float4 * aabbMax
Definition: kdtree_cuda_builder.h:239

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::z
const float * z
Definition: kdtree_cuda_builder.h:240

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::operator()
__host__ __device__ void operator()(int i)
Definition: kdtree_cuda_builder.h:244

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::nElements
int nElements
Definition: kdtree_cuda_builder.h:233

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::labels
int * labels
Definition: kdtree_cuda_builder.h:237

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::aabbMin
float4 * aabbMin
Definition: kdtree_cuda_builder.h:238

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::ix
const int * ix
Definition: kdtree_cuda_builder.h:241

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::counts
int * counts
Definition: kdtree_cuda_builder.h:236

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::maxPoints
int maxPoints
Definition: kdtree_cuda_builder.h:232

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::iz
const int * iz
Definition: kdtree_cuda_builder.h:241

flann::cuda::kd_tree_builder_detail::SetLeftAndRightAndAABB::y
const float * y
Definition: kdtree_cuda_builder.h:240

flann::cuda::kd_tree_builder_detail::SplitInfo
Definition: kdtree_cuda_builder.h:103

flann::cuda::kd_tree_builder_detail::SplitInfo::split_val
float split_val
Definition: kdtree_cuda_builder.h:115

flann::cuda::kd_tree_builder_detail::SplitInfo::right
int right
Definition: kdtree_cuda_builder.h:110

flann::cuda::kd_tree_builder_detail::SplitInfo::split_dim
int split_dim
Definition: kdtree_cuda_builder.h:114

flann::cuda::kd_tree_builder_detail::SplitInfo::left
int left
Definition: kdtree_cuda_builder.h:108

flann::cuda::kd_tree_builder_detail::SplitNodes
Definition: kdtree_cuda_builder.h:272

flann::cuda::kd_tree_builder_detail::SplitNodes::maxPointsPerNode
int maxPointsPerNode
Definition: kdtree_cuda_builder.h:273

flann::cuda::kd_tree_builder_detail::SplitNodes::out_of_space
int * out_of_space
Definition: kdtree_cuda_builder.h:276

flann::cuda::kd_tree_builder_detail::SplitNodes::splits
SplitInfo * splits
Definition: kdtree_cuda_builder.h:279

flann::cuda::kd_tree_builder_detail::SplitNodes::operator()
__device__ void operator()(thrust::tuple< int &, int &, SplitInfo &, float4 &, float4 &, int > node)
Definition: kdtree_cuda_builder.h:282

flann::cuda::kd_tree_builder_detail::SplitNodes::parent_
int * parent_
Definition: kdtree_cuda_builder.h:278

flann::cuda::kd_tree_builder_detail::SplitNodes::nodes_allocated
int * nodes_allocated
Definition: kdtree_cuda_builder.h:275

flann::cuda::kd_tree_builder_detail::SplitNodes::node_count
int * node_count
Definition: kdtree_cuda_builder.h:274

flann::cuda::kd_tree_builder_detail::SplitNodes::child1_
int * child1_
Definition: kdtree_cuda_builder.h:277

flann::cuda::kd_tree_builder_detail::pointxyz_to_px_py_pz
Definition: kdtree_cuda_builder.h:384

flann::cuda::kd_tree_builder_detail::pointxyz_to_px_py_pz::operator()
__device__ thrust::tuple< float, float, float > operator()(const float4 &val)
Definition: kdtree_cuda_builder.h:386

flann::cuda::kd_tree_builder_detail::set_addr3
Definition: kdtree_cuda_builder.h:367

flann::cuda::kd_tree_builder_detail::set_addr3::val_
const int * val_
Definition: kdtree_cuda_builder.h:368

flann::cuda::kd_tree_builder_detail::set_addr3::operator()
__device__ int operator()(int id)
Definition: kdtree_cuda_builder.h:372

flann::cuda::kd_tree_builder_detail::set_addr3::npoints_
int npoints_
Definition: kdtree_cuda_builder.h:370

flann::cuda::kd_tree_builder_detail::set_addr3::f_
const int * f_
Definition: kdtree_cuda_builder.h:368