ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
kdtree_cuda_builder.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2011 Andreas Muetzel (amuetzel@uni-koblenz.de). All rights reserved.
5  *
6  * THE BSD LICENSE
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *************************************************************************/
29 
30 #ifndef FLANN_CUDA_KD_TREE_BUILDER_H_
31 #define FLANN_CUDA_KD_TREE_BUILDER_H_
32 #include <thrust/host_vector.h>
33 #include <thrust/device_vector.h>
34 #include <thrust/sort.h>
35 #include <thrust/partition.h>
36 #include <thrust/unique.h>
37 #include <thrust/scan.h>
38 #include <FLANN/util/cutil_math.h>
39 #include <stdlib.h>
40 
41 // #define PRINT_DEBUG_TIMING
42 
43 namespace flann
44 {
45 // template< typename T >
46 // void print_vector( const thrust::device_vector<T>& v )
47 // {
48 // for( int i=0; i< v.size(); i++ )
49 // {
50 // std::cout<<v[i]<<std::endl;
51 // }
52 // }
53 //
54 // template< typename T1, typename T2 >
55 // void print_vector( const thrust::device_vector<T1>& v1, const thrust::device_vector<T2>& v2 )
56 // {
57 // for( int i=0; i< v1.size(); i++ )
58 // {
59 // std::cout<<i<<": "<<v1[i]<<" "<<v2[i]<<std::endl;
60 // }
61 // }
62 //
63 // template< typename T1, typename T2, typename T3 >
64 // void print_vector( const thrust::device_vector<T1>& v1, const thrust::device_vector<T2>& v2, const thrust::device_vector<T3>& v3 )
65 // {
66 // for( int i=0; i< v1.size(); i++ )
67 // {
68 // std::cout<<i<<": "<<v1[i]<<" "<<v2[i]<<" "<<v3[i]<<std::endl;
69 // }
70 // }
71 //
72 // template< typename T >
73 // void print_vector_by_index( const thrust::device_vector<T>& v,const thrust::device_vector<int>& ind )
74 // {
75 // for( int i=0; i< v.size(); i++ )
76 // {
77 // std::cout<<v[ind[i]]<<std::endl;
78 // }
79 // }
80 
81 // std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::SplitInfo& s) {
82 // stream<<"(split l/r: "<< s.left <<" "<< s.right<< " split:"<<s.split_dim<<" "<<s.split_val<<")";
83 // return stream;
84 // }
85 //
86 //
87 // std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::NodeInfo& s) {
88 // stream<<"(node: "<<s.child1()<<" "<<s.parent()<<" "<<s.child2()<<")";
89 // return stream;
90 // }
91 //
92 // std::ostream& operator <<(std::ostream& stream, const float4& s) {
93 // stream<<"("<<s.x<<","<<s.y<<","<<s.z<<","<<s.w<<")";
94 // return stream;
95 // }
96 namespace cuda
97 {
98 namespace kd_tree_builder_detail
99 {
102 struct SplitInfo
103 {
104  union {
105  struct
106  {
107  // begin of child nodes
108  int left;
109  // end of child nodes
110  int right;
111  };
112  struct
113  {
115  float split_val;
116  };
117  };
118 
119 };
120 
121 struct IsEven
122 {
123  typedef int result_type;
124  __device__
125  int operator()(int i )
126  {
127  return (i& 1)==0;
128  }
129 };
130 
132 {
133  __host__ __device__
134  bool operator()( const thrust::tuple<int,int>& i )
135  {
136  return (thrust::get<1>(i)& 1)==0;
137  }
138 };
139 
142 __host__ __device__
143 float get_value_by_index( const float4& f, int i )
144 {
145  switch(i) {
146  case 0:
147  return f.x;
148  case 1:
149  return f.y;
150  default:
151  return f.z;
152  }
153 
154 }
155 
159 {
160  MovePointsToChildNodes( int* child1, SplitInfo* splits, float* x, float* y, float* z, int* ox, int* oy, int* oz, int* lrx, int* lry, int* lrz )
161  : child1_(child1), splits_(splits), x_(x), y_(y), z_(z), ox_(ox), oy_(oy), oz_(oz), lrx_(lrx), lry_(lry), lrz_(lrz){}
162  // int dim;
163  // float threshold;
164  int* child1_;
166 
167  // coordinate values
168  float* x_, * y_, * z_;
169  // owner indices -> which node does the point belong to?
170  int* ox_, * oy_, * oz_;
171  // temp info: will be set to 1 of a point is moved to the right child node, 0 otherwise
172  // (used later in the scan op to separate the points of the children into continuous ranges)
173  int* lrx_, * lry_, * lrz_;
174  __device__
175  void operator()( const thrust::tuple<int, int, int, int>& data )
176  {
177  int index = thrust::get<0>(data);
178  int owner = ox_[index]; // before a split, all points at the same position in the index array have the same owner
179  int point_ind1=thrust::get<1>(data);
180  int point_ind2=thrust::get<2>(data);
181  int point_ind3=thrust::get<3>(data);
182  int leftChild=child1_[owner];
183  int split_dim;
184  float dim_val1, dim_val2, dim_val3;
185  SplitInfo split;
186  lrx_[index]=0;
187  lry_[index]=0;
188  lrz_[index]=0;
189  // this element already belongs to a leaf node -> everything alright, no need to change anything
190  if( leftChild==-1 ) {
191  return;
192  }
193  // otherwise: load split data, and assign this index to the new owner
194  split = splits_[owner];
195  split_dim=split.split_dim;
196  switch( split_dim ) {
197  case 0:
198  dim_val1=x_[point_ind1];
199  dim_val2=x_[point_ind2];
200  dim_val3=x_[point_ind3];
201  break;
202  case 1:
203  dim_val1=y_[point_ind1];
204  dim_val2=y_[point_ind2];
205  dim_val3=y_[point_ind3];
206  break;
207  default:
208  dim_val1=z_[point_ind1];
209  dim_val2=z_[point_ind2];
210  dim_val3=z_[point_ind3];
211  break;
212 
213  }
214 
215 
216  int r1=leftChild +(dim_val1 > split.split_val);
217  ox_[index]=r1;
218  int r2=leftChild+(dim_val2 > split.split_val);
219  oy_[index]=r2;
220  oz_[index]=leftChild+(dim_val3 > split.split_val);
221 
222  lrx_[index] = (dim_val1 > split.split_val);
223  lry_[index] = (dim_val2 > split.split_val);
224  lrz_[index] = (dim_val3 > split.split_val);
225  // return thrust::make_tuple( r1, r2, leftChild+(dim_val > split.split_val) );
226  }
227 };
228 
231 {
234 
236  int* counts;
237  int* labels;
238  float4* aabbMin;
239  float4* aabbMax;
240  const float* x,* y,* z;
241  const int* ix, * iy, * iz;
242 
243  __host__ __device__
244  void operator()( int i )
245  {
246  int index=labels[i];
247  int right;
248  int left = counts[i];
249  nodes[index].left=left;
250  if( i < nElements-1 ) {
251  right=counts[i+1];
252  }
253  else { // index==nNodes
254  right=maxPoints;
255  }
256  nodes[index].right=right;
257  aabbMin[index].x=x[ix[left]];
258  aabbMin[index].y=y[iy[left]];
259  aabbMin[index].z=z[iz[left]];
260  aabbMax[index].x=x[ix[right-1]];
261  aabbMax[index].y=y[iy[right-1]];
262  aabbMax[index].z=z[iz[right-1]];
263  }
264 };
265 
266 
272 {
277  int* child1_;
278  int* parent_;
280 
281  __device__
282  void operator()( thrust::tuple<int&, int&,SplitInfo&,float4&,float4&, int> node ) // float4: aabbMin, aabbMax
283  {
284  int& parent=thrust::get<0>(node);
285  int& child1=thrust::get<1>(node);
286  SplitInfo& s=thrust::get<2>(node);
287  const float4& aabbMin=thrust::get<3>(node);
288  const float4& aabbMax=thrust::get<4>(node);
289  int my_index = thrust::get<5>(node);
290  bool split_node=false;
291  // first, each thread block counts the number of nodes that it needs to allocate...
292  __shared__ int block_nodes_to_allocate;
293  if( threadIdx.x== 0 ) block_nodes_to_allocate=0;
294  __syncthreads();
295 
296  // don't split if all points are equal
297  // (could lead to an infinite loop, and doesn't make any sense anyway)
298  bool all_points_in_node_are_equal=aabbMin.x == aabbMax.x && aabbMin.y==aabbMax.y && aabbMin.z==aabbMax.z;
299 
300  int offset_to_global=0;
301 
302  // maybe this could be replaced with a reduction...
303  if(( child1==-1) &&( s.right-s.left > maxPointsPerNode) && !all_points_in_node_are_equal ) { // leaf node
304  split_node=true;
305  offset_to_global = atomicAdd( &block_nodes_to_allocate,2 );
306  }
307 
308  __syncthreads();
309  __shared__ int block_left;
310  __shared__ bool enough_space;
311  // ... then the first thread tries to allocate this many nodes...
312  if( threadIdx.x==0) {
313  block_left = atomicAdd( node_count, block_nodes_to_allocate );
314  enough_space = block_left+block_nodes_to_allocate < *nodes_allocated;
315  // if it doesn't succeed, no nodes will be created by this block
316  if( !enough_space ) {
317  atomicAdd( node_count, -block_nodes_to_allocate );
318  *out_of_space=1;
319  }
320  }
321 
322  __syncthreads();
323  // this thread needs to split it's node && there was enough space for all the nodes
324  // in this block.
325  //(The whole "allocate-per-block-thing" is much faster than letting each element allocate
326  // its space on its own, because shared memory atomics are A LOT faster than
327  // global mem atomics!)
328  if( split_node && enough_space ) {
329  int left = block_left + offset_to_global;
330 
331  splits[left].left=s.left;
332  splits[left].right=s.right;
333  splits[left+1].left=0;
334  splits[left+1].right=0;
335 
336  // split axis/position: middle of longest aabb extent
337  float4 aabbDim=aabbMax-aabbMin;
338  int maxDim=0;
339  float maxDimLength=aabbDim.x;
340  float4 splitVal=(aabbMax+aabbMin);
341  splitVal*=0.5f;
342  for( int i=1; i<=2; i++ ) {
343  float val = get_value_by_index(aabbDim,i);
344  if( val > maxDimLength ) {
345  maxDim=i;
346  maxDimLength=val;
347  }
348  }
349  s.split_dim=maxDim;
350  s.split_val=get_value_by_index(splitVal,maxDim);
351 
352  child1_[my_index]=left;
353  splits[my_index]=s;
354 
355  parent_[left]=my_index;
356  parent_[left+1]=my_index;
357  child1_[left]=-1;
358  child1_[left+1]=-1;
359  }
360  }
361 };
362 
363 
366 struct set_addr3
367 {
368  const int* val_, * f_;
369 
370  int npoints_;
371  __device__
372  int operator()( int id )
373  {
374  int nf = f_[npoints_-1] + (val_[npoints_-1]);
375  int f=f_[id];
376  int t = id -f+nf;
377  return val_[id] ? f : t;
378  }
379 };
380 
384 {
385  __device__
386  thrust::tuple<float,float,float> operator()( const float4& val )
387  {
388  return thrust::make_tuple(val.x, val.y, val.z);
389  }
390 };
391 } // namespace kd_tree_builder_detail
392 
393 } // namespace cuda
394 
395 
396 std::ostream& operator <<(std::ostream& stream, const cuda::kd_tree_builder_detail::SplitInfo& s)
397 {
398  stream<<"(split l/r: "<< s.left <<" "<< s.right<< " split:"<<s.split_dim<<" "<<s.split_val<<")";
399  return stream;
400 }
402 {
403 public:
404  CudaKdTreeBuilder( const thrust::device_vector<float4>& points, int max_leaf_size ) : /*out_of_space_(1,0),node_count_(1,1),*/ max_leaf_size_(max_leaf_size)
405  {
406  points_=&points;
407  int prealloc = points.size()/max_leaf_size_*16;
408  allocation_info_.resize(3);
412 
413  // std::cout<<points_->size()<<std::endl;
414 
415  child1_=new thrust::device_vector<int>(prealloc,-1);
416  parent_=new thrust::device_vector<int>(prealloc,-1);
418  s.left=0;
419  s.right=0;
420  splits_=new thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>(prealloc,s);
421  s.right=points.size();
422  (*splits_)[0]=s;
423 
424  aabb_min_=new thrust::device_vector<float4>(prealloc);
425  aabb_max_=new thrust::device_vector<float4>(prealloc);
426 
427  index_x_=new thrust::device_vector<int>(points_->size());
428  index_y_=new thrust::device_vector<int>(points_->size());
429  index_z_=new thrust::device_vector<int>(points_->size());
430 
431  owners_x_=new thrust::device_vector<int>(points_->size(),0);
432  owners_y_=new thrust::device_vector<int>(points_->size(),0);
433  owners_z_=new thrust::device_vector<int>(points_->size(),0);
434 
435  leftright_x_ = new thrust::device_vector<int>(points_->size(),0);
436  leftright_y_ = new thrust::device_vector<int>(points_->size(),0);
437  leftright_z_ = new thrust::device_vector<int>(points_->size(),0);
438 
439  tmp_index_=new thrust::device_vector<int>(points_->size());
440  tmp_owners_=new thrust::device_vector<int>(points_->size());
441  tmp_misc_=new thrust::device_vector<int>(points_->size());
442 
443  points_x_=new thrust::device_vector<float>(points_->size());
444  points_y_=new thrust::device_vector<float>(points_->size());
445  points_z_=new thrust::device_vector<float>(points_->size());
446  delete_node_info_=false;
447  }
448 
450  {
451  if( delete_node_info_ ) {
452  delete child1_;
453  delete parent_;
454  delete splits_;
455  delete aabb_min_;
456  delete aabb_max_;
457  delete index_x_;
458  }
459 
460  delete index_y_;
461  delete index_z_;
462  delete owners_x_;
463  delete owners_y_;
464  delete owners_z_;
465  delete points_x_;
466  delete points_y_;
467  delete points_z_;
468  delete leftright_x_;
469  delete leftright_y_;
470  delete leftright_z_;
471  delete tmp_index_;
472  delete tmp_owners_;
473  delete tmp_misc_;
474  }
475 
484  void buildTree()
485  {
486  // std::cout<<"buildTree()"<<std::endl;
487  // sleep(1);
488  // Util::Timer stepTimer;
489  thrust::transform( points_->begin(), points_->end(), thrust::make_zip_iterator(thrust::make_tuple(points_x_->begin(), points_y_->begin(),points_z_->begin()) ), cuda::kd_tree_builder_detail::pointxyz_to_px_py_pz() );
490 
491  thrust::counting_iterator<int> it(0);
492  thrust::copy( it, it+points_->size(), index_x_->begin() );
493 
494  thrust::copy( index_x_->begin(), index_x_->end(), index_y_->begin() );
495  thrust::copy( index_x_->begin(), index_x_->end(), index_z_->begin() );
496 
497  thrust::device_vector<float> tmpv(points_->size());
498 
499  // create sorted index list -> can be used to compute AABBs in O(1)
500  thrust::copy(points_x_->begin(), points_x_->end(), tmpv.begin());
501  thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_x_->begin() );
502  thrust::copy(points_y_->begin(), points_y_->end(), tmpv.begin());
503  thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_y_->begin() );
504  thrust::copy(points_z_->begin(), points_z_->end(), tmpv.begin());
505  thrust::sort_by_key( tmpv.begin(), tmpv.end(), index_z_->begin() );
506 
507 
508  (*aabb_min_)[0]=make_float4((*points_x_)[(*index_x_)[0]],(*points_y_)[(*index_y_)[0]],(*points_z_)[(*index_z_)[0]],0);
509 
510  (*aabb_max_)[0]=make_float4((*points_x_)[(*index_x_)[points_->size()-1]],(*points_y_)[(*index_y_)[points_->size()-1]],(*points_z_)[(*index_z_)[points_->size()-1]],0);
511  #ifdef PRINT_DEBUG_TIMING
512  cudaDeviceSynchronize();
513  std::cout<<" initial stuff:"<<stepTimer.elapsed()<<std::endl;
514  stepTimer.restart();
515  #endif
516  int last_node_count=0;
517  for( int i=0;; i++ ) {
519 
521  sn.node_count=thrust::raw_pointer_cast(&allocation_info_[NodeCount]);
522  sn.nodes_allocated=thrust::raw_pointer_cast(&allocation_info_[NodesAllocated]);
523  sn.out_of_space=thrust::raw_pointer_cast(&allocation_info_[OutOfSpace]);
524  sn.child1_=thrust::raw_pointer_cast(&(*child1_)[0]);
525  sn.parent_=thrust::raw_pointer_cast(&(*parent_)[0]);
526  sn.splits=thrust::raw_pointer_cast(&(*splits_)[0]);
527 
528  thrust::counting_iterator<int> cit(0);
529  thrust::for_each( thrust::make_zip_iterator(thrust::make_tuple( parent_->begin(), child1_->begin(), splits_->begin(), aabb_min_->begin(), aabb_max_->begin(), cit )),
530  thrust::make_zip_iterator(thrust::make_tuple( parent_->begin()+last_node_count, child1_->begin()+last_node_count,splits_->begin()+last_node_count, aabb_min_->begin()+last_node_count, aabb_max_->begin()+last_node_count,cit+last_node_count )),
531  sn );
532  // copy allocation info to host
533  thrust::host_vector<int> alloc_info = allocation_info_;
534 
535  if( last_node_count == alloc_info[NodeCount] ) { // no more nodes were split -> done
536  break;
537  }
538  last_node_count=alloc_info[NodeCount];
539 
540  // a node was un-splittable due to a lack of space
541  if( alloc_info[OutOfSpace]==1 ) {
542  resize_node_vectors(alloc_info[NodesAllocated]*2);
543  alloc_info[OutOfSpace]=0;
544  alloc_info[NodesAllocated]*=2;
545  allocation_info_=alloc_info;
546  }
547  #ifdef PRINT_DEBUG_TIMING
548  cudaDeviceSynchronize();
549  std::cout<<" node split:"<<stepTimer.elapsed()<<std::endl;
550  stepTimer.restart();
551  #endif
552 
553  // foreach point: point was in node that was split?move it to child (leaf) node : do nothing
554  cuda::kd_tree_builder_detail::MovePointsToChildNodes sno( thrust::raw_pointer_cast(&(*child1_)[0]),
555  thrust::raw_pointer_cast(&(*splits_)[0]),
556  thrust::raw_pointer_cast(&(*points_x_)[0]),
557  thrust::raw_pointer_cast(&(*points_y_)[0]),
558  thrust::raw_pointer_cast(&(*points_z_)[0]),
559  thrust::raw_pointer_cast(&(*owners_x_)[0]),
560  thrust::raw_pointer_cast(&(*owners_y_)[0]),
561  thrust::raw_pointer_cast(&(*owners_z_)[0]),
562  thrust::raw_pointer_cast(&(*leftright_x_)[0]),
563  thrust::raw_pointer_cast(&(*leftright_y_)[0]),
564  thrust::raw_pointer_cast(&(*leftright_z_)[0])
565  );
566  thrust::counting_iterator<int> ci0(0);
567  thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( ci0, index_x_->begin(), index_y_->begin(), index_z_->begin()) ),
568  thrust::make_zip_iterator( thrust::make_tuple( ci0+points_->size(), index_x_->end(), index_y_->end(), index_z_->end()) ),sno );
569 
570  #ifdef PRINT_DEBUG_TIMING
571  cudaDeviceSynchronize();
572  std::cout<<" set new owners:"<<stepTimer.elapsed()<<std::endl;
573  stepTimer.restart();
574  #endif
575 
576  // move points around so that each leaf node's points are continuous
584 
585  #ifdef PRINT_DEBUG_TIMING
586  cudaDeviceSynchronize();
587  std::cout<<" split:"<<stepTimer.elapsed()<<std::endl;
588  stepTimer.restart();
589  #endif
590  // calculate new AABB etc
592  #ifdef PRINT_DEBUG_TIMING
593  cudaDeviceSynchronize();
594  std::cout<<" update_leftright_and_aabb:"<<stepTimer.elapsed()<<std::endl;
595  stepTimer.restart();
596  print_vector(node_count_);
597  #endif
598 
599  }
600  }
601 
602  template<class Distance>
603  friend class KDTreeCuda3dIndex;
604 
605 protected:
606 
607 
609  void
610  update_leftright_and_aabb( const thrust::device_vector<float>& x, const thrust::device_vector<float>& y,const thrust::device_vector<float>& z,
611  const thrust::device_vector<int>& ix, const thrust::device_vector<int>& iy,const thrust::device_vector<int>& iz,
612  const thrust::device_vector<int>& owners,
613  thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>& splits, thrust::device_vector<float4>& aabbMin,thrust::device_vector<float4>& aabbMax)
614  {
615  thrust::device_vector<int>* labelsUnique=tmp_owners_;
616  thrust::device_vector<int>* countsUnique=tmp_index_;
617  // assume: points of each node are continuous in the array
618 
619  // find which nodes are here, and where each node's points begin and end
620  int unique_labels = thrust::unique_by_key_copy( owners.begin(), owners.end(), thrust::counting_iterator<int>(0), labelsUnique->begin(), countsUnique->begin()).first - labelsUnique->begin();
621 
622  // update the info
624  s.maxPoints=x.size();
625  s.nElements=unique_labels;
626  s.nodes=thrust::raw_pointer_cast(&(splits[0]));
627  s.counts=thrust::raw_pointer_cast(&( (*countsUnique)[0]));
628  s.labels=thrust::raw_pointer_cast(&( (*labelsUnique)[0]));
629  s.x=thrust::raw_pointer_cast(&x[0]);
630  s.y=thrust::raw_pointer_cast(&y[0]);
631  s.z=thrust::raw_pointer_cast(&z[0]);
632  s.ix=thrust::raw_pointer_cast(&ix[0]);
633  s.iy=thrust::raw_pointer_cast(&iy[0]);
634  s.iz=thrust::raw_pointer_cast(&iz[0]);
635  s.aabbMin=thrust::raw_pointer_cast(&aabbMin[0]);
636  s.aabbMax=thrust::raw_pointer_cast(&aabbMax[0]);
637 
638  thrust::counting_iterator<int> it(0);
639  thrust::for_each(it, it+unique_labels, s);
640  }
641 
649  void separate_left_and_right_children( thrust::device_vector<int>& key_in, thrust::device_vector<int>& val_in, thrust::device_vector<int>& key_out, thrust::device_vector<int>& val_out, thrust::device_vector<int>& left_right_marks, bool scatter_val_out=true )
650  {
651  thrust::device_vector<int>* f_tmp = &val_out;
652  thrust::device_vector<int>* addr_tmp = tmp_misc_;
653 
654  thrust::exclusive_scan( /*thrust::make_transform_iterator(*/ left_right_marks.begin() /*,cuda::kd_tree_builder_detail::IsEven*/
655  /*())*/, /*thrust::make_transform_iterator(*/ left_right_marks.end() /*,cuda::kd_tree_builder_detail::IsEven*/
656  /*())*/, f_tmp->begin() );
658  sa.val_=thrust::raw_pointer_cast(&left_right_marks[0]);
659  sa.f_=thrust::raw_pointer_cast(&(*f_tmp)[0]);
660  sa.npoints_=key_in.size();
661  thrust::counting_iterator<int> it(0);
662  thrust::transform(it, it+val_in.size(), addr_tmp->begin(), sa);
663 
664  thrust::scatter(key_in.begin(), key_in.end(), addr_tmp->begin(), key_out.begin());
665  if( scatter_val_out ) thrust::scatter(val_in.begin(), val_in.end(), addr_tmp->begin(), val_out.begin());
666  }
667 
670  void resize_node_vectors( size_t new_size )
671  {
672  size_t add = new_size - child1_->size();
673  child1_->insert(child1_->end(), add, -1);
674  parent_->insert(parent_->end(), add, -1);
676  s.left=0;
677  s.right=0;
678  splits_->insert(splits_->end(), add, s);
679  float4 f;
680  aabb_min_->insert(aabb_min_->end(), add, f);
681  aabb_max_->insert(aabb_max_->end(), add, f);
682  }
683 
684 
685  const thrust::device_vector<float4>* points_;
686 
687  // tree data, those are stored per-node
688 
691  thrust::device_vector<int>* child1_;
693  thrust::device_vector<int>* parent_;
695  thrust::device_vector<cuda::kd_tree_builder_detail::SplitInfo>* splits_;
697  thrust::device_vector<float4>* aabb_min_;
699  thrust::device_vector<float4>* aabb_max_;
700 
702  {
705  OutOfSpace=2
706  };
707  // those were put into a single vector of 3 elements so that only one mem transfer will be needed for all three of them
708  // thrust::device_vector<int> out_of_space_;
709  // thrust::device_vector<int> node_count_;
710  // thrust::device_vector<int> nodes_allocated_;
711  thrust::device_vector<int> allocation_info_;
712 
714 
715  // coordinate values of the points
716  thrust::device_vector<float>* points_x_, * points_y_, * points_z_;
717  // indices
718  thrust::device_vector<int>* index_x_, * index_y_, * index_z_;
719  // owner node
720  thrust::device_vector<int>* owners_x_, * owners_y_, * owners_z_;
721  // contains info about whether a point was partitioned to the left or right child after a split
722  thrust::device_vector<int>* leftright_x_, * leftright_y_, * leftright_z_;
723  thrust::device_vector<int>* tmp_index_, * tmp_owners_, * tmp_misc_;
725 };
726 
727 
728 } // namespace flann
729 #endif
int points
bool copy
Definition: VtkUtils.cpp:74
void resize_node_vectors(size_t new_size)
thrust::device_vector< float4 > * aabb_max_
max aabb value of each node
thrust::device_vector< int > * index_x_
thrust::device_vector< int > * leftright_z_
thrust::device_vector< int > * owners_y_
thrust::device_vector< int > * tmp_index_
thrust::device_vector< int > * index_z_
void separate_left_and_right_children(thrust::device_vector< int > &key_in, thrust::device_vector< int > &val_in, thrust::device_vector< int > &key_out, thrust::device_vector< int > &val_out, thrust::device_vector< int > &left_right_marks, bool scatter_val_out=true)
thrust::device_vector< int > * index_y_
thrust::device_vector< int > * leftright_x_
thrust::device_vector< int > * owners_z_
thrust::device_vector< float > * points_x_
const thrust::device_vector< float4 > * points_
thrust::device_vector< float4 > * aabb_min_
min aabb value of each node
thrust::device_vector< int > allocation_info_
thrust::device_vector< float > * points_y_
thrust::device_vector< int > * tmp_owners_
thrust::device_vector< int > * tmp_misc_
CudaKdTreeBuilder(const thrust::device_vector< float4 > &points, int max_leaf_size)
thrust::device_vector< float > * points_z_
thrust::device_vector< int > * leftright_y_
void update_leftright_and_aabb(const thrust::device_vector< float > &x, const thrust::device_vector< float > &y, const thrust::device_vector< float > &z, const thrust::device_vector< int > &ix, const thrust::device_vector< int > &iy, const thrust::device_vector< int > &iz, const thrust::device_vector< int > &owners, thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > &splits, thrust::device_vector< float4 > &aabbMin, thrust::device_vector< float4 > &aabbMax)
takes the partitioned nodes, and sets the left-/right info of leaf nodes, as well as the AABBs
thrust::device_vector< int > * child1_
thrust::device_vector< cuda::kd_tree_builder_detail::SplitInfo > * splits_
split info (dim/value or left/right pointers)
thrust::device_vector< int > * parent_
parent node of each node
thrust::device_vector< int > * owners_x_
__host__ __device__ float4 make_float4(float s)
Definition: cutil_math.h:182
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718
__host__ __device__ float get_value_by_index(const float4 &f, int i)
std::ostream & operator<<(std::ostream &stream, const cuda::kd_tree_builder_detail::SplitInfo &s)
void swap(cloudViewer::core::SmallVectorImpl< T > &LHS, cloudViewer::core::SmallVectorImpl< T > &RHS)
Implement std::swap in terms of SmallVector swap.
Definition: SmallVector.h:1370
MovePointsToChildNodes(int *child1, SplitInfo *splits, float *x, float *y, float *z, int *ox, int *oy, int *oz, int *lrx, int *lry, int *lrz)
__device__ void operator()(const thrust::tuple< int, int, int, int > &data)
__host__ __device__ bool operator()(const thrust::tuple< int, int > &i)
used to update the left/right pointers and aabb infos after the node splits
__device__ void operator()(thrust::tuple< int &, int &, SplitInfo &, float4 &, float4 &, int > node)
__device__ thrust::tuple< float, float, float > operator()(const float4 &val)