30 #ifndef FLANN_AUTOTUNED_INDEX_H_
31 #define FLANN_AUTOTUNED_INDEX_H_
49 template<
typename Distance>
50 inline NNIndex<Distance>*
57 AutotunedIndexParams(
float target_precision = 0.8,
float build_weight = 0.01,
float memory_weight = 0,
float sample_fraction = 0.1)
61 (*this)[
"target_precision"] = target_precision;
63 (*this)[
"build_weight"] = build_weight;
65 (*this)[
"memory_weight"] = memory_weight;
67 (*this)[
"sample_fraction"] = sample_fraction;
72 template <
typename Distance>
104 bestParams_(other.bestParams_),
105 bestSearchParams_(other.bestSearchParams_),
106 speedup_(other.speedup_),
107 dataset_(other.dataset_),
108 target_precision_(other.target_precision_),
109 build_weight_(other.build_weight_),
110 memory_weight_(other.memory_weight_),
111 sample_fraction_(other.sample_fraction_)
113 bestIndex_ = other.bestIndex_->clone();
137 bestParams_ = estimateBuildParams();
138 Logger::info(
"----------------------------------------------------\n");
142 Logger::info(
"----------------------------------------------------\n");
144 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,
"algorithm");
146 bestIndex_->buildIndex();
147 speedup_ = estimateSearchParams(bestSearchParams_);
148 Logger::info(
"----------------------------------------------------\n");
152 Logger::info(
"----------------------------------------------------\n");
153 bestParams_[
"search_params"] = bestSearchParams_;
154 bestParams_[
"speedup"] = speedup_;
167 bestIndex_->addPoints(
points, rebuild_threshold);
174 bestIndex_->removePoint(
id);
179 template<
typename Archive>
186 ar & target_precision_;
189 ar & sample_fraction_;
192 if (Archive::is_saving::value) {
193 index_type = get_param<flann_algorithm_t>(bestParams_,
"algorithm");
196 ar & bestSearchParams_.
checks;
198 if (Archive::is_loading::value) {
199 bestParams_[
"algorithm"] = index_type;
216 bestIndex_->saveIndex(stream);
227 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,
"algorithm");
229 bestIndex_->loadIndex(stream);
241 return bestIndex_->knnSearch(queries, indices, dists, knn, autotuned_params);
244 return bestIndex_->knnSearch(queries, indices, dists, knn,
params);
249 std::vector< std::vector<size_t> >& indices,
250 std::vector<std::vector<DistanceType> >& dists,
257 return bestIndex_->knnSearch(queries, indices, dists, knn, autotuned_params);
260 return bestIndex_->knnSearch(queries, indices, dists, knn,
params);
274 return bestIndex_->radiusSearch(queries, indices, dists, radius, autotuned_params);
277 return bestIndex_->radiusSearch(queries, indices, dists, radius,
params);
282 std::vector< std::vector<size_t> >& indices,
283 std::vector<std::vector<DistanceType> >& dists,
290 return bestIndex_->radiusSearch(queries, indices, dists, radius, autotuned_params);
293 return bestIndex_->radiusSearch(queries, indices, dists, radius,
params);
315 return bestSearchParams_;
329 return bestIndex_->size();
337 return bestIndex_->veclen();
345 return bestIndex_->usedMemory();
371 float searchTimeCost;
378 void evaluate_kmeans(CostData& cost)
384 Logger::info(
"KMeansTree using params: max_iterations=%d, branching=%d\n",
385 get_param<int>(cost.params,
"iterations"),
386 get_param<int>(cost.params,
"branching"));
387 KMeansIndex<Distance> kmeans(sampledDataset_, cost.params,
distance_);
392 float buildTime = (float)t.value;
397 float datasetMemory = float(sampledDataset_.
rows * sampledDataset_.
cols *
sizeof(
float));
398 cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
399 cost.searchTimeCost = searchTime;
400 cost.buildTimeCost = buildTime;
401 Logger::info(
"KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
405 void evaluate_kdtree(CostData& cost)
411 Logger::info(
"KDTree using params: trees=%d\n", get_param<int>(cost.params,
"trees"));
412 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params,
distance_);
417 float buildTime = (float)t.value;
422 float datasetMemory = float(sampledDataset_.
rows * sampledDataset_.
cols *
sizeof(
float));
423 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
424 cost.searchTimeCost = searchTime;
425 cost.buildTimeCost = buildTime;
426 Logger::info(
"KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
478 void optimizeKMeans(std::vector<CostData>& costs)
480 Logger::info(
"KMEANS, Step 1: Exploring parameter space\n");
483 int maxIterations[] = { 1, 5, 10, 15 };
484 int branchingFactors[] = { 16, 32, 64, 128, 256 };
487 costs.reserve(costs.size() + kmeansParamSpaceSize);
495 cost.params[
"iterations"] = maxIterations[i];
496 cost.params[
"branching"] = branchingFactors[j];
498 evaluate_kmeans(cost);
499 costs.push_back(cost);
526 void optimizeKDTree(std::vector<CostData>& costs)
528 Logger::info(
"KD-TREE, Step 1: Exploring parameter space\n");
531 int testTrees[] = { 1, 4, 8, 16, 32, 64, 128};
537 cost.params[
"trees"] = testTrees[i];
539 evaluate_kdtree(cost);
540 costs.push_back(cost);
570 std::vector<CostData> costs;
572 int sampleSize = int(sample_fraction_ * dataset_.
rows);
573 int testSampleSize =
std::min(sampleSize / 10, 1000);
575 Logger::info(
"Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.
rows, sampleSize, testSampleSize, target_precision_);
579 if (testSampleSize < 10) {
581 return LinearIndexParams();
587 testDataset_ =
random_sample(sampledDataset_, testSampleSize,
true);
591 gt_matches_ = Matrix<size_t>(
new size_t[testDataset_.
rows], testDataset_.
rows, 1);
595 while (t.value<0.2) {
598 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0,
distance_);
602 CostData linear_cost;
603 linear_cost.searchTimeCost = (float)t.value/repeats;
604 linear_cost.buildTimeCost = 0;
605 linear_cost.memoryCost = 0;
608 costs.push_back(linear_cost);
613 optimizeKMeans(costs);
614 optimizeKDTree(costs);
616 float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost;
617 for (
size_t i = 0; i < costs.size(); ++i) {
618 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
620 if (timeCost < bestTimeCost) {
621 bestTimeCost = timeCost;
627 if (bestTimeCost > 0) {
628 float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost;
629 for (
size_t i = 0; i < costs.size(); ++i) {
630 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
631 memory_weight_ * costs[i].memoryCost;
633 if (crtCost < bestCost) {
635 bestParams = costs[i].params;
641 delete[] gt_matches_.
ptr();
642 delete[] testDataset_.
ptr();
643 delete[] sampledDataset_.
ptr();
655 float estimateSearchParams(SearchParams& searchParams)
658 const size_t SAMPLE_COUNT = 1000;
660 assert(bestIndex_ !=
NULL);
664 int samples = (int)
std::min(dataset_.
rows / 10, SAMPLE_COUNT);
666 Matrix<ElementType> testDataset =
random_sample(dataset_, samples);
671 Matrix<size_t> gt_matches(
new size_t[testDataset.rows], testDataset.rows, 1);
675 while (t.value<0.2) {
678 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1,
distance_);
681 float linear = (float)t.value/repeats;
689 Logger::info(
"KMeans algorithm, estimating cluster border factor\n");
690 KMeansIndex<Distance>* kmeans =
static_cast<KMeansIndex<Distance>*
>(bestIndex_);
691 float bestSearchTime = -1;
692 float best_cb_index = -1;
693 int best_checks = -1;
694 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
695 kmeans->set_cb_index(cb_index);
697 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
698 bestSearchTime = searchTime;
699 best_cb_index = cb_index;
700 best_checks = checks;
703 searchTime = bestSearchTime;
704 cb_index = best_cb_index;
705 checks = best_checks;
707 kmeans->set_cb_index(best_cb_index);
709 bestParams_[
"cb_index"] = cb_index;
715 Logger::info(
"Required number of checks: %d \n", checks);
716 searchParams.checks = checks;
718 speedup =
linear / searchTime;
720 delete[] gt_matches.ptr();
721 delete[] testDataset.ptr();
732 std::swap(bestParams_, other.bestParams_);
733 std::swap(bestSearchParams_, other.bestSearchParams_);
736 std::swap(target_precision_, other.target_precision_);
737 std::swap(build_weight_, other.build_weight_);
738 std::swap(memory_weight_, other.memory_weight_);
739 std::swap(sample_fraction_, other.sample_fraction_);
743 NNIndex<Distance>* bestIndex_;
746 SearchParams bestSearchParams_;
748 Matrix<ElementType> sampledDataset_;
749 Matrix<ElementType> testDataset_;
750 Matrix<size_t> gt_matches_;
757 Matrix<ElementType> dataset_;
762 float target_precision_;
764 float memory_weight_;
765 float sample_fraction_;
double Distance(const Point3D< Real > &p1, const Point3D< Real > &p2)
cmdLineReadable * params[]
void loadIndex(FILE *stream)
void buildIndex(const Matrix< ElementType > &dataset)
Distance::ResultType DistanceType
FLANN_DEPRECATED float getSpeedup() const
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams) const
int radiusSearch(const Matrix< ElementType > &queries, std::vector< std::vector< size_t > > &indices, std::vector< std::vector< DistanceType > > &dists, DistanceType radius, const SearchParams ¶ms) const
AutotunedIndex(const Matrix< ElementType > &inputData, const IndexParams ¶ms=AutotunedIndexParams(), Distance d=Distance())
IndexParams getParameters() const
NNIndex< Distance > BaseClass
void addPoints(const Matrix< ElementType > &points, float rebuild_threshold=2)
Incrementally add points to the index.
Distance::ElementType ElementType
BaseClass * clone() const
int knnSearch(const Matrix< ElementType > &queries, std::vector< std::vector< size_t > > &indices, std::vector< std::vector< DistanceType > > &dists, size_t knn, const SearchParams ¶ms) const
AutotunedIndex & operator=(AutotunedIndex other)
bool needs_kdtree_distance
virtual ~AutotunedIndex()
void removePoint(size_t id)
void serialize(Archive &ar)
AutotunedIndex(const AutotunedIndex &other)
AutotunedIndex(const IndexParams ¶ms=AutotunedIndexParams(), Distance d=Distance())
FLANN_DEPRECATED SearchParams getSearchParameters() const
void saveIndex(FILE *stream)
flann_algorithm_t getType() const
int radiusSearch(const Matrix< ElementType > &queries, Matrix< size_t > &indices, Matrix< DistanceType > &dists, DistanceType radius, const SearchParams ¶ms) const
AutotunedIndex< Distance > IndexType
int knnSearch(const Matrix< ElementType > &queries, Matrix< size_t > &indices, Matrix< DistanceType > &dists, size_t knn, const SearchParams ¶ms) const
Perform k-nearest neighbor search.
static int debug(const char *fmt,...)
static int info(const char *fmt,...)
void swap(NNIndex &other)
IndexParams index_params_
#define FLANN_ARRAY_LEN(a)
T get_param(const IndexParams ¶ms, std::string name, const T &default_value)
Matrix< T > random_sample(Matrix< T > &srcMatrix, size_t size, bool remove=false)
void print_params(const IndexParams ¶ms)
float test_index_precision(Index &index, const Matrix< typename Distance::ElementType > &inputData, const Matrix< typename Distance::ElementType > &testData, const Matrix< size_t > &matches, float precision, int &checks, const Distance &distance, int nn=1, int skipMatches=0)
NNIndex< Distance > * create_index_by_type(const flann_algorithm_t index_type, const Matrix< typename Distance::ElementType > &dataset, const IndexParams ¶ms, const Distance &distance)
std::map< std::string, any > IndexParams
void swap(cloudViewer::core::SmallVectorImpl< T > &LHS, cloudViewer::core::SmallVectorImpl< T > &RHS)
Implement std::swap in terms of SmallVector swap.
#define USING_BASECLASS_SYMBOLS
AutotunedIndexParams(float target_precision=0.8, float build_weight=0.01, float memory_weight=0, float sample_fraction=0.1)