10 #include <tbb/parallel_for.h>
14 #include <nanoflann.hpp>
// Holder that owns the nanoflann KD-tree and its data adaptor for a point
// cloud. The adaptor wraps a raw, non-owning pointer to the point data
// (NOTE(review): caller must keep the data alive while the index exists —
// confirm against the holder's members declared elsewhere in this file).
25 template <
int METRIC,
class TReal,
class TIndex>
32 const TReal *
const data_ptr)
// SelectNanoflannAdaptor<M>: maps the compile-time METRIC value to the
// matching nanoflann distance functor via partial specialization. The 'fake'
// parameter is a dummy that makes partial specialization legal here.
54 template <
int M,
typename fake =
void>
// Specialization selecting the L2 (squared Euclidean) distance adaptor.
57 template <
typename fake>
59 typedef nanoflann::L2_Adaptor<TReal, DataAdaptor, TReal>
adaptor_t;
// Specialization selecting the L1 (Manhattan) distance adaptor.
62 template <
typename fake>
64 typedef nanoflann::L1_Adaptor<TReal, DataAdaptor, TReal>
adaptor_t;
// KD-tree type parameterized on the metric-selected adaptor type.
68 typedef nanoflann::KDTreeSingleIndexAdaptor<
77 const TReal *data_ptr) {
// Internal helper that builds the KD-tree index over the input points for one
// specific METRIC. Instantiated per metric by the BuildKdTree dispatcher
// further down in this file.
89 template <
class T,
class TIndex,
int METRIC>
90 void _BuildKdTree(
size_t num_points,
// CPU k-nearest-neighbor search for all queries against the KD-tree stored in
// 'holder'. Writes exclusive prefix sums of per-query neighbor counts into
// 'query_neighbors_row_splits' (num_queries + 1 entries) and allocates the
// flat index/distance output arrays through 'output_allocator'.
// If 'ignore_query_point' is set, neighbors whose coordinates exactly equal
// the query point are skipped. Distances are only stored when
// 'return_distances' is set.
98 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR,
int METRIC>
100 int64_t *query_neighbors_row_splits,
104 const T *
const queries,
105 const size_t dimension,
107 bool ignore_query_point,
108 bool return_distances,
109 OUTPUT_ALLOCATOR &output_allocator) {
// Early exit: no queries, no points, or no index. Row splits are zeroed and
// zero-length outputs are still allocated so callers get valid pointers.
111 if (num_queries == 0 || num_points == 0 || holder ==
nullptr) {
112 std::fill(query_neighbors_row_splits,
113 query_neighbors_row_splits + num_queries + 1, 0);
115 output_allocator.AllocIndices(&indices_ptr, 0);
118 output_allocator.AllocDistances(&distances_ptr, 0);
// Per-query temporary storage filled by the parallel first pass.
122 std::vector<std::vector<TIndex>> neighbors_indices(num_queries);
123 std::vector<std::vector<T>> neighbors_distances(num_queries);
124 std::vector<uint32_t> neighbors_count(num_queries, 0);
// First pass: run knnSearch for each query in parallel over ranges.
131 tbb::blocked_range<size_t>(0, num_queries),
132 [&](
const tbb::blocked_range<size_t> &r) {
// Scratch buffers reused for every query in this range.
133 std::vector<TIndex> result_indices(knn);
134 std::vector<T> result_distances(knn);
135 for (
size_t i = r.begin(); i != r.end(); ++i) {
136 size_t num_valid = holder_->index_->knnSearch(
137 &queries[i * dimension], knn, result_indices.data(),
138 result_distances.data());
140 int num_neighbors = 0;
141 for (
size_t valid_i = 0; valid_i < num_valid; ++valid_i) {
142 TIndex idx = result_indices[valid_i];
// Optionally drop neighbors identical to the query point (compares
// all 'dimension' coordinates element-wise).
143 if (ignore_query_point &&
144 std::equal(&queries[i * dimension],
145 &queries[i * dimension] + dimension,
&
points[idx * dimension])) {
149 neighbors_indices[i].push_back(idx);
150 if (return_distances) {
151 T
dist = result_distances[valid_i];
152 neighbors_distances[i].push_back(
dist);
156 neighbors_count[i] = num_neighbors;
// Turn per-query counts into row splits (exclusive prefix sum starting at 0).
160 query_neighbors_row_splits[0] = 0;
162 neighbors_count.data() + neighbors_count.size(),
163 query_neighbors_row_splits + 1);
165 int64_t num_indices = query_neighbors_row_splits[num_queries];
168 output_allocator.AllocIndices(&indices_ptr, num_indices);
// Distances are allocated either full-size or zero-length so the allocator
// output is always defined, even when distances are not requested.
170 if (return_distances)
171 output_allocator.AllocDistances(&distances_ptr, num_indices);
173 output_allocator.AllocDistances(&distances_ptr, 0);
175 std::fill(neighbors_count.begin(), neighbors_count.end(), 0);
// Second pass: copy per-query results into the flat output arrays at the
// offsets given by the row splits.
178 tbb::parallel_for(tbb::blocked_range<size_t>(0, num_queries),
179 [&](
const tbb::blocked_range<size_t> &r) {
180 for (
size_t i = r.begin(); i != r.end(); ++i) {
181 int64_t start_idx = query_neighbors_row_splits[i];
182 std::copy(neighbors_indices[i].begin(),
183 neighbors_indices[i].end(),
184 &indices_ptr[start_idx]);
186 if (return_distances) {
187 std::copy(neighbors_distances[i].begin(),
188 neighbors_distances[i].end(),
189 &distances_ptr[start_idx]);
// CPU radius search: for each query i, finds all points within radii[i] of the
// query using the KD-tree in 'holder'. Output layout matches _KnnSearchCPU:
// 'query_neighbors_row_splits' receives prefix sums of per-query counts and
// the flat index/distance arrays come from 'output_allocator'.
// When 'normalize_distances' is set, returned distances are divided by the
// squared radius (see the transform in the copy pass below).
195 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR,
int METRIC>
196 void _RadiusSearchCPU(NanoFlannIndexHolderBase *holder,
197 int64_t *query_neighbors_row_splits,
201 const T *
const queries,
202 const size_t dimension,
203 const T *
const radii,
204 bool ignore_query_point,
205 bool return_distances,
206 bool normalize_distances,
208 OUTPUT_ALLOCATOR &output_allocator) {
// Early exit: no queries, no points, or no index -> empty but valid outputs.
209 if (num_queries == 0 || num_points == 0 || holder ==
nullptr) {
210 std::fill(query_neighbors_row_splits,
211 query_neighbors_row_splits + num_queries + 1, 0);
213 output_allocator.AllocIndices(&indices_ptr, 0);
216 output_allocator.AllocDistances(&distances_ptr, 0);
// Per-query temporary storage for the parallel first pass.
220 std::vector<std::vector<TIndex>> neighbors_indices(num_queries);
221 std::vector<std::vector<T>> neighbors_distances(num_queries);
222 std::vector<uint32_t> neighbors_count(num_queries, 0);
224 nanoflann::SearchParameters
params;
// Downcast to the metric-specific holder to reach the typed index.
228 static_cast<NanoFlannIndexHolder<METRIC, T, TIndex> *
>(holder);
// First pass: radius query per point, parallel over query ranges.
230 tbb::blocked_range<size_t>(0, num_queries),
231 [&](
const tbb::blocked_range<size_t> &r) {
232 std::vector<nanoflann::ResultItem<TIndex, T>> search_result;
231 for (
size_t i = r.begin(); i != r.end(); ++i) {
// Radius is squared here — NOTE(review): nanoflann's L2 adaptor works on
// squared distances; the (elided) surrounding code presumably does this
// only for the L2 metric — confirm against the full source.
236 radius = radius * radius;
239 holder_->index_->radiusSearch(&queries[i * dimension],
240 radius, search_result,
243 int num_neighbors = 0;
244 for (
const auto &idx_dist : search_result) {
// Optionally drop matches identical to the query point.
245 if (ignore_query_point &&
246 std::equal(&queries[i * dimension],
247 &queries[i * dimension] + dimension,
&
points[
static_cast<TIndex
>(
253 neighbors_indices[i].push_back(
254 static_cast<TIndex
>(idx_dist.first));
255 if (return_distances) {
256 neighbors_distances[i].push_back(idx_dist.second);
260 neighbors_count[i] = num_neighbors;
// Convert counts to row splits (exclusive prefix sum).
264 query_neighbors_row_splits[0] = 0;
266 neighbors_count.data() + neighbors_count.size(),
267 query_neighbors_row_splits + 1);
269 int64_t num_indices = query_neighbors_row_splits[num_queries];
272 output_allocator.AllocIndices(&indices_ptr, num_indices);
// Always allocate distances (possibly zero-length) so output is defined.
274 if (return_distances)
275 output_allocator.AllocDistances(&distances_ptr, num_indices);
277 output_allocator.AllocDistances(&distances_ptr, 0);
279 std::fill(neighbors_count.begin(), neighbors_count.end(), 0);
// Second pass: copy results into the flat arrays; distances are optionally
// normalized by the squared per-query radius during the copy.
283 tbb::blocked_range<size_t>(0, num_queries),
284 [&](
const tbb::blocked_range<size_t> &r) {
285 for (
size_t i = r.begin(); i != r.end(); ++i) {
286 int64_t start_idx = query_neighbors_row_splits[i];
287 std::copy(neighbors_indices[i].begin(),
288 neighbors_indices[i].end(),
289 &indices_ptr[start_idx]);
290 if (return_distances) {
291 std::transform(neighbors_distances[i].begin(),
292 neighbors_distances[i].end(),
293 &distances_ptr[start_idx], [&](T dist) {
295 if (normalize_distances) {
297 d /= (radii[i] * radii[i]);
// CPU hybrid search: radius search capped at 'max_knn' neighbors per query.
// Unlike the KNN/radius variants, the output is fixed-width: each query gets
// exactly max_knn index/distance slots plus a per-query count. Slots beyond
// the actual neighbor count are padded with index -1 and distance 0.
309 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR,
int METRIC>
310 void _HybridSearchCPU(NanoFlannIndexHolderBase *holder,
314 const T *
const queries,
315 const size_t dimension,
318 bool ignore_query_point,
319 bool return_distances,
320 OUTPUT_ALLOCATOR &output_allocator) {
// Early exit with zero-length but valid output allocations.
321 if (num_queries == 0 || num_points == 0 || holder ==
nullptr) {
322 TIndex *indices_ptr, *counts_ptr;
323 output_allocator.AllocIndices(&indices_ptr, 0);
324 output_allocator.AllocCounts(&counts_ptr, 0);
327 output_allocator.AllocDistances(&distances_ptr, 0);
// Square the radius once up front (nanoflann radiusSearch on an L2 adaptor
// takes a squared radius).
331 T radius_squared = radius * radius;
332 TIndex *indices_ptr, *counts_ptr;
// Fixed-width output: max_knn slots per query, allocated up front.
335 size_t num_indices =
static_cast<size_t>(max_knn) * num_queries;
336 output_allocator.AllocIndices(&indices_ptr, num_indices);
337 output_allocator.AllocDistances(&distances_ptr, num_indices);
338 output_allocator.AllocCounts(&counts_ptr, num_queries);
340 nanoflann::SearchParameters
params;
// Downcast to the metric-specific holder to reach the typed index.
344 static_cast<NanoFlannIndexHolder<METRIC, T, TIndex> *
>(holder);
// Single parallel pass: query, clamp the count to max_knn, write results
// directly into the fixed-width slots, pad the rest.
346 tbb::blocked_range<size_t>(0, num_queries),
347 [&](
const tbb::blocked_range<size_t> &r) {
348 std::vector<nanoflann::ResultItem<TIndex, T>> ret_matches;
349 for (
size_t i = r.begin(); i != r.end(); ++i) {
350 size_t num_results = holder_->index_->radiusSearch(
351 &queries[i * dimension], radius_squared,
352 ret_matches, params);
353 ret_matches.resize(num_results);
355 TIndex count_i = static_cast<TIndex>(num_results);
356 count_i = count_i < max_knn ? count_i : max_knn;
357 counts_ptr[i] = count_i;
359 int neighbor_idx = 0;
360 for (auto it = ret_matches.begin();
361 it < ret_matches.end() && neighbor_idx < max_knn;
362 it++, neighbor_idx++) {
363 indices_ptr[i * max_knn + neighbor_idx] =
364 static_cast<TIndex>(it->first);
365 distances_ptr[i * max_knn + neighbor_idx] = it->second;
// Pad unused slots so the fixed-width output is fully initialized.
368 while (neighbor_idx < max_knn) {
369 indices_ptr[i * max_knn + neighbor_idx] = -1;
370 distances_ptr[i * max_knn + neighbor_idx] = 0;
// Public entry point: builds a KD-tree over 'points' and returns the
// type-erased holder. The runtime 'metric' is dispatched to the matching
// _BuildKdTree<..., METRIC> instantiation via the CALL_TEMPLATE macros.
393 template <
class T,
class TIndex>
394 std::unique_ptr<NanoFlannIndexHolderBase>
BuildKdTree(
size_t num_points,
399 #define FN_PARAMETERS num_points, points, dimension, &holder
// CALL_TEMPLATE expands to a metric check + call; CALL_TEMPLATE2 (elided
// here) presumably invokes it once per supported metric — confirm in full
// source.
401 #define CALL_TEMPLATE(METRIC) \
402 if (METRIC == metric) { \
403 _BuildKdTree<T, TIndex, METRIC>(FN_PARAMETERS); \
406 #define CALL_TEMPLATE2 \
413 #undef CALL_TEMPLATE2
// Ownership of the raw holder pointer is transferred to the caller.
416 return std::unique_ptr<NanoFlannIndexHolderBase>(holder);
// Public entry point for CPU KNN search. Forwards all arguments to the
// _KnnSearchCPU<..., METRIC> instantiation selected by the runtime 'metric'
// via the CALL_TEMPLATE macros; see _KnnSearchCPU for output semantics.
471 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR>
473 int64_t *query_neighbors_row_splits,
477 const T *
const queries,
478 const size_t dimension,
481 bool ignore_query_point,
482 bool return_distances,
483 OUTPUT_ALLOCATOR &output_allocator) {
484 #define FN_PARAMETERS \
485 holder, query_neighbors_row_splits, num_points, points, num_queries, \
486 queries, dimension, knn, ignore_query_point, return_distances, \
489 #define CALL_TEMPLATE(METRIC) \
490 if (METRIC == metric) { \
491 _KnnSearchCPU<T, TIndex, OUTPUT_ALLOCATOR, METRIC>(FN_PARAMETERS); \
494 #define CALL_TEMPLATE2 \
501 #undef CALL_TEMPLATE2
// Public entry point for CPU radius search. Dispatches on the runtime
// 'metric' to the matching _RadiusSearchCPU<..., METRIC> instantiation; see
// _RadiusSearchCPU for the output layout and normalization semantics.
565 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR>
567 int64_t *query_neighbors_row_splits,
571 const T *
const queries,
572 const size_t dimension,
573 const T *
const radii,
575 bool ignore_query_point,
576 bool return_distances,
577 bool normalize_distances,
579 OUTPUT_ALLOCATOR &output_allocator) {
580 #define FN_PARAMETERS \
581 holder, query_neighbors_row_splits, num_points, points, num_queries, \
582 queries, dimension, radii, ignore_query_point, return_distances, \
583 normalize_distances, sort, output_allocator
585 #define CALL_TEMPLATE(METRIC) \
586 if (METRIC == metric) { \
587 _RadiusSearchCPU<T, TIndex, OUTPUT_ALLOCATOR, METRIC>(FN_PARAMETERS); \
590 #define CALL_TEMPLATE2 \
597 #undef CALL_TEMPLATE2
// Public entry point for CPU hybrid (radius + max_knn cap) search. Dispatches
// on the runtime 'metric' to the matching _HybridSearchCPU<..., METRIC>
// instantiation; see _HybridSearchCPU for the fixed-width output semantics.
653 template <
class T,
class TIndex,
class OUTPUT_ALLOCATOR>
658 const T *
const queries,
659 const size_t dimension,
663 bool ignore_query_point,
664 bool return_distances,
665 OUTPUT_ALLOCATOR &output_allocator) {
666 #define FN_PARAMETERS \
667 holder, num_points, points, num_queries, queries, dimension, radius, \
668 max_knn, ignore_query_point, return_distances, output_allocator
670 #define CALL_TEMPLATE(METRIC) \
671 if (METRIC == metric) { \
672 _HybridSearchCPU<T, TIndex, OUTPUT_ALLOCATOR, METRIC>(FN_PARAMETERS); \
675 #define CALL_TEMPLATE2 \
682 #undef CALL_TEMPLATE2
cmdLineReadable * params[]
Helper functions for the ml ops.
static double dist(double x1, double y1, double x2, double y2)
void HybridSearchCPU(NanoFlannIndexHolderBase *holder, size_t num_points, const T *const points, size_t num_queries, const T *const queries, const size_t dimension, const T radius, const int max_knn, const Metric metric, bool ignore_query_point, bool return_distances, OUTPUT_ALLOCATOR &output_allocator)
void RadiusSearchCPU(NanoFlannIndexHolderBase *holder, int64_t *query_neighbors_row_splits, size_t num_points, const T *const points, size_t num_queries, const T *const queries, const size_t dimension, const T *const radii, const Metric metric, bool ignore_query_point, bool return_distances, bool normalize_distances, bool sort, OUTPUT_ALLOCATOR &output_allocator)
void KnnSearchCPU(NanoFlannIndexHolderBase *holder, int64_t *query_neighbors_row_splits, size_t num_points, const T *const points, size_t num_queries, const T *const queries, const size_t dimension, int knn, const Metric metric, bool ignore_query_point, bool return_distances, OUTPUT_ALLOCATOR &output_allocator)
std::unique_ptr< NanoFlannIndexHolderBase > BuildKdTree(size_t num_points, const T *const points, size_t dimension, const Metric metric)
void InclusivePrefixSum(const Tin *first, const Tin *last, Tout *out)
Generic file read and write utility for python interface.
Base struct for NanoFlann index holder.
const TReal *const data_ptr_
size_t kdtree_get_point_count() const
TReal kdtree_get_pt(const size_t idx, const size_t dim) const
DataAdaptor(size_t dataset_size, int dimension, const TReal *const data_ptr)
bool kdtree_get_bbox(BBOX &) const
nanoflann::L1_Adaptor< TReal, DataAdaptor, TReal > adaptor_t
nanoflann::L2_Adaptor< TReal, DataAdaptor, TReal > adaptor_t
NanoFlannIndexHolder(size_t dataset_size, int dimension, const TReal *data_ptr)
nanoflann::KDTreeSingleIndexAdaptor< typename SelectNanoflannAdaptor< METRIC >::adaptor_t, DataAdaptor, -1, TIndex > KDTree_t
typedef for KDtree.
std::unique_ptr< DataAdaptor > adaptor_
std::unique_ptr< KDTree_t > index_