ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
ParallelScan.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <tbb/parallel_for.h>
11 #include <tbb/parallel_scan.h>
12 
13 #if TBB_INTERFACE_VERSION >= 20000
14 
15 // Check if the C++ standard library implements parallel algorithms
16 // and use this over parallelstl to avoid conflicts.
17 // Clang does not implement it so far, so checking for C++17 is not sufficient.
18 #ifdef __cpp_lib_parallel_algorithm
19 #include <execution>
20 #include <numeric>
21 #else
22 #include <pstl/execution>
23 #include <pstl/numeric>
24 
25 // parallelstl incorrectly assumes MSVC to unconditionally implement
26 // parallel algorithms even if __cpp_lib_parallel_algorithm is not defined.
27 // So manually include the header which pulls all "pstl::execution" definitions
28 // into the "std" namespace.
29 #if __PSTL_CPP17_EXECUTION_POLICIES_PRESENT
30 #include <pstl/internal/glue_execution_defs.h>
31 #endif
32 
33 #endif
34 #endif
35 
36 namespace cloudViewer {
37 namespace utility {
38 
39 namespace {
40 template <class Tin, class Tout>
41 class ScanSumBody {
42  Tout sum;
43  const Tin* in;
44  Tout* const out;
45 
46 public:
47  ScanSumBody(Tout* out_, const Tin* in_) : sum(0), in(in_), out(out_) {}
48  Tout get_sum() const { return sum; }
49 
50  template <class Tag>
51  void operator()(const tbb::blocked_range<size_t>& r, Tag) {
52  Tout temp = sum;
53  for (size_t i = r.begin(); i < r.end(); ++i) {
54  temp = temp + in[i];
55  if (Tag::is_final_scan()) out[i] = temp;
56  }
57  sum = temp;
58  }
59  ScanSumBody(ScanSumBody& b, tbb::split) : sum(0), in(b.in), out(b.out) {}
60  void reverse_join(ScanSumBody& a) { sum = a.sum + sum; }
61  void assign(ScanSumBody& b) { sum = b.sum; }
62 };
63 } // namespace
64 
65 template <class Tin, class Tout>
66 void InclusivePrefixSum(const Tin* first, const Tin* last, Tout* out) {
67 #if TBB_INTERFACE_VERSION >= 20000
68  // use parallelstl if we have TBB 2018 or later
69  std::inclusive_scan(pstl::execution::par_unseq, first, last, out);
70 #else
71  ScanSumBody<Tin, Tout> body(out, first);
72  size_t n = std::distance(first, last);
73  tbb::parallel_scan(tbb::blocked_range<size_t>(0, n), body);
74 #endif
75 }
76 
77 } // namespace utility
78 } // namespace cloudViewer
void InclusivePrefixSum(const Tin *first, const Tin *last, Tout *out)
Definition: ParallelScan.h:66
Generic file read and write utility for python interface.