ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
ParallelFor.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <benchmark/benchmark.h>
11 
12 #include <numeric>
13 #include <vector>
14 
15 namespace cloudViewer {
16 namespace core {
17 
18 void ParallelForScalar(benchmark::State& state, int size) {
19  std::vector<float> input(size);
20  std::vector<float> output(size);
21  std::iota(input.begin(), input.end(), 0.0f);
22 
23  // Warmup.
24  {
25  core::ParallelFor(core::Device("CPU:0"), size, [&](int64_t idx) {
26  float x = input[idx];
27  float x2 = x * x;
28  output[idx] = x2;
29  });
30  }
31 
32  for (auto _ : state) {
33  core::ParallelFor(core::Device("CPU:0"), size, [&](int64_t idx) {
34  float x = input[idx];
35  float x2 = x * x;
36  output[idx] = x2;
37  });
38  }
39 }
40 
41 void ParallelForVectorized(benchmark::State& state, int size) {
42  std::vector<float> input(size);
43  std::vector<float> output(size);
44  std::iota(input.begin(), input.end(), 0.0f);
45 
46  // Warmup.
47  {
49  core::Device("CPU:0"), size,
50  [&](int64_t idx) {
51  float x = input[idx];
52  float x2 = x * x;
53  output[idx] = x2;
54  },
55  CLOUDVIEWER_VECTORIZED(SquareKernel, input.data(),
56  output.data()));
57  }
58 
59  for (auto _ : state) {
61  core::Device("CPU:0"), size,
62  [&](int64_t idx) {
63  float x = input[idx];
64  float x2 = x * x;
65  output[idx] = x2;
66  },
67  CLOUDVIEWER_VECTORIZED(SquareKernel, input.data(),
68  output.data()));
69  }
70 }
71 
// Registers benchmark function FN for a single problem size N via
// BENCHMARK_CAPTURE, using CPU<N> as the capture label and microseconds as
// the reporting unit. Helper for ENUM_BM_SIZE.
#define ENUM_BM_SIZE_CASE(FN, N) \
    BENCHMARK_CAPTURE(FN, CPU##N, N)->Unit(benchmark::kMicrosecond);

// Registers benchmark function FN once for each power-of-ten size from 100
// through 100000000. Every registration already ends with a semicolon.
#define ENUM_BM_SIZE(FN)             \
    ENUM_BM_SIZE_CASE(FN, 100)       \
    ENUM_BM_SIZE_CASE(FN, 1000)      \
    ENUM_BM_SIZE_CASE(FN, 10000)     \
    ENUM_BM_SIZE_CASE(FN, 100000)    \
    ENUM_BM_SIZE_CASE(FN, 1000000)   \
    ENUM_BM_SIZE_CASE(FN, 10000000)  \
    ENUM_BM_SIZE_CASE(FN, 100000000)
83 
86 
87 } // namespace core
88 } // namespace cloudViewer
int size
#define ENUM_BM_SIZE(FN)
Definition: ParallelFor.cpp:72
#define CLOUDVIEWER_VECTORIZED(ISPCKernel,...)
Definition: ParallelFor.h:228
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:111
void ParallelForScalar(benchmark::State &state, int size)
Definition: ParallelFor.cpp:18
void ParallelForVectorized(benchmark::State &state, int size)
Definition: ParallelFor.cpp:41
Generic file read and write utility for python interface.