ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
IoU.cu
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include "ml/contrib/IoU.h"
9 #include "ml/contrib/IoUImpl.h"
10 
11 namespace cloudViewer {
12 namespace ml {
13 namespace contrib {
14 
// Number of threads per block used when the IoU kernels are launched.
static constexpr int block_size{128};
// Number of output elements each thread processes inside the IoU kernels.
static constexpr int thread_size{4};
17 
// Computes the pairwise IoU between every box in boxes_a and every box in
// boxes_b using IoUBev2DWithCenterAndSize.
//
// Each box occupies 5 consecutive floats (the meaning of the individual
// fields is defined by IoUBev2DWithCenterAndSize). The result is written
// row-major: iou[idx_a * num_b + idx_b] receives the IoU of box idx_a of
// boxes_a and box idx_b of boxes_b.
//
// Launch layout: 1D grid with blockDim.x == block_size. Every block handles
// chunks of block_size * thread_size consecutive outputs; within a chunk a
// thread visits elements that are block_size apart. The outer loop strides
// over chunks by gridDim.x, so any grid size >= 1 covers all outputs.
//
// \param boxes_a Pointer to num_a * 5 floats, read on the device.
// \param boxes_b Pointer to num_b * 5 floats, read on the device.
// \param iou Pointer to num_a * num_b floats, written on the device.
// \param num_a Number of boxes in boxes_a.
// \param num_b Number of boxes in boxes_b.
__global__ void IoUBevElementKernel(const float *boxes_a,
                                    const float *boxes_b,
                                    float *iou,
                                    int num_a,
                                    int num_b) {
    // 64-bit arithmetic: num_a * num_b may not fit in a 32-bit int, and
    // signed overflow would be undefined behavior.
    const long long n = static_cast<long long>(num_a) * num_b;
    const long long items_per_block =
            static_cast<long long>(block_size) * thread_size;
    const long long grid_stride = items_per_block * gridDim.x;

    for (long long base = blockIdx.x * items_per_block; base < n;
         base += grid_stride) {
        long long idx = base + threadIdx.x;
#pragma unroll
        for (int i = 0; i < thread_size; i++) {
            if (idx < n) {
                const long long idx_a = idx / num_b;
                const long long idx_b = idx % num_b;

                const float *box_a = boxes_a + idx_a * 5;
                const float *box_b = boxes_b + idx_b * 5;
                // idx == idx_a * num_b + idx_b, i.e. the row-major offset.
                iou[idx] = IoUBev2DWithCenterAndSize(box_a, box_b);
            }
            idx += block_size;
        }
    }
}

// Launches IoUBevElementKernel for all num_a * num_b box pairs.
//
// Returns without launching when either count is not positive. The kernel is
// launched without an explicit stream argument and this function neither
// synchronizes nor checks the launch status.
// NOTE(review): consider checking cudaGetLastError() after the launch with
// the project's error-handling helper.
//
// \param boxes_a Pointer to num_a * 5 floats, passed through to the kernel.
// \param boxes_b Pointer to num_b * 5 floats, passed through to the kernel.
// \param iou Pointer to num_a * num_b floats, passed through to the kernel.
// \param num_a Number of boxes in boxes_a.
// \param num_b Number of boxes in boxes_b.
void IoUBevCUDAKernel(const float *boxes_a,
                      const float *boxes_b,
                      float *iou,
                      int num_a,
                      int num_b) {
    if (num_a <= 0 || num_b <= 0) {
        return;
    }
    // 64-bit arithmetic: the pair count may exceed the range of int.
    const long long n = static_cast<long long>(num_a) * num_b;
    const long long items_per_block =
            static_cast<long long>(block_size) * thread_size;
    const long long num_blocks = (n + items_per_block - 1) / items_per_block;
    // The x-dimension of a grid is limited to 2^31 - 1 blocks; the kernel's
    // outer loop picks up whatever a clamped grid does not cover directly.
    const long long max_grid_size = 2147483647LL;
    const int grid_size = static_cast<int>(
            num_blocks < max_grid_size ? num_blocks : max_grid_size);
    IoUBevElementKernel<<<grid_size, block_size>>>(boxes_a, boxes_b, iou,
                                                   num_a, num_b);
}
57 
// Computes the pairwise IoU between every box in boxes_a and every box in
// boxes_b using IoU3DWithCenterAndSize.
//
// Each box occupies 7 consecutive floats (the meaning of the individual
// fields is defined by IoU3DWithCenterAndSize). The result is written
// row-major: iou[idx_a * num_b + idx_b] receives the IoU of box idx_a of
// boxes_a and box idx_b of boxes_b.
//
// Launch layout: 1D grid with blockDim.x == block_size. Every block handles
// chunks of block_size * thread_size consecutive outputs; within a chunk a
// thread visits elements that are block_size apart. The outer loop strides
// over chunks by gridDim.x, so any grid size >= 1 covers all outputs.
//
// \param boxes_a Pointer to num_a * 7 floats, read on the device.
// \param boxes_b Pointer to num_b * 7 floats, read on the device.
// \param iou Pointer to num_a * num_b floats, written on the device.
// \param num_a Number of boxes in boxes_a.
// \param num_b Number of boxes in boxes_b.
__global__ void IoU3dElementKernel(const float *boxes_a,
                                   const float *boxes_b,
                                   float *iou,
                                   int num_a,
                                   int num_b) {
    // 64-bit arithmetic: num_a * num_b may not fit in a 32-bit int, and
    // signed overflow would be undefined behavior.
    const long long n = static_cast<long long>(num_a) * num_b;
    const long long items_per_block =
            static_cast<long long>(block_size) * thread_size;
    const long long grid_stride = items_per_block * gridDim.x;

    for (long long base = blockIdx.x * items_per_block; base < n;
         base += grid_stride) {
        long long idx = base + threadIdx.x;
#pragma unroll
        for (int i = 0; i < thread_size; i++) {
            if (idx < n) {
                const long long idx_a = idx / num_b;
                const long long idx_b = idx % num_b;

                const float *box_a = boxes_a + idx_a * 7;
                const float *box_b = boxes_b + idx_b * 7;
                // idx == idx_a * num_b + idx_b, i.e. the row-major offset.
                iou[idx] = IoU3DWithCenterAndSize(box_a, box_b);
            }
            idx += block_size;
        }
    }
}

// Launches IoU3dElementKernel for all num_a * num_b box pairs.
//
// Returns without launching when either count is not positive. The kernel is
// launched without an explicit stream argument and this function neither
// synchronizes nor checks the launch status.
// NOTE(review): consider checking cudaGetLastError() after the launch with
// the project's error-handling helper.
//
// \param boxes_a Pointer to num_a * 7 floats, passed through to the kernel.
// \param boxes_b Pointer to num_b * 7 floats, passed through to the kernel.
// \param iou Pointer to num_a * num_b floats, passed through to the kernel.
// \param num_a Number of boxes in boxes_a.
// \param num_b Number of boxes in boxes_b.
void IoU3dCUDAKernel(const float *boxes_a,
                     const float *boxes_b,
                     float *iou,
                     int num_a,
                     int num_b) {
    if (num_a <= 0 || num_b <= 0) {
        return;
    }
    // 64-bit arithmetic: the pair count may exceed the range of int.
    const long long n = static_cast<long long>(num_a) * num_b;
    const long long items_per_block =
            static_cast<long long>(block_size) * thread_size;
    const long long num_blocks = (n + items_per_block - 1) / items_per_block;
    // The x-dimension of a grid is limited to 2^31 - 1 blocks; the kernel's
    // outer loop picks up whatever a clamped grid does not cover directly.
    const long long max_grid_size = 2147483647LL;
    const int grid_size = static_cast<int>(
            num_blocks < max_grid_size ? num_blocks : max_grid_size);
    IoU3dElementKernel<<<grid_size, block_size>>>(boxes_a, boxes_b, iou,
                                                  num_a, num_b);
}
97 
98 } // namespace contrib
99 } // namespace ml
100 } // namespace cloudViewer