1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
8 #include "ml/contrib/IoU.h"
9 #include "ml/contrib/IoUImpl.h"
11 namespace cloudViewer {
// Threads per block used by the kernel launches in this file.
15 static constexpr int block_size = 128;
// Trip count of the per-thread loop inside each kernel; the product
// block_size * thread_size is the number of box pairs assigned to one block.
16 static constexpr int thread_size = 4;
// Kernel that stores IoUBev2DWithCenterAndSize(box_a, box_b) for pairs of
// boxes taken from boxes_a and boxes_b into iou.
//
// A flat pair index idx is decoded as (idx_a, idx_b) = (idx / num_b,
// idx % num_b), and the result for that pair is written to
// iou[idx_a * num_b + idx_b], i.e. a row-major layout with num_b columns.
// Each box is addressed as 5 consecutive floats.
// NOTE(review): the meaning of those 5 floats (presumably center, size and a
// rotation, judging by the helper's name) is not visible here -- confirm
// against the helper's declaration.
//
// NOTE(review): only part of the parameter list and body is visible in this
// excerpt; a bounds check of idx against n and the per-iteration update of
// idx are assumed to live in the omitted lines -- confirm.
18 __global__ void IoUBevElementKernel(const float *boxes_a,
23 // Each thread runs a loop with a fixed trip count of thread_size.
// NOTE(review): the original comment called this the "grid-stride loops"
// pattern, but the visible loop bound is the constant thread_size rather
// than n -- confirm against the omitted idx update.
24 int n = num_a * num_b;  // total number of (a, b) box pairs
25 int items_per_block = block_size * thread_size;  // pairs covered by one block
26 int idx = blockIdx.x * items_per_block + threadIdx.x;  // first pair for this thread
28 for (int i = 0; i < thread_size; i++) {
30 int idx_a = idx / num_b;  // index of the box in boxes_a
31 int idx_b = idx % num_b;  // index of the box in boxes_b
33 const float *box_a = boxes_a + idx_a * 5;  // 5 floats per box
34 const float *box_b = boxes_b + idx_b * 5;
35 float *out = iou + idx_a * num_b + idx_b;  // output slot for this pair
36 *out = IoUBev2DWithCenterAndSize(box_a, box_b);
// Host-side launcher for IoUBevElementKernel.
//
// Sizes the grid so that grid_size * items_per_block >= n, where
// n = num_a * num_b, and launches with block_size threads per block. The
// launch configuration passes neither a dynamic shared-memory size nor a
// stream.
// NOTE(review): n == 0 would yield grid_size == 0; whether that case is
// handled is not visible in this excerpt -- confirm.
43 void IoUBevCUDAKernel(const float *boxes_a,
48 int n = num_a * num_b;  // number of box pairs to evaluate
52 int items_per_block = block_size * thread_size;  // pairs handled per block
53 int grid_size = (n + items_per_block - 1) / items_per_block;  // ceil(n / items_per_block)
54 IoUBevElementKernel<<<grid_size, block_size>>>(boxes_a, boxes_b, iou, num_a,
// Kernel that stores IoU3DWithCenterAndSize(box_a, box_b) for pairs of boxes
// taken from boxes_a and boxes_b into iou.
//
// A flat pair index idx is decoded as (idx_a, idx_b) = (idx / num_b,
// idx % num_b), and the result for that pair is written to
// iou[idx_a * num_b + idx_b], i.e. a row-major layout with num_b columns.
// Each box is addressed as 7 consecutive floats.
// NOTE(review): the meaning of those 7 floats (presumably 3D center, 3D size
// and a rotation, judging by the helper's name) is not visible here --
// confirm against the helper's declaration.
//
// NOTE(review): only part of the parameter list and body is visible in this
// excerpt; a bounds check of idx against n and the per-iteration update of
// idx are assumed to live in the omitted lines -- confirm.
58 __global__ void IoU3dElementKernel(const float *boxes_a,
63 // Each thread runs a loop with a fixed trip count of thread_size.
// NOTE(review): the original comment called this the "grid-stride loops"
// pattern, but the visible loop bound is the constant thread_size rather
// than n -- confirm against the omitted idx update.
64 int n = num_a * num_b;  // total number of (a, b) box pairs
65 int items_per_block = block_size * thread_size;  // pairs covered by one block
66 int idx = blockIdx.x * items_per_block + threadIdx.x;  // first pair for this thread
68 for (int i = 0; i < thread_size; i++) {
70 int idx_a = idx / num_b;  // index of the box in boxes_a
71 int idx_b = idx % num_b;  // index of the box in boxes_b
73 const float *box_a = boxes_a + idx_a * 7;  // 7 floats per box
74 const float *box_b = boxes_b + idx_b * 7;
75 float *out = iou + idx_a * num_b + idx_b;  // output slot for this pair
76 *out = IoU3DWithCenterAndSize(box_a, box_b);
// Host-side launcher for IoU3dElementKernel.
//
// Sizes the grid so that grid_size * items_per_block >= n, where
// n = num_a * num_b, and launches with block_size threads per block. The
// launch configuration passes neither a dynamic shared-memory size nor a
// stream.
// NOTE(review): n == 0 would yield grid_size == 0; whether that case is
// handled is not visible in this excerpt -- confirm.
83 void IoU3dCUDAKernel(const float *boxes_a,
88 int n = num_a * num_b;  // number of box pairs to evaluate
92 int items_per_block = block_size * thread_size;  // pairs handled per block
93 int grid_size = (n + items_per_block - 1) / items_per_block;  // ceil(n / items_per_block)
94 IoU3dElementKernel<<<grid_size, block_size>>>(boxes_a, boxes_b, iou, num_a,
98 } // namespace contrib
100 } // namespace cloudViewer