ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
RaggedToDense.cuh
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <Helper.h>
11 
12 #include <cub/cub.cuh>
13 
14 namespace cloudViewer {
15 namespace ml {
16 namespace impl {
17 
18 namespace {
19 
20 /// Kernel for RaggedToDenseCUDA
21 template <class T>
22 __global__ void RaggedToDenseCUDAKernel(
23  const T* const __restrict__ values,
24  const int64_t* const __restrict__ row_splits,
25  const size_t row_splits_size,
26  const size_t out_col_size,
27  const T* const __restrict__ default_value,
28  const size_t default_value_size,
29  T* __restrict__ out_values) {
30  const int i = blockDim.x * blockIdx.x + threadIdx.x;
31  if (i + 1 >= row_splits_size) return;
32 
33  const int64_t start = row_splits[i];
34  const int64_t end = min(int64_t(out_col_size) + start, row_splits[i + 1]);
35 
36  T* out_ptr = out_values + i * out_col_size * default_value_size;
37 
38  for (int64_t inp_idx = start * default_value_size;
39  inp_idx < end * default_value_size; ++inp_idx, ++out_ptr) {
40  *out_ptr = values[inp_idx];
41  }
42 
43  // fill remaining columns with the default value
44  out_ptr = out_values + i * out_col_size * default_value_size;
45  out_ptr = out_ptr + (end - start) * default_value_size;
46  for (int64_t j = end - start; j < out_col_size;
47  ++j, out_ptr += default_value_size) {
48  for (int64_t k = 0; k < default_value_size; ++k) {
49  out_ptr[k] = default_value[k];
50  }
51  }
52 }
53 } // namespace
54 
55 /// Creates a dense tensor from a ragged tensor.
56 /// All pointer arguments point to device memory unless stated otherwise.
57 ///
58 /// Example where each value has size 2:
59 /// values = [[0,0],[1,1],[2,2],[3,3],[4,4]]
60 /// row_splits = [0,2,5]
61 /// out_col_size=3
62 /// default_value=[-1,-1]
63 /// default_value_size = 2
64 ///
65 /// will return
66 ///
67 /// out_values = [[[0,0],[1,1],[-1,-1]], [[2,2],[3,3],[4,4]]]
68 ///
69 ///
70 /// \param values Linear memory with all values.
71 ///
72 /// \param row_splits Defines the start and end of each entry in the ragged
73 /// tensor. This is an exclusive prefix sum with 0 as the first element
74 /// and the length of all values as the last element.
75 ///
76 /// \param row_splits_size The length of the row_splits vector.
77 ///
78 /// \param out_col_size The output column size. This is the second dim of
79 /// the dense output tensor.
80 ///
81 /// \param default_value The default value to use if there are not enough
82 /// values for filling the row.
83 ///
84 /// \param default_value_size The size of the default value.
85 ///
86 /// \param out_values This is the output array. The size of the array must
87 /// be [row_splits_size-1, out_col_size, default_value_size].
88 ///
89 template <class T>
90 void RaggedToDenseCUDA(const cudaStream_t& stream,
91  const T* const values,
92  const int64_t* const row_splits,
93  const size_t row_splits_size,
94  const size_t out_col_size,
95  const T* const default_value,
96  const size_t default_value_size,
97  T* out_values) {
98  using namespace cloudViewer::utility;
99  const int BLOCKSIZE = 128;
100  dim3 block(BLOCKSIZE, 1, 1);
101  dim3 grid(DivUp(row_splits_size - 1, block.x));
102 
103  if (grid.x) {
104  RaggedToDenseCUDAKernel<T><<<grid, block, 0, stream>>>(
105  values, row_splits, row_splits_size, out_col_size,
106  default_value, default_value_size, out_values);
107  }
108 }
109 
110 } // namespace impl
111 } // namespace ml
112 } // namespace cloudViewer