ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
ContinuousConvTransposeBackpropFilter.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <tbb/parallel_for.h>
11 
12 #include <mutex>
13 
15 
16 namespace cloudViewer {
17 namespace ml {
18 namespace impl {
19 
22 template <class TFeat,
23  class TOut,
24  class TReal,
25  class TIndex,
26  InterpolationMode INTERPOLATION,
27  CoordinateMapping MAPPING,
28  bool ALIGN_CORNERS,
29  bool INDIVIDUAL_EXTENT,
30  bool ISOTROPIC_EXTENT,
31  bool NORMALIZE>
32 void _CConvTransposeBackpropFilterCPU(TOut* filter_backprop,
33  const std::vector<int>& filter_dims,
34  size_t num_out,
35  const TReal* out_positions,
36  const TFeat* out_importance,
37  size_t num_inp,
38  const TReal* inp_positions,
39  const TFeat* inp_features,
40  const TFeat* inp_neighbors_importance_sum,
41  const int64_t* inp_neighbors_row_splits,
42  size_t neighbors_index_size,
43  const TIndex* neighbors_index,
44  const TFeat* neighbors_importance,
45  const int64_t* neighbors_row_splits,
46  const TReal* extents,
47  const TReal* offsets,
48  const TFeat* out_features_gradient) {
49  const bool NEIGHBOR_IMPORTANCE = neighbors_importance;
50  // const int VECSIZE = 32;
51 #define VECSIZE 32
52  typedef Eigen::Array<TReal, VECSIZE, 1> Vec_t;
53  typedef InterpolationVec<TReal, VECSIZE, INTERPOLATION> InterpolationVec_t;
54  InterpolationVec_t interpolation;
55 
56  const int in_channels = filter_dims[filter_dims.size() - 2];
57  const int out_channels = filter_dims[filter_dims.size() - 1];
58 
59  int spatial_filter_size = 1;
60  for (int i = 0; i < 3; ++i) spatial_filter_size *= filter_dims[i];
61  Eigen::Array<int, 3, 1> filter_size_xyz(filter_dims[2], filter_dims[1],
62  filter_dims[0]);
63 
64  memset(filter_backprop, 0,
65  sizeof(TOut) * spatial_filter_size * in_channels * out_channels);
66  std::mutex filter_backprop_mutex;
67 
68  typedef Eigen::Array<TFeat, VECSIZE, Eigen::Dynamic> Matrix;
69  typedef Eigen::Array<TReal, VECSIZE, 3> Matrix3C;
70 
71  tbb::parallel_for(
72  tbb::blocked_range<size_t>(0, num_out, 32),
73  [&](const tbb::blocked_range<size_t>& r) {
74  int range_length = r.end() - r.begin();
75 
76  Eigen::Matrix<TFeat, Eigen::Dynamic, Eigen::Dynamic> B(
77  in_channels * spatial_filter_size, range_length);
78  B.setZero();
79  Eigen::Matrix<TFeat, Eigen::Dynamic, Eigen::Dynamic> C(
80  out_channels, range_length);
81 
82  Matrix infeat(VECSIZE, in_channels);
83 
84  Eigen::Array<TReal, 3, 1> offsets_(offsets[0], offsets[1],
85  offsets[2]);
86 
87  Matrix3C inv_extents;
88  if (INDIVIDUAL_EXTENT == false) {
89  if (ISOTROPIC_EXTENT) {
90  inv_extents = 1 / extents[0];
91  } else {
92  inv_extents.col(0) = 1 / extents[0];
93  inv_extents.col(1) = 1 / extents[1];
94  inv_extents.col(2) = 1 / extents[2];
95  }
96  }
97 
98  for (size_t out_idx = r.begin(); out_idx != r.end();
99  ++out_idx) {
100  const int out_col = out_idx - r.begin();
101  const size_t neighbor_start = neighbors_row_splits[out_idx];
102  const size_t neighbor_end =
103  neighbors_row_splits[out_idx + 1];
104 
105  C.col(out_col) = Eigen::Map<
106  const Eigen::Array<TFeat, Eigen::Dynamic, 1>>(
107  out_features_gradient + out_idx * out_channels,
108  out_channels, 1);
109 
110  typename InterpolationVec_t::Weight_t interp_weights;
111  typename InterpolationVec_t::Idx_t interp_indices;
112 
113  int vec_valid_count = 0;
114  Vec_t x, y, z;
115 
116  // set to zero to avoid problems with vectors with less than
117  // VECSIZE valid entries
118  x.setZero();
119  y.setZero();
120  z.setZero();
121  for (size_t n = neighbor_start; n < neighbor_end; ++n) {
122  const size_t inp_idx = neighbors_index[n];
123 
124  const int i = vec_valid_count;
125  x(i) = out_positions[out_idx * 3 + 0] -
126  inp_positions[inp_idx * 3 + 0];
127  y(i) = out_positions[out_idx * 3 + 1] -
128  inp_positions[inp_idx * 3 + 1];
129  z(i) = out_positions[out_idx * 3 + 2] -
130  inp_positions[inp_idx * 3 + 2];
131 
132  if (INDIVIDUAL_EXTENT) {
133  if (ISOTROPIC_EXTENT) {
134  inv_extents.row(i) = 1 / extents[inp_idx];
135  } else {
136  inv_extents(i, 0) =
137  1 / extents[3 * inp_idx + 0];
138  inv_extents(i, 1) =
139  1 / extents[3 * inp_idx + 1];
140  inv_extents(i, 2) =
141  1 / extents[3 * inp_idx + 2];
142  }
143  }
144 
145  TFeat n_importance = NEIGHBOR_IMPORTANCE
146  ? neighbors_importance[n]
147  : TFeat(1);
148  for (int ic = 0; ic < in_channels; ++ic)
149  infeat(i, ic) =
150  inp_features[inp_idx * in_channels + ic] *
151  n_importance;
152 
153  if (NORMALIZE) {
154  TFeat normalizer(1);
155  if (NEIGHBOR_IMPORTANCE) {
156  if (inp_neighbors_importance_sum[inp_idx] !=
157  TFeat(0))
158  normalizer /= inp_neighbors_importance_sum
159  [inp_idx];
160  } else {
161  size_t num_inp_neighbors;
162  const size_t inp_neighbor_start =
163  inp_neighbors_row_splits[inp_idx];
164  const size_t inp_neighbor_end =
165  inp_neighbors_row_splits[inp_idx + 1];
166  num_inp_neighbors =
167  inp_neighbor_end - inp_neighbor_start;
168  if (num_inp_neighbors > 0)
169  normalizer /= TFeat(num_inp_neighbors);
170  }
171  for (int ic = 0; ic < in_channels; ++ic)
172  infeat(i, ic) *= normalizer;
173  }
174 
175  ++vec_valid_count;
176  if (vec_valid_count == VECSIZE ||
177  n + 1 == neighbor_end) {
178  ComputeFilterCoordinates<ALIGN_CORNERS, MAPPING,
179  TReal, VECSIZE>(
180  x, y, z, filter_size_xyz, inv_extents,
181  offsets_);
182  interpolation.Interpolate(
183  interp_weights, interp_indices, x, y, z,
184  filter_size_xyz, in_channels);
185  for (int k = 0; k < vec_valid_count; ++k) {
186  for (int j = 0; j < InterpolationVec_t::Size();
187  ++j) {
188  for (int ic = 0; ic < in_channels; ++ic)
189  B(interp_indices(j, k) + ic, out_col) +=
190  TFeat(interp_weights(j, k)) *
191  infeat(k, ic);
192  }
193  }
194  vec_valid_count = 0;
195  }
196  }
197 
198  } // out_idx
199 
200  if (out_importance) {
201  for (size_t out_idx = r.begin(); out_idx != r.end();
202  ++out_idx) {
203  const int out_col = out_idx - r.begin();
204  C.col(out_col) *= out_importance[out_idx];
205  }
206  }
207 
208  Eigen::Matrix<TOut, Eigen::Dynamic, Eigen::Dynamic> A(
209  out_channels, spatial_filter_size * in_channels);
210 
211  A = (C * B.transpose()).template cast<TOut>();
212 
213  {
214  std::lock_guard<std::mutex> lock(filter_backprop_mutex);
215  int linear_i = 0;
216  for (int j = 0; j < spatial_filter_size * in_channels; ++j)
217  for (int i = 0; i < out_channels; ++i, ++linear_i) {
218  filter_backprop[linear_i] += A(i, j);
219  }
220  }
221  });
222 #undef VECSIZE
223 }
224 
307 template <class TFeat, class TOut, class TReal, class TIndex>
308 void CConvTransposeBackpropFilterCPU(TOut* filter_backprop,
309  const std::vector<int>& filter_dims,
310  size_t num_out,
311  const TReal* out_positions,
312  const TFeat* out_importance,
313  size_t num_inp,
314  const TReal* inp_positions,
315  const TFeat* inp_features,
316  const TFeat* inp_neighbors_importance_sum,
317  const int64_t* inp_neighbors_row_splits,
318  size_t neighbors_index_size,
319  const TIndex* neighbors_index,
320  const TFeat* neighbors_importance,
321  const int64_t* neighbors_row_splits,
322  const TReal* extents,
323  const TReal* offsets,
324  const TFeat* out_features_gradient,
325  InterpolationMode interpolation,
326  CoordinateMapping coordinate_mapping,
327  bool align_corners,
328  bool individual_extent,
329  bool isotropic_extent,
330  bool normalize) {
331 #define FN_PARAMETERS \
332  filter_backprop, filter_dims, num_out, out_positions, out_importance, \
333  num_inp, inp_positions, inp_features, \
334  inp_neighbors_importance_sum, inp_neighbors_row_splits, \
335  neighbors_index_size, neighbors_index, neighbors_importance, \
336  neighbors_row_splits, extents, offsets, out_features_gradient
337 
338 #define CALL_TEMPLATE(INTERPOLATION, MAPPING, ALIGN_CORNERS, \
339  INDIVIDUAL_EXTENT, ISOTROPIC_EXTENT, NORMALIZE) \
340  if (INTERPOLATION == interpolation && MAPPING == coordinate_mapping && \
341  ALIGN_CORNERS == align_corners && \
342  INDIVIDUAL_EXTENT == individual_extent && \
343  ISOTROPIC_EXTENT == isotropic_extent && NORMALIZE == normalize) \
344  _CConvTransposeBackpropFilterCPU<TFeat, TOut, TReal, TIndex, \
345  INTERPOLATION, MAPPING, \
346  ALIGN_CORNERS, INDIVIDUAL_EXTENT, \
347  ISOTROPIC_EXTENT, NORMALIZE>( \
348  FN_PARAMETERS);
349 
350 #define CALL_TEMPLATE2(INTERPOLATION, MAPPING) \
351  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, true, true, true) \
352  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, true, true, false) \
353  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, true, false, true) \
354  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, true, false, false) \
355  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, false, true, true) \
356  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, false, true, false) \
357  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, false, false, true) \
358  CALL_TEMPLATE(INTERPOLATION, MAPPING, true, false, false, false) \
359  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, true, true, true) \
360  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, true, true, false) \
361  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, true, false, true) \
362  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, true, false, false) \
363  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, false, true, true) \
364  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, false, true, false) \
365  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, false, false, true) \
366  CALL_TEMPLATE(INTERPOLATION, MAPPING, false, false, false, false)
367 
368 #define CALL_TEMPLATE3(INTERPOLATION) \
369  CALL_TEMPLATE2(INTERPOLATION, CoordinateMapping::BALL_TO_CUBE_RADIAL) \
370  CALL_TEMPLATE2(INTERPOLATION, \
371  CoordinateMapping::BALL_TO_CUBE_VOLUME_PRESERVING) \
372  CALL_TEMPLATE2(INTERPOLATION, CoordinateMapping::IDENTITY)
373 
374 #define CALL_TEMPLATE4 \
375  CALL_TEMPLATE3(InterpolationMode::LINEAR) \
376  CALL_TEMPLATE3(InterpolationMode::LINEAR_BORDER) \
377  CALL_TEMPLATE3(InterpolationMode::NEAREST_NEIGHBOR)
378 
380 
381 #undef CALL_TEMPLATE
382 #undef CALL_TEMPLATE2
383 #undef CALL_TEMPLATE3
384 #undef CALL_TEMPLATE4
385 
386 #undef FN_PARAMETERS
387 }
388 
389 } // namespace impl
390 } // namespace ml
391 } // namespace cloudViewer
#define CALL_TEMPLATE4
#define VECSIZE
__host__ __device__ float2 normalize(float2 v)
Definition: cutil_math.h:1179
void _CConvTransposeBackpropFilterCPU(TOut *filter_backprop, const std::vector< int > &filter_dims, size_t num_out, const TReal *out_positions, const TFeat *out_importance, size_t num_inp, const TReal *inp_positions, const TFeat *inp_features, const TFeat *inp_neighbors_importance_sum, const int64_t *inp_neighbors_row_splits, size_t neighbors_index_size, const TIndex *neighbors_index, const TFeat *neighbors_importance, const int64_t *neighbors_row_splits, const TReal *extents, const TReal *offsets, const TFeat *out_features_gradient)
void CConvTransposeBackpropFilterCPU(TOut *filter_backprop, const std::vector< int > &filter_dims, size_t num_out, const TReal *out_positions, const TFeat *out_importance, size_t num_inp, const TReal *inp_positions, const TFeat *inp_features, const TFeat *inp_neighbors_importance_sum, const int64_t *inp_neighbors_row_splits, size_t neighbors_index_size, const TIndex *neighbors_index, const TFeat *neighbors_importance, const int64_t *neighbors_row_splits, const TReal *extents, const TReal *offsets, const TFeat *out_features_gradient, InterpolationMode interpolation, CoordinateMapping coordinate_mapping, bool align_corners, bool individual_extent, bool isotropic_extent, bool normalize)
void ComputeFilterCoordinates(Eigen::Array< T, VECSIZE, 1 > &x, Eigen::Array< T, VECSIZE, 1 > &y, Eigen::Array< T, VECSIZE, 1 > &z, const Eigen::Array< int, 3, 1 > &filter_size, const Eigen::Array< T, VECSIZE, 3 > &inv_extents, const Eigen::Array< T, 3, 1 > &offset)
Generic file read and write utility for python interface.
Class for computing interpolation weights.