ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
LeastSquares.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <unordered_map>
11 
13 
14 namespace cloudViewer {
15 namespace core {
16 
17 void LeastSquares(const Tensor &A, const Tensor &B, Tensor &X) {
21 
22  const Device device = A.GetDevice();
23  const Dtype dtype = A.GetDtype();
24 
25  // Check dimensions
26  SizeVector A_shape = A.GetShape();
27  SizeVector B_shape = B.GetShape();
28  if (A_shape.size() != 2) {
29  utility::LogError("Tensor A must be 2D, but got {}D", A_shape.size());
30  }
31  if (B_shape.size() != 1 && B_shape.size() != 2) {
33  "Tensor B must be 1D (vector) or 2D (matrix), but got {}D.",
34  B_shape.size());
35  }
36  if (B_shape[0] != A_shape[0]) {
37  utility::LogError("Tensor A and B's first dimension mismatch.");
38  }
39 
40  int64_t m = A_shape[0];
41  int64_t n = A_shape[1];
42  int64_t k = B_shape.size() == 2 ? B_shape[1] : 1;
43  if (m == 0 || n == 0 || k == 0) {
45  "Tensor shapes should not contain dimensions with zero.");
46  }
47 
48  if (m < n) {
49  utility::LogError("Tensor A shape must satisfy rows({}) > cols({}).", m,
50  n);
51  }
52 
53  // A and B are modified in-place
54  Tensor A_copy = A.T().Clone();
55  Tensor B_copy = B.T().Clone();
56 
57  void *A_data = A_copy.GetDataPtr();
58  void *B_data = B_copy.GetDataPtr();
59 
60  if (device.IsCUDA()) {
61 #ifdef BUILD_CUDA_MODULE
62  CUDAScopedDevice scoped_device(device);
63  LeastSquaresCUDA(A_data, B_data, m, n, k, dtype, device);
64 #else
65  utility::LogError("Unimplemented device.");
66 #endif
67  } else if (device.IsSYCL()) {
68 #ifdef BUILD_SYCL_MODULE
69  LeastSquaresSYCL(A_data, B_data, m, n, k, dtype, device);
70 #else
71  utility::LogError("Unimplemented device.");
72 #endif
73  } else {
74  LeastSquaresCPU(A_data, B_data, m, n, k, dtype, device);
75  }
76 
77  X = B_copy.T().Slice(0, 0, n);
78 }
79 } // namespace core
80 } // namespace cloudViewer
Common CUDA utilities.
void * X
Definition: SmallVector.cpp:45
#define AssertTensorDevice(tensor,...)
Definition: TensorCheck.h:45
#define AssertTensorDtype(tensor,...)
Definition: TensorCheck.h:21
#define AssertTensorDtypes(tensor,...)
Definition: TensorCheck.h:33
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
bool IsCUDA() const
Returns true iff device type is CUDA.
Definition: Device.h:49
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
Definition: Device.h:52
Dtype GetDtype() const
Definition: Tensor.h:1164
Device GetDevice() const override
Definition: Tensor.cpp:1435
Tensor Clone() const
Copy Tensor to the same device.
Definition: Tensor.h:502
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor T() const
Transposes the tensor by swapping dimensions 0 and 1. Expects the input to be at most 2-D.
Definition: Tensor.cpp:1079
Tensor Slice(int64_t dim, int64_t start, int64_t stop, int64_t step=1) const
Definition: Tensor.cpp:857
#define LogError(...)
Definition: Logging.h:60
void LeastSquaresCUDA(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
void LeastSquares(const Tensor &A, const Tensor &B, Tensor &X)
Solve AX = B with QR decomposition. A is a full-rank m x n matrix (m >= n).
const Dtype Float64
Definition: Dtype.cpp:43
void LeastSquaresSYCL(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
void LeastSquaresCPU(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
const Dtype Float32
Definition: Dtype.cpp:42
Generic file read and write utility for python interface.