ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
Inverse.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <unordered_map>
11 
14 
15 namespace cloudViewer {
16 namespace core {
17 
18 void Inverse(const Tensor &A, Tensor &output) {
20 
21  const Device device = A.GetDevice();
22  const Dtype dtype = A.GetDtype();
23 
24  // Check dimensions
25  SizeVector A_shape = A.GetShape();
26  if (A_shape.size() != 2) {
27  utility::LogError("Tensor must be 2D, but got {}D.", A_shape.size());
28  }
29  if (A_shape[0] != A_shape[1]) {
30  utility::LogError("Tensor must be square, but got {} x {}.", A_shape[0],
31  A_shape[1]);
32  }
33 
34  int64_t n = A_shape[0];
35  if (n == 0) {
37  "Tensor shapes should not contain dimensions with zero.");
38  }
39 
40  if (device.IsCUDA()) {
41 #ifdef BUILD_CUDA_MODULE
42  CUDAScopedDevice scoped_device(device);
43  Tensor ipiv = Tensor::Zeros({n}, core::Int32, device);
44  void *ipiv_data = ipiv.GetDataPtr();
45 
46  // cuSolver does not support getri, so we have to provide an identity
47  // matrix. This matrix is modified in-place as output.
48  Tensor A_T = A.T().Contiguous();
49  void *A_data = A_T.GetDataPtr();
50 
51  output = Tensor::Eye(n, dtype, device);
52  void *output_data = output.GetDataPtr();
53 
54  InverseCUDA(A_data, ipiv_data, output_data, n, dtype, device);
55  output = output.T();
56 #else
57  utility::LogError("Unimplemented device.");
58 #endif
59  } else if (device.IsSYCL()) {
60 #ifdef BUILD_SYCL_MODULE
61  Tensor ipiv = Tensor::Empty({n}, core::Int64, device);
62  void *ipiv_data = ipiv.GetDataPtr();
63 
64  // LAPACKE supports getri, A is in-place modified as output.
65  Tensor A_T = A.T().To(device, /*copy=*/true);
66  void *A_data = A_T.GetDataPtr();
67 
68  InverseSYCL(A_data, ipiv_data, nullptr, n, dtype, device);
69  output = A_T.T();
70 #else
71  utility::LogError("Unimplemented device.");
72 #endif
73  } else {
74  Dtype ipiv_dtype;
75  if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 4) {
76  ipiv_dtype = core::Int32;
77  } else if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 8) {
78  ipiv_dtype = core::Int64;
79  } else {
80  utility::LogError("Unsupported CLOUDVIEWER_CPU_LINALG_INT type.");
81  }
82  Tensor ipiv = Tensor::Empty({n}, ipiv_dtype, device);
83  void *ipiv_data = ipiv.GetDataPtr();
84 
85  // LAPACKE supports getri, A is in-place modified as output.
86  Tensor A_T = A.T().To(device, /*copy=*/true);
87  void *A_data = A_T.GetDataPtr();
88 
89  InverseCPU(A_data, ipiv_data, nullptr, n, dtype, device);
90  output = A_T.T();
91  }
92 }
93 } // namespace core
94 } // namespace cloudViewer
Common CUDA utilities.
#define CLOUDVIEWER_CPU_LINALG_INT
#define AssertTensorDtypes(tensor,...)
Definition: TensorCheck.h:33
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
Tensor Contiguous() const
Definition: Tensor.cpp:772
Dtype GetDtype() const
Definition: Tensor.h:1164
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:418
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor fill with zeros.
Definition: Tensor.cpp:406
Device GetDevice() const override
Definition: Tensor.cpp:1435
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
Definition: Tensor.cpp:400
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor T() const
Expects input to be <= 2-D Tensor by swapping dimension 0 and 1.
Definition: Tensor.cpp:1079
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739
#define LogError(...)
Definition: Logging.h:60
void InverseCPU(void *A_data, void *ipiv_data, [[maybe_unused]] void *output_data, int64_t n, Dtype dtype, const Device &device)
const Dtype Int64
Definition: Dtype.cpp:47
void InverseSYCL(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
Definition: InverseSYCL.cpp:19
void Inverse(const Tensor &A, Tensor &output)
Computes A^{-1} with LU factorization, where A is a N x N square matrix.
Definition: Inverse.cpp:18
const Dtype Float64
Definition: Dtype.cpp:43
const Dtype Int32
Definition: Dtype.cpp:46
void InverseCUDA(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
Definition: InverseCUDA.cpp:16
const Dtype Float32
Definition: Dtype.cpp:42
Generic file read and write utility for python interface.