ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
LU.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
14 
15 namespace cloudViewer {
16 namespace core {
17 
18 // Get column permutation tensor from ipiv (swapping index array).
19 static Tensor GetColPermutation(const Tensor& ipiv,
20  int number_of_indices,
21  int number_of_rows) {
22  Tensor full_ipiv =
23  Tensor::Arange(0, number_of_rows, 1, core::Int32, Device("CPU:0"));
24  Tensor ipiv_cpu = ipiv.To(Device("CPU:0"), core::Int32, /*copy=*/false);
25  const int* ipiv_ptr = static_cast<const int*>(ipiv_cpu.GetDataPtr());
26  int* full_ipiv_ptr = static_cast<int*>(full_ipiv.GetDataPtr());
27  for (int i = 0; i < number_of_indices; i++) {
28  int temp = full_ipiv_ptr[i];
29  full_ipiv_ptr[i] = full_ipiv_ptr[ipiv_ptr[i] - 1];
30  full_ipiv_ptr[ipiv_ptr[i] - 1] = temp;
31  }
32  // This is column permutation for P, where P.A = L.U.
33  // Int64 is required by AdvancedIndexing.
34  return full_ipiv.To(ipiv.GetDevice(), core::Int64, /*copy=*/false);
35 }
36 
37 // Decompose output in P, L, U matrix form.
38 static void OutputToPLU(const Tensor& output,
39  Tensor& permutation,
40  Tensor& lower,
41  Tensor& upper,
42  const Tensor& ipiv,
43  const bool permute_l) {
44  int n = output.GetShape()[0];
45  Device device = output.GetDevice();
46 
47  // Get upper and lower matrix from output matrix.
48  Triul(output, upper, lower, 0);
49  // Get column permutation vector from pivot indices vector.
50  Tensor col_permutation = GetColPermutation(ipiv, ipiv.GetShape()[0], n);
51  // Creating "Permutation Matrix (P in P.A = L.U)".
52  permutation = Tensor::Eye(n, output.GetDtype(), device)
53  .IndexGet({col_permutation});
54  // Calculating P in A = P.L.U. [P.Inverse() = P.T()].
55  permutation = permutation.T().Contiguous();
56  // Permute_l option, to return L as L = P.L.
57  if (permute_l) {
58  lower = permutation.Matmul(lower);
59  }
60 }
61 
62 void LUIpiv(const Tensor& A, Tensor& ipiv, Tensor& output) {
64 
65  const Device device = A.GetDevice();
66  const Dtype dtype = A.GetDtype();
67 
68  // Check dimensions.
69  const SizeVector A_shape = A.GetShape();
70  if (A_shape.size() != 2) {
71  utility::LogError("Tensor must be 2D, but got {}D.", A_shape.size());
72  }
73 
74  const int64_t rows = A_shape[0];
75  const int64_t cols = A_shape[1];
76  if (rows == 0 || cols == 0) {
78  "Tensor shapes should not contain dimensions with zero.");
79  }
80 
81  // "output" tensor is modified in-place as output.
82  // Operations are COL_MAJOR.
83  output = A.T().Clone();
84  void* A_data = output.GetDataPtr();
85 
86  // Returns LU decomposition in form of an output matrix,
87  // with lower triangular elements as L, upper triangular and diagonal
88  // elements as U, (diagonal elements of L are unity), and ipiv array,
89  // which has the pivot indices (for 1 <= i <= min(M,N), row i of the
90  // matrix was interchanged with row IPIV(i).
91  int64_t ipiv_len = std::min(rows, cols);
92  if (device.IsCUDA()) {
93 #ifdef BUILD_CUDA_MODULE
94  CUDAScopedDevice scoped_device(device);
95  ipiv = Tensor::Empty({ipiv_len}, core::Int32, device);
96  void* ipiv_data = ipiv.GetDataPtr();
97  LUCUDA(A_data, ipiv_data, rows, cols, dtype, device);
98 #else
99  utility::LogInfo("Unimplemented device.");
100 #endif
101  } else if (device.IsSYCL()) {
102 #ifdef BUILD_SYCL_MODULE
103  ipiv = Tensor::Empty({ipiv_len}, core::Int64, device);
104  void* ipiv_data = ipiv.GetDataPtr();
105  LUSYCL(A_data, ipiv_data, rows, cols, dtype, device);
106 #else
107  utility::LogInfo("Unimplemented device.");
108 #endif
109  } else {
110  Dtype ipiv_dtype;
111  if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 4) {
112  ipiv_dtype = core::Int32;
113  } else if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 8) {
114  ipiv_dtype = core::Int64;
115  } else {
116  utility::LogError("Unsupported CLOUDVIEWER_CPU_LINALG_INT type.");
117  }
118  ipiv = Tensor::Empty({ipiv_len}, ipiv_dtype, device);
119  void* ipiv_data = ipiv.GetDataPtr();
120  LUCPU(A_data, ipiv_data, rows, cols, dtype, device);
121  }
122  // COL_MAJOR -> ROW_MAJOR.
123  output = output.T().Contiguous();
124 }
125 
126 void LU(const Tensor& A,
127  Tensor& permutation,
128  Tensor& lower,
129  Tensor& upper,
130  const bool permute_l) {
132 
133  // Get output matrix and ipiv.
134  Tensor ipiv, output;
135  LUIpiv(A, ipiv, output);
136 
137  // Decompose output in P, L, U matrix form.
138  OutputToPLU(output, permutation, lower, upper, ipiv, permute_l);
139 
140  // For non-square input case of shape {rows, cols}, shape of P, L, U:
141  // P {rows, rows}; L {rows, min(rows, cols)}; U {min(rows, cols), cols}.
142  if (A.GetShape()[0] != A.GetShape()[1]) {
143  int64_t min_ = std::min(A.GetShape()[0], A.GetShape()[1]);
144  lower = lower.Slice(1, 0, min_);
145  upper = upper.Slice(0, 0, min_);
146  }
147 }
148 
149 } // namespace core
150 } // namespace cloudViewer
Common CUDA utilities.
#define CLOUDVIEWER_CPU_LINALG_INT
#define AssertTensorDtypes(tensor,...)
Definition: TensorCheck.h:33
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
Tensor Contiguous() const
Definition: Tensor.cpp:772
Tensor Matmul(const Tensor &rhs) const
Definition: Tensor.cpp:1919
static Tensor Arange(const Scalar start, const Scalar stop, const Scalar step=1, const Dtype dtype=core::Int64, const Device &device=core::Device("CPU:0"))
Create a 1D tensor with evenly spaced values in the given interval.
Definition: Tensor.cpp:436
Dtype GetDtype() const
Definition: Tensor.h:1164
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:418
Tensor IndexGet(const std::vector< Tensor > &index_tensors) const
Advanced indexing getter. This will always allocate a new Tensor.
Definition: Tensor.cpp:905
Device GetDevice() const override
Definition: Tensor.cpp:1435
Tensor Clone() const
Copy Tensor to the same device.
Definition: Tensor.h:502
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
Definition: Tensor.cpp:400
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor T() const
Expects input to be <= 2-D Tensor by swapping dimension 0 and 1.
Definition: Tensor.cpp:1079
Tensor Slice(int64_t dim, int64_t start, int64_t stop, int64_t step=1) const
Definition: Tensor.cpp:857
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739
#define LogInfo(...)
Definition: Logging.h:81
#define LogError(...)
Definition: Logging.h:60
int min(int a, int b)
Definition: cutil_math.h:53
static void OutputToPLU(const Tensor &output, Tensor &permutation, Tensor &lower, Tensor &upper, const Tensor &ipiv, const bool permute_l)
Definition: LU.cpp:38
static Tensor GetColPermutation(const Tensor &ipiv, int number_of_indices, int number_of_rows)
Definition: LU.cpp:19
void LUSYCL(void *A_data, void *ipiv_data, int64_t m, int64_t n, Dtype dtype, const Device &device)
Definition: LUSYCL.cpp:19
void LUCUDA(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
Definition: LUCUDA.cpp:15
void LUIpiv(const Tensor &A, Tensor &ipiv, Tensor &output)
Definition: LU.cpp:62
const Dtype Int64
Definition: Dtype.cpp:47
void Triul(const Tensor &A, Tensor &upper, Tensor &lower, const int diagonal)
Definition: Tri.cpp:79
void LU(const Tensor &A, Tensor &permutation, Tensor &lower, Tensor &upper, const bool permute_l)
Definition: LU.cpp:126
const Dtype Float64
Definition: Dtype.cpp:43
void LUCPU(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
Definition: LUCPU.cpp:15
const Dtype Int32
Definition: Dtype.cpp:46
const Dtype Float32
Definition: Dtype.cpp:42
Generic file read and write utility for python interface.