cpp_api/api/LU_8cpp_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 #include "cloudViewer/core/linalg/LU.h"


 #include "cloudViewer/core/CUDAUtils.h"

 #include "cloudViewer/core/linalg/LUImpl.h"

 #include "cloudViewer/core/linalg/LinalgHeadersCPU.h"

 #include "cloudViewer/core/linalg/Tri.h"


 namespace cloudViewer {

 namespace core {


 /// Expands a 1-based pivot (row-swap) array into an explicit permutation
 /// index vector.
 ///
 /// Starts from the identity sequence [0, number_of_rows) and replays the
 /// recorded swaps in order: for every i in [0, number_of_indices), entry i is
 /// exchanged with entry ipiv[i] - 1.
 ///
 /// \param ipiv 1D tensor of 1-based pivot indices. It is read on the CPU as
 /// Int32, whatever its original device and integer dtype.
 /// \param number_of_indices Number of leading entries of \p ipiv to apply.
 /// Must lie in [0, number_of_rows] and must not exceed the length of \p ipiv.
 /// \param number_of_rows Length of the returned index vector.
 /// \return Int64 index tensor of length \p number_of_rows, placed on the
 /// device of \p ipiv.
 static Tensor GetColPermutation(const Tensor& ipiv,
                                 int number_of_indices,
                                 int number_of_rows) {
     // The loop below indexes both buffers through raw pointers, so every
     // bound has to be verified up front.
     if (number_of_indices < 0 || number_of_indices > number_of_rows) {
         utility::LogError(
                 "Number of pivot indices ({}) must be in range [0, {}].",
                 number_of_indices, number_of_rows);
     }
     const auto ipiv_shape = ipiv.GetShape();
     if (ipiv_shape.size() != 1 || ipiv_shape[0] < number_of_indices) {
         utility::LogError(
                 "ipiv must be a 1D tensor with at least {} elements.",
                 number_of_indices);
     }

     Tensor full_ipiv =
             Tensor::Arange(0, number_of_rows, 1, core::Int32, Device("CPU:0"));
     // Contiguous() guarantees a dense buffer for the raw pointer walk.
     Tensor ipiv_cpu = ipiv.To(Device("CPU:0"), core::Int32, /*copy=*/false)
                               .Contiguous();
     const int* ipiv_ptr = static_cast<const int*>(ipiv_cpu.GetDataPtr());
     int* full_ipiv_ptr = static_cast<int*>(full_ipiv.GetDataPtr());

     for (int i = 0; i < number_of_indices; ++i) {
         // Pivots are 1-based; reject anything that would index outside
         // full_ipiv instead of silently corrupting memory.
         const int pivot = ipiv_ptr[i];
         if (pivot < 1 || pivot > number_of_rows) {
             utility::LogError(
                     "Pivot index {} at position {} is out of range [1, {}].",
                     pivot, i, number_of_rows);
         }
         const int swap_with = pivot - 1;
         const int temp = full_ipiv_ptr[i];
         full_ipiv_ptr[i] = full_ipiv_ptr[swap_with];
         full_ipiv_ptr[swap_with] = temp;
     }

     // This is column permutation for P, where P.A = L.U.
     // Int64 is required by AdvancedIndexing.
     return full_ipiv.To(ipiv.GetDevice(), core::Int64, /*copy=*/false);
 }


 // Decompose output in P, L, U matrix form.

 static void OutputToPLU(const Tensor& output,

                         Tensor& permutation,

                         Tensor& lower,

                         Tensor& upper,

                         const Tensor& ipiv,

                         const bool permute_l) {

     int n = output.GetShape()[0];

     Device device = output.GetDevice();


     // Get upper and lower matrix from output matrix.

     Triul(output, upper, lower, 0);

     // Get column permutation vector from pivot indices vector.

     Tensor col_permutation = GetColPermutation(ipiv, ipiv.GetShape()[0], n);

     // Creating "Permutation Matrix (P in P.A = L.U)".

     permutation = Tensor::Eye(n, output.GetDtype(), device)

                           .IndexGet({col_permutation});

     // Calculating P in A = P.L.U. [P.Inverse() = P.T()].

     permutation = permutation.T().Contiguous();

     // Permute_l option, to return L as L = P.L.

     if (permute_l) {

         lower = permutation.Matmul(lower);

     }

 }


 /// Computes the LU factorization of a 2D tensor and returns it in packed
 /// form together with the pivot index array.
 ///
 /// \param A Input tensor. Must be 2D, of dtype Float32 or Float64, and have
 /// no zero-sized dimension.
 /// \param ipiv [out] Pivot indices of length min(rows, cols). The dtype is
 /// Int32 on CUDA, Int64 on SYCL, and on CPU follows the size of
 /// CLOUDVIEWER_CPU_LINALG_INT.
 /// \param output [out] Packed factors with the same shape as \p A: lower
 /// triangular elements hold L (its unit diagonal is implicit), upper
 /// triangular and diagonal elements hold U.
 void LUIpiv(const Tensor& A, Tensor& ipiv, Tensor& output) {
     AssertTensorDtypes(A, {Float32, Float64});

     const Device device = A.GetDevice();
     const Dtype dtype = A.GetDtype();

     // Check dimensions.
     const SizeVector A_shape = A.GetShape();
     if (A_shape.size() != 2) {
         utility::LogError("Tensor must be 2D, but got {}D.", A_shape.size());
     }

     const int64_t rows = A_shape[0];
     const int64_t cols = A_shape[1];
     if (rows == 0 || cols == 0) {
         utility::LogError(
                 "Tensor shapes should not contain dimensions with zero.");
     }

     // "output" tensor is modified in-place as output.
     // Operations are COL_MAJOR, hence the transposed copy.
     output = A.T().Clone();
     void* A_data = output.GetDataPtr();

     // Returns LU decomposition in form of an output matrix,
     // with lower triangular elements as L, upper triangular and diagonal
     // elements as U, (diagonal elements of L are unity), and ipiv array,
     // which has the pivot indices (for 1 <= i <= min(M,N), row i of the
     // matrix was interchanged with row IPIV(i)).
     int64_t ipiv_len = std::min(rows, cols);
     if (device.IsCUDA()) {
 #ifdef BUILD_CUDA_MODULE
         CUDAScopedDevice scoped_device(device);
         ipiv = Tensor::Empty({ipiv_len}, core::Int32, device);
         void* ipiv_data = ipiv.GetDataPtr();
         LUCUDA(A_data, ipiv_data, rows, cols, dtype, device);
 #else
         // Report through LogError like every other failure path here: an
         // informational message would presumably let execution continue and
         // hand back an unfactored "output" with "ipiv" never assigned.
         utility::LogError("Unimplemented device.");
 #endif
     } else if (device.IsSYCL()) {
 #ifdef BUILD_SYCL_MODULE
         ipiv = Tensor::Empty({ipiv_len}, core::Int64, device);
         void* ipiv_data = ipiv.GetDataPtr();
         LUSYCL(A_data, ipiv_data, rows, cols, dtype, device);
 #else
         // Same reasoning as the CUDA branch: fail instead of returning an
         // unfactored result.
         utility::LogError("Unimplemented device.");
 #endif
     } else {
         // The CPU backend writes pivots as CLOUDVIEWER_CPU_LINALG_INT, so the
         // tensor dtype has to match that integer width.
         Dtype ipiv_dtype;
         if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 4) {
             ipiv_dtype = core::Int32;
         } else if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 8) {
             ipiv_dtype = core::Int64;
         } else {
             utility::LogError("Unsupported CLOUDVIEWER_CPU_LINALG_INT type.");
         }
         ipiv = Tensor::Empty({ipiv_len}, ipiv_dtype, device);
         void* ipiv_data = ipiv.GetDataPtr();
         LUCPU(A_data, ipiv_data, rows, cols, dtype, device);
     }
     // COL_MAJOR -> ROW_MAJOR.
     output = output.T().Contiguous();
 }


 void LU(const Tensor& A,

         Tensor& permutation,

         Tensor& lower,

         Tensor& upper,

         const bool permute_l) {

     AssertTensorDtypes(A, {Float32, Float64});


     // Get output matrix and ipiv.

     Tensor ipiv, output;

     LUIpiv(A, ipiv, output);


     // Decompose output in P, L, U matrix form.

     OutputToPLU(output, permutation, lower, upper, ipiv, permute_l);


     // For non-square input case of shape {rows, cols}, shape of P, L, U:

     // P {rows, rows}; L {rows, min(rows, cols)}; U {min(rows, cols), cols}.

     if (A.GetShape()[0] != A.GetShape()[1]) {

         int64_t min_ = std::min(A.GetShape()[0], A.GetShape()[1]);

         lower = lower.Slice(1, 0, min_);

         upper = upper.Slice(0, 0, min_);

     }

 }


 }  // namespace core

 }  // namespace cloudViewer

CUDAUtils.h
Common CUDA utilities.

LUImpl.h

LU.h

LinalgHeadersCPU.h

CLOUDVIEWER_CPU_LINALG_INT
#define CLOUDVIEWER_CPU_LINALG_INT
Definition: LinalgHeadersCPU.h:23

AssertTensorDtypes
#define AssertTensorDtypes(tensor,...)
Definition: TensorCheck.h:33

Tri.h

cloudViewer::core::CUDAScopedDevice
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214

cloudViewer::core::Device
Definition: Device.h:18

cloudViewer::core::Dtype
Definition: Dtype.h:21

cloudViewer::core::SizeVector
Definition: SizeVector.h:70

cloudViewer::core::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:119

cloudViewer::core::Tensor
Definition: Tensor.h:32

cloudViewer::core::Tensor::Contiguous
Tensor Contiguous() const
Definition: Tensor.cpp:772

cloudViewer::core::Tensor::Matmul
Tensor Matmul(const Tensor &rhs) const
Definition: Tensor.cpp:1919

cloudViewer::core::Tensor::Arange
static Tensor Arange(const Scalar start, const Scalar stop, const Scalar step=1, const Dtype dtype=core::Int64, const Device &device=core::Device("CPU:0"))
Create a 1D tensor with evenly spaced values in the given interval.
Definition: Tensor.cpp:436

cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:1164

cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition: Tensor.h:1144

cloudViewer::core::Tensor::Eye
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:418

cloudViewer::core::Tensor::IndexGet
Tensor IndexGet(const std::vector< Tensor > &index_tensors) const
Advanced indexing getter. This will always allocate a new Tensor.
Definition: Tensor.cpp:905

cloudViewer::core::Tensor::GetDevice
Device GetDevice() const override
Definition: Tensor.cpp:1435

cloudViewer::core::Tensor::Clone
Tensor Clone() const
Copy Tensor to the same device.
Definition: Tensor.h:502

cloudViewer::core::Tensor::Empty
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
Definition: Tensor.cpp:400

cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:1127

cloudViewer::core::Tensor::T
Tensor T() const
Expects input to be <= 2-D Tensor by swapping dimension 0 and 1.
Definition: Tensor.cpp:1079

cloudViewer::core::Tensor::Slice
Tensor Slice(int64_t dim, int64_t start, int64_t stop, int64_t step=1) const
Definition: Tensor.cpp:857

cloudViewer::core::Tensor::To
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739

LogInfo
#define LogInfo(...)
Definition: Logging.h:81

LogError
#define LogError(...)
Definition: Logging.h:60

min
int min(int a, int b)
Definition: cutil_math.h:53

cloudViewer::core::OutputToPLU
static void OutputToPLU(const Tensor &output, Tensor &permutation, Tensor &lower, Tensor &upper, const Tensor &ipiv, const bool permute_l)
Definition: LU.cpp:38

cloudViewer::core::GetColPermutation
static Tensor GetColPermutation(const Tensor &ipiv, int number_of_indices, int number_of_rows)
Definition: LU.cpp:19

cloudViewer::core::LUSYCL
void LUSYCL(void *A_data, void *ipiv_data, int64_t m, int64_t n, Dtype dtype, const Device &device)
Definition: LUSYCL.cpp:19

cloudViewer::core::LUCUDA
void LUCUDA(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
Definition: LUCUDA.cpp:15

cloudViewer::core::LUIpiv
void LUIpiv(const Tensor &A, Tensor &ipiv, Tensor &output)
Definition: LU.cpp:62

cloudViewer::core::Int64
const Dtype Int64
Definition: Dtype.cpp:47

cloudViewer::core::Triul
void Triul(const Tensor &A, Tensor &upper, Tensor &lower, const int diagonal)
Definition: Tri.cpp:79

cloudViewer::core::LU
void LU(const Tensor &A, Tensor &permutation, Tensor &lower, Tensor &upper, const bool permute_l)
Definition: LU.cpp:126

cloudViewer::core::Float64
const Dtype Float64
Definition: Dtype.cpp:43

cloudViewer::core::LUCPU
void LUCPU(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
Definition: LUCPU.cpp:15

cloudViewer::core::Int32
const Dtype Int32
Definition: Dtype.cpp:46

cloudViewer::core::Float32
const Dtype Float32
Definition: Dtype.cpp:42

cloudViewer
Generic file read and write utility for python interface.
Definition: AutoSegmentationTools.h:16