cpp_api/api/Inverse_8cpp_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 #include "cloudViewer/core/linalg/Inverse.h"


 #include <unordered_map>


 #include "cloudViewer/core/CUDAUtils.h"

 #include "cloudViewer/core/linalg/LinalgHeadersCPU.h"


 namespace cloudViewer {

 namespace core {


 void Inverse(const Tensor &A, Tensor &output) {

     AssertTensorDtypes(A, {Float32, Float64});


     const Device device = A.GetDevice();

     const Dtype dtype = A.GetDtype();


     // Check dimensions

     SizeVector A_shape = A.GetShape();

     if (A_shape.size() != 2) {

         utility::LogError("Tensor must be 2D, but got {}D.", A_shape.size());

     }

     if (A_shape[0] != A_shape[1]) {

         utility::LogError("Tensor must be square, but got {} x {}.", A_shape[0],

                           A_shape[1]);

     }


     int64_t n = A_shape[0];

     if (n == 0) {

         utility::LogError(

                 "Tensor shapes should not contain dimensions with zero.");

     }


     if (device.IsCUDA()) {

 #ifdef BUILD_CUDA_MODULE

         CUDAScopedDevice scoped_device(device);

         Tensor ipiv = Tensor::Zeros({n}, core::Int32, device);

         void *ipiv_data = ipiv.GetDataPtr();


         // cuSolver does not support getri, so we have to provide an identity

         // matrix. This matrix is modified in-place as output.

         Tensor A_T = A.T().Contiguous();

         void *A_data = A_T.GetDataPtr();


         output = Tensor::Eye(n, dtype, device);

         void *output_data = output.GetDataPtr();


         InverseCUDA(A_data, ipiv_data, output_data, n, dtype, device);

         output = output.T();

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else if (device.IsSYCL()) {

 #ifdef BUILD_SYCL_MODULE

         Tensor ipiv = Tensor::Empty({n}, core::Int64, device);

         void *ipiv_data = ipiv.GetDataPtr();


         // LAPACKE supports getri, A is in-place modified as output.

         Tensor A_T = A.T().To(device, /*copy=*/true);

         void *A_data = A_T.GetDataPtr();


         InverseSYCL(A_data, ipiv_data, nullptr, n, dtype, device);

         output = A_T.T();

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else {

         Dtype ipiv_dtype;

         if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 4) {

             ipiv_dtype = core::Int32;

         } else if (sizeof(CLOUDVIEWER_CPU_LINALG_INT) == 8) {

             ipiv_dtype = core::Int64;

         } else {

             utility::LogError("Unsupported CLOUDVIEWER_CPU_LINALG_INT type.");

         }

         Tensor ipiv = Tensor::Empty({n}, ipiv_dtype, device);

         void *ipiv_data = ipiv.GetDataPtr();


         // LAPACKE supports getri, A is in-place modified as output.

         Tensor A_T = A.T().To(device, /*copy=*/true);

         void *A_data = A_T.GetDataPtr();


         InverseCPU(A_data, ipiv_data, nullptr, n, dtype, device);

         output = A_T.T();

     }

 }

 }  // namespace core

 }  // namespace cloudViewer

CUDAUtils.h
Common CUDA utilities.

Inverse.h

LinalgHeadersCPU.h

CLOUDVIEWER_CPU_LINALG_INT
#define CLOUDVIEWER_CPU_LINALG_INT
Definition: LinalgHeadersCPU.h:23

AssertTensorDtypes
#define AssertTensorDtypes(tensor,...)
Definition: TensorCheck.h:33

cloudViewer::core::CUDAScopedDevice
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214

cloudViewer::core::Device
Definition: Device.h:18

cloudViewer::core::Dtype
Definition: Dtype.h:21

cloudViewer::core::SizeVector
Definition: SizeVector.h:70

cloudViewer::core::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:119

cloudViewer::core::Tensor
Definition: Tensor.h:32

cloudViewer::core::Tensor::Contiguous
Tensor Contiguous() const
Definition: Tensor.cpp:772

cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:1164

cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition: Tensor.h:1144

cloudViewer::core::Tensor::Eye
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:418

cloudViewer::core::Tensor::Zeros
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
Definition: Tensor.cpp:406

cloudViewer::core::Tensor::GetDevice
Device GetDevice() const override
Definition: Tensor.cpp:1435

cloudViewer::core::Tensor::Empty
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
Definition: Tensor.cpp:400

cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:1127

cloudViewer::core::Tensor::T
Tensor T() const
Expects the input to be a <= 2-D Tensor; transposes it by swapping dimensions 0 and 1.
Definition: Tensor.cpp:1079

cloudViewer::core::Tensor::To
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739

LogError
#define LogError(...)
Definition: Logging.h:60

cloudViewer::core::InverseCPU
void InverseCPU(void *A_data, void *ipiv_data, [[maybe_unused]] void *output_data, int64_t n, Dtype dtype, const Device &device)

cloudViewer::core::Int64
const Dtype Int64
Definition: Dtype.cpp:47

cloudViewer::core::InverseSYCL
void InverseSYCL(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
Definition: InverseSYCL.cpp:19

cloudViewer::core::Inverse
void Inverse(const Tensor &A, Tensor &output)
Computes A^{-1} with LU factorization, where A is a N x N square matrix.
Definition: Inverse.cpp:18

cloudViewer::core::Float64
const Dtype Float64
Definition: Dtype.cpp:43

cloudViewer::core::Int32
const Dtype Int32
Definition: Dtype.cpp:46

cloudViewer::core::InverseCUDA
void InverseCUDA(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
Definition: InverseCUDA.cpp:16

cloudViewer::core::Float32
const Dtype Float32
Definition: Dtype.cpp:42

cloudViewer
Generic file read and write utility for python interface.
Definition: AutoSegmentationTools.h:16