10 #include <unordered_map>
26 if (A_shape.
size() != 2) {
29 if (A_shape[0] != A_shape[1]) {
34 int64_t n = A_shape[0];
37 "Tensor shapes should not contain dimensions with zero.");
40 if (device.IsCUDA()) {
41 #ifdef BUILD_CUDA_MODULE
54 InverseCUDA(A_data, ipiv_data, output_data, n, dtype, device);
59 }
else if (device.IsSYCL()) {
60 #ifdef BUILD_SYCL_MODULE
68 InverseSYCL(A_data, ipiv_data,
nullptr, n, dtype, device);
89 InverseCPU(A_data, ipiv_data,
nullptr, n, dtype, device);
#define AssertTensorDtypes(tensor,...)
When CUDA is not enabled, this is a dummy class.
Tensor Contiguous() const
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
Device GetDevice() const override
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
SizeVector GetShape() const
Tensor T() const
Expects the input to be a Tensor with at most 2 dimensions; returns it with dimensions 0 and 1 swapped.
Tensor To(Dtype dtype, bool copy=false) const
void InverseCPU(void *A_data, void *ipiv_data, [[maybe_unused]] void *output_data, int64_t n, Dtype dtype, const Device &device)
void InverseSYCL(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
void Inverse(const Tensor &A, Tensor &output)
Computes A^{-1} with LU factorization, where A is an N x N square matrix.
void InverseCUDA(void *A_data, void *ipiv_data, void *output_data, int64_t n, Dtype dtype, const Device &device)
Generic file read and write utility for the Python interface.