20 int number_of_indices,
25 const int* ipiv_ptr =
static_cast<const int*
>(ipiv_cpu.
GetDataPtr());
26 int* full_ipiv_ptr =
static_cast<int*
>(full_ipiv.
GetDataPtr());
27 for (
int i = 0; i < number_of_indices; i++) {
28 int temp = full_ipiv_ptr[i];
29 full_ipiv_ptr[i] = full_ipiv_ptr[ipiv_ptr[i] - 1];
30 full_ipiv_ptr[ipiv_ptr[i] - 1] = temp;
43 const bool permute_l) {
48 Triul(output, upper, lower, 0);
58 lower = permutation.
Matmul(lower);
70 if (A_shape.
size() != 2) {
74 const int64_t rows = A_shape[0];
75 const int64_t cols = A_shape[1];
76 if (rows == 0 || cols == 0) {
78 "Tensor shapes should not contain dimensions with zero.");
91 int64_t ipiv_len =
std::min(rows, cols);
92 if (device.IsCUDA()) {
93 #ifdef BUILD_CUDA_MODULE
97 LUCUDA(A_data, ipiv_data, rows, cols, dtype, device);
101 }
else if (device.IsSYCL()) {
102 #ifdef BUILD_SYCL_MODULE
105 LUSYCL(A_data, ipiv_data, rows, cols, dtype, device);
120 LUCPU(A_data, ipiv_data, rows, cols, dtype, device);
130 const bool permute_l) {
138 OutputToPLU(output, permutation, lower, upper, ipiv, permute_l);
144 lower = lower.
Slice(1, 0, min_);
145 upper = upper.
Slice(0, 0, min_);
#define AssertTensorDtypes(tensor,...)
When CUDA is not enabled, this is a dummy class.
Tensor Contiguous() const
Tensor Matmul(const Tensor &rhs) const
static Tensor Arange(const Scalar start, const Scalar stop, const Scalar step=1, const Dtype dtype=core::Int64, const Device &device=core::Device("CPU:0"))
Create a 1D tensor with evenly spaced values in the given interval.
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Tensor IndexGet(const std::vector< Tensor > &index_tensors) const
Advanced indexing getter. This will always allocate a new Tensor.
Device GetDevice() const override
Tensor Clone() const
Copy Tensor to the same device.
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
SizeVector GetShape() const
Tensor T() const
Transposes the tensor by swapping dimensions 0 and 1; expects the input to be at most a 2-D Tensor.
Tensor Slice(int64_t dim, int64_t start, int64_t stop, int64_t step=1) const
Tensor To(Dtype dtype, bool copy=false) const
static void OutputToPLU(const Tensor &output, Tensor &permutation, Tensor &lower, Tensor &upper, const Tensor &ipiv, const bool permute_l)
static Tensor GetColPermutation(const Tensor &ipiv, int number_of_indices, int number_of_rows)
void LUSYCL(void *A_data, void *ipiv_data, int64_t m, int64_t n, Dtype dtype, const Device &device)
void LUCUDA(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
void LUIpiv(const Tensor &A, Tensor &ipiv, Tensor &output)
void Triul(const Tensor &A, Tensor &upper, Tensor &lower, const int diagonal)
void LU(const Tensor &A, Tensor &permutation, Tensor &lower, Tensor &upper, const bool permute_l)
void LUCPU(void *A_data, void *ipiv_data, int64_t rows, int64_t cols, Dtype dtype, const Device &device)
Generic file read and write utility for the Python interface.