10 #ifdef BUILD_CUDA_MODULE
11 #include <cuda_runtime_api.h>
14 #include <unordered_map>
33 if (A_shape.
size() != 2) {
36 if (A_shape[0] != A_shape[1]) {
38 A_shape[0], A_shape[1]);
40 if (B_shape.
size() != 1 && B_shape.
size() != 2) {
42 "Tensor B must be 1D (vector) or 2D (matrix), but got {}D",
45 if (B_shape[0] != A_shape[0]) {
49 int64_t n = A_shape[0];
50 int64_t k = B_shape.
size() == 2 ? B_shape[1] : 1;
51 if (n == 0 || k == 0) {
53 "Tensor shapes should not contain dimensions with zero.");
61 void *B_data =
X.GetDataPtr();
63 if (device.IsSYCL()) {
64 #ifdef BUILD_SYCL_MODULE
68 SolveSYCL(A_data, B_data, ipiv_data, n, k, dtype, device);
72 }
else if (device.IsCUDA()) {
73 #ifdef BUILD_CUDA_MODULE
78 SolveCUDA(A_data, B_data, ipiv_data, n, k, dtype, device);
94 SolveCPU(A_data, B_data, ipiv_data, n, k, dtype, device);
#define AssertTensorDevice(tensor,...)
#define AssertTensorDtype(tensor,...)
#define AssertTensorDtypes(tensor,...)
When CUDA is not enabled, this is a dummy class.
Device GetDevice() const override
Tensor Clone() const
Copy Tensor to the same device.
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
SizeVector GetShape() const
Tensor T() const
Transposes the Tensor by swapping dimensions 0 and 1; expects the input to be at most 2-D.
void SolveSYCL(void *A_data, void *B_data, void *ipiv_data, int64_t n, int64_t k, Dtype dtype, const Device &device)
void SolveCUDA(void *A_data, void *B_data, void *ipiv_data, int64_t n, int64_t k, Dtype dtype, const Device &device)
void Solve(const Tensor &A, const Tensor &B, Tensor &X)
Solves AX = B using LU decomposition. A must be a square matrix.
void SolveCPU(void *A_data, void *B_data, void *ipiv_data, int64_t n, int64_t k, Dtype dtype, const Device &device)
Generic file read and write utility for the Python interface.