10 #include <unordered_map>
28 if (A_shape.
size() != 2) {
31 if (B_shape.
size() != 1 && B_shape.
size() != 2) {
33 "Tensor B must be 1D (vector) or 2D (matrix), but got {}D.",
36 if (B_shape[0] != A_shape[0]) {
40 int64_t m = A_shape[0];
41 int64_t n = A_shape[1];
42 int64_t k = B_shape.
size() == 2 ? B_shape[1] : 1;
43 if (m == 0 || n == 0 || k == 0) {
45 "Tensor shapes should not contain dimensions with zero.");
61 #ifdef BUILD_CUDA_MODULE
67 }
else if (device.
IsSYCL()) {
68 #ifdef BUILD_SYCL_MODULE
#define AssertTensorDevice(tensor,...)
#define AssertTensorDtype(tensor,...)
#define AssertTensorDtypes(tensor,...)
When CUDA is not enabled, this is a dummy class.
bool IsCUDA() const
Returns true iff device type is CUDA.
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
Device GetDevice() const override
Tensor Clone() const
Copy Tensor to the same device.
SizeVector GetShape() const
Tensor T() const
Expects the input to be a <= 2-D Tensor; swaps dimensions 0 and 1.
Tensor Slice(int64_t dim, int64_t start, int64_t stop, int64_t step=1) const
void LeastSquaresCUDA(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
void LeastSquares(const Tensor &A, const Tensor &B, Tensor &X)
Solve AX = B with QR decomposition. A is a full-rank m x n matrix (m >= n).
void LeastSquaresSYCL(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
void LeastSquaresCPU(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
Generic file read and write utility for the Python interface.