16 void Matmul(
const Tensor& A,
const Tensor& B, Tensor& C);
18 #ifdef BUILD_SYCL_MODULE
26 const Device& device);
28 #ifdef BUILD_CUDA_MODULE
36 const Device& device);
void MatmulCUDA(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype, const Device &device)
void MatmulSYCL(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype, const Device &device)
void Matmul(const Tensor &A, const Tensor &B, Tensor &output)
Computes matrix multiplication C = AB.
void MatmulCPU(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype)
Generic file read and write utility for the Python interface.