27 scalar_t alpha = 1, beta = 0;
29 gemm_cuda<scalar_t>(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
31 static_cast<const scalar_t*
>(A_data), m,
32 static_cast<const scalar_t*
>(B_data), k,
33 &beta,
static_cast<scalar_t*
>(C_data), m),
#define DISPATCH_LINALG_DTYPE_TO_TEMPLATE(DTYPE,...)
ccGuiPythonInstance * GetInstance() noexcept
void MatmulCUDA(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype, const Device &device)
Generic file read and write utility for python interface.