18 template <
typename scalar_t>
20 CBLAS_TRANSPOSE trans_A,
21 CBLAS_TRANSPOSE trans_B,
26 const scalar_t *A_data,
28 const scalar_t *B_data,
38 CBLAS_TRANSPOSE trans_A,
39 CBLAS_TRANSPOSE trans_B,
51 cblas_sgemm(layout, trans_A, trans_B, m, n, k, alpha, A_data, lda, B_data,
52 ldb, beta, C_data, ldc);
57 CBLAS_TRANSPOSE trans_A,
58 CBLAS_TRANSPOSE trans_B,
70 cblas_dgemm(layout, trans_A, trans_B, m, n, k, alpha, A_data, lda, B_data,
71 ldb, beta, C_data, ldc);
74 #ifdef BUILD_CUDA_MODULE
75 template <
typename scalar_t>
76 inline cublasStatus_t gemm_cuda(cublasHandle_t handle,
77 cublasOperation_t transa,
78 cublasOperation_t transb,
82 const scalar_t *alpha,
83 const scalar_t *A_data,
85 const scalar_t *B_data,
91 return CUBLAS_STATUS_NOT_SUPPORTED;
94 template <
typename scalar_t>
95 inline cublasStatus_t trsm_cuda(cublasHandle_t handle,
96 cublasSideMode_t side,
97 cublasFillMode_t uplo,
98 cublasOperation_t trans,
99 cublasDiagType_t diag,
102 const scalar_t *alpha,
108 return CUBLAS_STATUS_NOT_SUPPORTED;
112 inline cublasStatus_t gemm_cuda<float>(cublasHandle_t handle,
113 cublasOperation_t transa,
114 cublasOperation_t transb,
126 return cublasSgemm(handle, transa,
129 alpha,
static_cast<const float *
>(A_data), lda,
130 static_cast<const float *
>(B_data),
132 beta,
static_cast<float *
>(C_data), ldc);
136 inline cublasStatus_t gemm_cuda<double>(cublasHandle_t handle,
137 cublasOperation_t transa,
138 cublasOperation_t transb,
143 const double *A_data,
145 const double *B_data,
150 return cublasDgemm(handle, transa,
153 alpha,
static_cast<const double *
>(A_data), lda,
154 static_cast<const double *
>(B_data),
156 beta,
static_cast<double *
>(C_data), ldc);
160 inline cublasStatus_t trsm_cuda<float>(cublasHandle_t handle,
161 cublasSideMode_t side,
162 cublasFillMode_t uplo,
163 cublasOperation_t trans,
164 cublasDiagType_t diag,
172 return cublasStrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B,
177 inline cublasStatus_t trsm_cuda<double>(cublasHandle_t handle,
178 cublasSideMode_t side,
179 cublasFillMode_t uplo,
180 cublasOperation_t trans,
181 cublasDiagType_t diag,
189 return cublasDtrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B,
void gemm_cpu(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans_A, CBLAS_TRANSPOSE trans_B, CLOUDVIEWER_CPU_LINALG_INT m, CLOUDVIEWER_CPU_LINALG_INT n, CLOUDVIEWER_CPU_LINALG_INT k, scalar_t alpha, const scalar_t *A_data, CLOUDVIEWER_CPU_LINALG_INT lda, const scalar_t *B_data, CLOUDVIEWER_CPU_LINALG_INT ldb, scalar_t beta, scalar_t *C_data, CLOUDVIEWER_CPU_LINALG_INT ldc)
void gemm_cpu< float >(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans_A, CBLAS_TRANSPOSE trans_B, CLOUDVIEWER_CPU_LINALG_INT m, CLOUDVIEWER_CPU_LINALG_INT n, CLOUDVIEWER_CPU_LINALG_INT k, float alpha, const float *A_data, CLOUDVIEWER_CPU_LINALG_INT lda, const float *B_data, CLOUDVIEWER_CPU_LINALG_INT ldb, float beta, float *C_data, CLOUDVIEWER_CPU_LINALG_INT ldc)
void gemm_cpu< double >(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans_A, CBLAS_TRANSPOSE trans_B, CLOUDVIEWER_CPU_LINALG_INT m, CLOUDVIEWER_CPU_LINALG_INT n, CLOUDVIEWER_CPU_LINALG_INT k, double alpha, const double *A_data, CLOUDVIEWER_CPU_LINALG_INT lda, const double *B_data, CLOUDVIEWER_CPU_LINALG_INT ldb, double beta, double *C_data, CLOUDVIEWER_CPU_LINALG_INT ldc)
Generic file read and write utility for python interface.