ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
LeastSquaresSYCL.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
#include <algorithm>
#include <cstdint>
#include <sycl/sycl.hpp>

#include "cloudViewer/core/Blob.h"
#include "cloudViewer/core/SYCLContext.h"
#include "cloudViewer/core/linalg/LinalgUtils.h"
#include "oneapi/mkl.hpp"
15 
16 namespace cloudViewer {
17 namespace core {
18 
19 void LeastSquaresSYCL(void* A_data,
20  void* B_data,
21  int64_t m,
22  int64_t n,
23  int64_t k,
24  Dtype dtype,
25  const Device& device) {
26  using namespace oneapi::mkl;
27  sycl::queue queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
28  int nrhs = k, lda = m, stride_a = lda * n, ldb = std::max(m, n),
29  stride_b = ldb * nrhs, batch_size = 1;
31  // Use blob to ensure cleanup of scratchpad memory.
32  int64_t scratchpad_size = lapack::gels_batch_scratchpad_size<scalar_t>(
33  queue, transpose::N, m, n, nrhs, lda, stride_a, ldb, stride_b,
34  batch_size);
35  core::Blob scratchpad(scratchpad_size * sizeof(scalar_t), device);
36  lapack::gels_batch(
37  queue, transpose::N, m, n, nrhs, static_cast<scalar_t*>(A_data),
38  lda, stride_a, static_cast<scalar_t*>(B_data), ldb, stride_b,
39  batch_size, static_cast<scalar_t*>(scratchpad.GetDataPtr()),
40  scratchpad_size)
41  .wait_and_throw();
42  });
43 }
44 
45 } // namespace core
46 } // namespace cloudViewer
#define DISPATCH_LINALG_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: LinalgUtils.h:23
SYCL queue manager.
void * GetDataPtr()
Definition: Blob.h:75
static SYCLContext & GetInstance()
Get singleton instance.
Definition: SYCLContext.cpp:25
sycl::queue GetDefaultQueue(const Device &device)
Get the default SYCL queue given a CloudViewer device.
Definition: SYCLContext.cpp:43
int max(int a, int b)
Definition: cutil_math.h:48
void LeastSquaresSYCL(void *A_data, void *B_data, int64_t m, int64_t n, int64_t k, Dtype dtype, const Device &device)
Generic file read and write utility for python interface.