cpp_api/api/Matmul_8cpp_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 #include "cloudViewer/core/linalg/Matmul.h"


 #include <unordered_map>


 #include "cloudViewer/core/CUDAUtils.h"


 namespace cloudViewer {

 namespace core {


 void Matmul(const Tensor& A, const Tensor& B, Tensor& output) {

     AssertTensorDevice(B, A.GetDevice());

     AssertTensorDtype(B, A.GetDtype());


     const Device device = A.GetDevice();

     const Dtype dtype_original = A.GetDtype();

     Dtype dtype;


     if (dtype_original != core::Float32 && dtype_original != core::Float64) {

         utility::LogDebug("Converting to Float32 dtype to from {}.",

                           dtype_original.ToString());

         dtype = core::Float32;

     } else {

         dtype = dtype_original;

     }


     // Check shapes

     SizeVector A_shape = A.GetShape();

     SizeVector B_shape = B.GetShape();


     if (A_shape.size() != 2) {

         utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());

     }

     if (B_shape.size() != 1 && B_shape.size() != 2) {

         utility::LogError(

                 "Tensor B must be 1D (vector) or 2D (matrix), but got {}D.",

                 B_shape.size());

     }

     if (A_shape[1] != B_shape[0]) {

         utility::LogError("Tensor A columns {} mismatch with Tensor B rows {}.",

                           A_shape[1], B_shape[0]);

     }


     // Dispatch to backends

     int64_t m = A_shape[0];

     int64_t k = A_shape[1];

     int64_t n = B_shape.size() == 2 ? B_shape[1] : 1;


     if (m == 0 || k == 0 || n == 0) {

         utility::LogError(

                 "Tensor shapes should not contain dimensions with zero.");

     }


     Tensor A_contiguous = A.Contiguous().To(dtype);

     Tensor B_contiguous = B.Contiguous().To(dtype);

     void* A_data = A_contiguous.GetDataPtr();

     void* B_data = B_contiguous.GetDataPtr();


     output = Tensor::Empty({m, n}, dtype, device);

     void* C_data = output.GetDataPtr();


     if (device.IsSYCL()) {

 #ifdef BUILD_SYCL_MODULE

         MatmulSYCL(B_data, A_data, C_data, n, k, m, dtype, device);

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else if (device.IsCUDA()) {

 #ifdef BUILD_CUDA_MODULE

         CUDAScopedDevice scoped_device(device);

         MatmulCUDA(B_data, A_data, C_data, n, k, m, dtype, device);

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else {

         MatmulCPU(B_data, A_data, C_data, n, k, m, dtype);

     }


     output = output.To(dtype_original);

 };


 }  // namespace core

 }  // namespace cloudViewer

CUDAUtils.h
Common CUDA utilities.

Matmul.h

AssertTensorDevice
#define AssertTensorDevice(tensor,...)
Definition: TensorCheck.h:45

AssertTensorDtype
#define AssertTensorDtype(tensor,...)
Definition: TensorCheck.h:21

cloudViewer::core::CUDAScopedDevice
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214

cloudViewer::core::Device
Definition: Device.h:18

cloudViewer::core::Device::IsCUDA
bool IsCUDA() const
Returns true iff device type is CUDA.
Definition: Device.h:49

cloudViewer::core::Device::IsSYCL
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
Definition: Device.h:52

cloudViewer::core::Dtype
Definition: Dtype.h:21

cloudViewer::core::Dtype::ToString
std::string ToString() const
Definition: Dtype.h:65

cloudViewer::core::SizeVector
Definition: SizeVector.h:70

cloudViewer::core::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:119

cloudViewer::core::Tensor
Definition: Tensor.h:32

cloudViewer::core::Tensor::Contiguous
Tensor Contiguous() const
Definition: Tensor.cpp:772

cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:1164

cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition: Tensor.h:1144

cloudViewer::core::Tensor::GetDevice
Device GetDevice() const override
Definition: Tensor.cpp:1435

cloudViewer::core::Tensor::Empty
static Tensor Empty(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor with uninitialized values.
Definition: Tensor.cpp:400

cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:1127

cloudViewer::core::Tensor::To
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739

LogError
#define LogError(...)
Definition: Logging.h:60

LogDebug
#define LogDebug(...)
Definition: Logging.h:90

cloudViewer::core::MatmulCUDA
void MatmulCUDA(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype, const Device &device)
Definition: MatmulCUDA.cpp:17

cloudViewer::core::MatmulSYCL
void MatmulSYCL(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype, const Device &device)
Definition: MatmulSYCL.cpp:19

cloudViewer::core::Matmul
void Matmul(const Tensor &A, const Tensor &B, Tensor &output)
Computes matrix multiplication C = AB.
Definition: Matmul.cpp:17

cloudViewer::core::MatmulCPU
void MatmulCPU(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, Dtype dtype)
Definition: MatmulCPU.cpp:14

cloudViewer::core::Float64
const Dtype Float64
Definition: Dtype.cpp:43

cloudViewer::core::Float32
const Dtype Float32
Definition: Dtype.cpp:42

cloudViewer
Generic file read and write utility for python interface.
Definition: AutoSegmentationTools.h:16