cpp_api/api/AddMM_8cpp_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 #include "cloudViewer/core/linalg/AddMM.h"


 #include <unordered_map>


 #include "cloudViewer/core/CUDAUtils.h"


 namespace cloudViewer {

 namespace core {


 void AddMM(const Tensor& A,

            const Tensor& B,

            Tensor& output,

            double alpha,

            double beta) {

     AssertTensorDevice(B, A.GetDevice());

     AssertTensorDtype(B, A.GetDtype());

     AssertTensorDevice(output, A.GetDevice());

     AssertTensorDtype(output, A.GetDtype());


     const Device device = A.GetDevice();

     const Dtype dtype = A.GetDtype();


     if (dtype != core::Float32 && dtype != core::Float64) {

         utility::LogError("AddMM is not implemented for {}.", dtype.ToString());

     }


     // Check shapes

     SizeVector A_shape = A.GetShape();

     SizeVector B_shape = B.GetShape();

     SizeVector output_shape = output.GetShape();


     if (A_shape.size() != 2) {

         utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());

     }

     if (B_shape.size() != 1 && B_shape.size() != 2) {

         utility::LogError(

                 "Tensor B must be 1D (vector) or 2D (matrix), but got {}D.",

                 B_shape.size());

     }

     if (A_shape[1] != B_shape[0]) {

         utility::LogError("Tensor A columns {} mismatch with Tensor B rows {}.",

                           A_shape[1], B_shape[0]);

     }

     if (output_shape[0] != A_shape[0] &&

         output_shape[1] != B_shape[B_shape.size() - 1]) {

         utility::LogError(

                 "Tensor output must match A rows {} and B columns {}.",

                 A_shape[0], B_shape[B_shape.size() - 1]);

     }


     // Check the memory layout of tensors.

     Tensor A_contiguous, B_contiguous;

     bool transA = false;

     bool transB = false;

     if (A.IsContiguous() || A.T().IsContiguous()) {

         transA = A.T().IsContiguous();

         A_contiguous = A;

     } else {

         A_contiguous = A.Contiguous();

     }


     if (B.IsContiguous() || B.T().IsContiguous()) {

         transB = B.T().IsContiguous();

         B_contiguous = B;

     } else {

         B_contiguous = B.Contiguous();

     }


     // Dispatch to backends

     int64_t m = output.GetShape(0);

     int64_t n = output.GetShape(1);

     int64_t k = A_contiguous.GetShape(1);


     int lda = A_contiguous.GetStride(transA ? 1 : 0);

     int ldb = B_contiguous.GetStride(transB ? 1 : 0);

     int ldc = output.GetStride(0);


     if (m == 0 || k == 0 || n == 0) {

         utility::LogError(

                 "Tensor shapes should not contain dimensions with zero.");

     }


     void* A_data = A_contiguous.To(dtype).GetDataPtr();

     void* B_data = B_contiguous.To(dtype).GetDataPtr();

     void* C_data = output.GetDataPtr();


     if (device.IsCUDA()) {

 #ifdef BUILD_CUDA_MODULE

         CUDAScopedDevice scoped_device(device);

         AddMMCUDA(B_data, A_data, C_data, n, k, m, alpha, beta, transB, transA,

                   ldb, lda, ldc, dtype, device);

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else if (device.IsSYCL()) {

 #ifdef BUILD_SYCL_MODULE

         AddMMSYCL(B_data, A_data, C_data, n, k, m, alpha, beta, transB, transA,

                   ldb, lda, ldc, dtype, device);

 #else

         utility::LogError("Unimplemented device.");

 #endif

     } else {

         AddMMCPU(B_data, A_data, C_data, n, k, m, alpha, beta, transB, transA,

                  ldb, lda, ldc, dtype);

     }

 };


 }  // namespace core

 }  // namespace cloudViewer

AddMM.h

CUDAUtils.h
Common CUDA utilities.

AssertTensorDevice
#define AssertTensorDevice(tensor,...)
Definition: TensorCheck.h:45

AssertTensorDtype
#define AssertTensorDtype(tensor,...)
Definition: TensorCheck.h:21

cloudViewer::core::CUDAScopedDevice
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214

cloudViewer::core::Device
Definition: Device.h:18

cloudViewer::core::Device::IsCUDA
bool IsCUDA() const
Returns true iff device type is CUDA.
Definition: Device.h:49

cloudViewer::core::Device::IsSYCL
bool IsSYCL() const
Returns true iff device type is SYCL GPU.
Definition: Device.h:52

cloudViewer::core::Dtype
Definition: Dtype.h:21

cloudViewer::core::Dtype::ToString
std::string ToString() const
Definition: Dtype.h:65

cloudViewer::core::SizeVector
Definition: SizeVector.h:70

cloudViewer::core::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:119

cloudViewer::core::Tensor
Definition: Tensor.h:32

cloudViewer::core::Tensor::Contiguous
Tensor Contiguous() const
Definition: Tensor.cpp:772

cloudViewer::core::Tensor::IsContiguous
bool IsContiguous() const
Definition: Tensor.h:1036

cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:1164

cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition: Tensor.h:1144

cloudViewer::core::Tensor::GetStride
int64_t GetStride(int64_t dim) const
Definition: Tensor.h:1139

cloudViewer::core::Tensor::GetDevice
Device GetDevice() const override
Definition: Tensor.cpp:1435

cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:1127

cloudViewer::core::Tensor::T
Tensor T() const
Expects input to be <= 2-D Tensor by swapping dimension 0 and 1.
Definition: Tensor.cpp:1079

cloudViewer::core::Tensor::To
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739

LogError
#define LogError(...)
Definition: Logging.h:60

cloudViewer::core::AddMMCPU
void AddMMCPU(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, double alpha, double beta, bool gemmTrA, bool gemmTrB, int lda, int ldb, int ldc, Dtype dtype)
Definition: AddMMCPU.cpp:17

cloudViewer::core::AddMM
void AddMM(const Tensor &A, const Tensor &B, Tensor &output, double alpha, double beta)
Definition: AddMM.cpp:17

cloudViewer::core::Float64
const Dtype Float64
Definition: Dtype.cpp:43

cloudViewer::core::AddMMCUDA
void AddMMCUDA(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, double alpha, double beta, bool gemmTrA, bool gemmTrB, int lda, int ldb, int ldc, Dtype dtype, const Device &device)
Definition: AddMMCUDA.cpp:17

cloudViewer::core::AddMMSYCL
void AddMMSYCL(void *A_data, void *B_data, void *C_data, int64_t m, int64_t k, int64_t n, double alpha, double beta, bool gemmTrA, bool gemmTrB, int lda, int ldb, int ldc, Dtype dtype, const Device &device)
Definition: AddMMSYCL.cpp:20

cloudViewer::core::Float32
const Dtype Float32
Definition: Dtype.cpp:42

cloudViewer
Generic file read and write utility for python interface.
Definition: AutoSegmentationTools.h:16