cpp_api/api/DLPack_8h_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------

 //

 // This file is retrieved from:

 // https://github.com/dmlc/dlpack/blob/master/include/dlpack/dlpack.h

 // Version: v1.2

 //

 // License:

 // https://github.com/dmlc/dlpack/blob/master/LICENSE

 //

 // CloudViewer changes:

 // - Automatic style changed by clang-format.

 // - Formatting helper


 #ifndef DLPACK_DLPACK_H_

 #define DLPACK_DLPACK_H_


 // Linkage helper: expands to `extern "C"` when this header is compiled as
 // C++ and to nothing when it is compiled as C.
 #ifdef __cplusplus

 #define DLPACK_EXTERN_C extern "C"

 #else

 #define DLPACK_EXTERN_C

 #endif


 // Major version number of the DLPack definitions in this header.
 #define DLPACK_MAJOR_VERSION 1


 // Minor version number of the DLPack definitions in this header.
 #define DLPACK_MINOR_VERSION 2


 // Symbol import/export helper. On Windows (_WIN32) it expands to
 // __declspec(dllexport) when DLPACK_EXPORTS is defined and to
 // __declspec(dllimport) otherwise; on every other platform it is empty.
 #ifdef _WIN32

 #ifdef DLPACK_EXPORTS

 #define DLPACK_DLL __declspec(dllexport)

 #else

 #define DLPACK_DLL __declspec(dllimport)

 #endif

 #else

 #define DLPACK_DLL

 #endif


 #include <stddef.h>

 #include <stdint.h>


 // When compiled as C++, open an extern "C" block around the declarations
 // that follow. The matching closing brace sits after the DLPackExchangeAPI
 // definition.
 #ifdef __cplusplus

 extern "C" {

 #endif


 // The DLPack version.
 typedef struct {

     // DLPack major version.
     uint32_t major;

     // DLPack minor version.
     uint32_t minor;

 } DLPackVersion;


 // The device type in DLDevice.
 // In C++ the underlying type is fixed to int32_t; in C a plain enum is
 // declared. Values 5 and 6 are not assigned to any enumerator here.
 #ifdef __cplusplus

 typedef enum : int32_t {

 #else

 typedef enum {

 #endif

     // CPU device.
     kDLCPU = 1,

     // CUDA GPU device.
     kDLCUDA = 2,

     // Pinned CUDA CPU memory by cudaMallocHost.
     kDLCUDAHost = 3,

     // OpenCL devices.
     kDLOpenCL = 4,

     // Vulkan buffer for next generation graphics.
     kDLVulkan = 7,

     // Metal for Apple GPU.
     kDLMetal = 8,

     // Verilog simulator buffer.
     kDLVPI = 9,

     // ROCm GPUs for AMD GPUs.
     kDLROCM = 10,

     // Pinned ROCm CPU memory allocated by hipMallocHost.
     kDLROCMHost = 11,

     // Reserved extension device type, used to quickly test extension devices.
     kDLExtDev = 12,

     // CUDA managed/unified memory allocated by cudaMallocManaged.
     kDLCUDAManaged = 13,

     // Unified shared memory allocated on a oneAPI non-partitioned device.
     kDLOneAPI = 14,

     // GPU support for next generation WebGPU standard.
     kDLWebGPU = 15,

     // Qualcomm Hexagon DSP.
     kDLHexagon = 16,

     // Microsoft MAIA devices.
     kDLMAIA = 17,

     // AWS Trainium.
     kDLTrn = 18,

 } DLDeviceType;


 // A Device for Tensor and operator.
 typedef struct {

     // The device type used in the device.
     DLDeviceType device_type;

     // The device index. For vanilla CPU memory, pinned memory, or managed
     // memory, this is set to 0.
     int32_t device_id;

 } DLDevice;


 // The type code options of DLDataType.
 typedef enum {

     // Signed integer.
     kDLInt = 0U,

     // Unsigned integer.
     kDLUInt = 1U,

     // IEEE floating point.
     kDLFloat = 2U,

     // Opaque handle type, reserved for testing purposes.
     kDLOpaqueHandle = 3U,

     // bfloat16.
     kDLBfloat = 4U,

     // Complex number (C/C++/Python layout: compact struct per complex
     // number).
     kDLComplex = 5U,

     // Boolean.
     kDLBool = 6U,

     // FP8 data types (codes 7 through 14).
     kDLFloat8_e3m4 = 7U,

     kDLFloat8_e4m3 = 8U,

     kDLFloat8_e4m3b11fnuz = 9U,

     kDLFloat8_e4m3fn = 10U,

     kDLFloat8_e4m3fnuz = 11U,

     kDLFloat8_e5m2 = 12U,

     kDLFloat8_e5m2fnuz = 13U,

     kDLFloat8_e8m0fnu = 14U,

     // FP6 data types (codes 15 and 16). Setting bits != 6 is currently
     // unspecified.
     kDLFloat6_e2m3fn = 15U,

     kDLFloat6_e3m2fn = 16U,

     // FP4 data type. Setting bits != 4 is currently unspecified.
     kDLFloat4_e2m1fn = 17U,

 } DLDataTypeCode;


 // The data type the tensor can hold. The data type is assumed to follow the
 // native endian-ness.
 typedef struct {

     // Type code of base types. Kept as uint8_t instead of DLDataTypeCode for
     // minimal memory footprint.
     uint8_t code;

     // Number of bits, common choices are 8, 16, 32.
     uint8_t bits;

     // Number of lanes in the type, used for vector types.
     uint16_t lanes;

 } DLDataType;


 // Plain C Tensor object, does not manage memory.
 typedef struct {

     // The data pointer; points to the allocated data (for example a CUDA
     // device pointer).
     void* data;

     // The device of the tensor.
     DLDevice device;

     // Number of dimensions.
     int32_t ndim;

     // The data type of the pointer.
     DLDataType dtype;

     // The shape of the tensor.
     // NOTE(review): presumably an array of ndim entries - confirm against the
     // DLPack specification.
     int64_t* shape;

     // Strides of the tensor, in number of elements, not bytes.
     int64_t* strides;

     // The offset in bytes to the beginning pointer to data.
     uint64_t byte_offset;

 } DLTensor;


 // C Tensor object that manages the memory of a DLTensor.
 typedef struct DLManagedTensor {

     // DLTensor which is being memory managed.
     DLTensor dl_tensor;

     // The context of the original host framework of this DLManagedTensor.
     void* manager_ctx;

     // Destructor - this should be called to destruct the manager_ctx which
     // backs the DLManagedTensor.
     void (*deleter)(struct DLManagedTensor* self);

 } DLManagedTensor;


 // Bit masks used in DLManagedTensorVersioned (its `flags` member is the only
 // bitmask field declared there).


 // Bit 0. NOTE(review): by its name this marks the tensor as read-only -
 // confirm against the DLPack specification.
 #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)


 // Bit 1. NOTE(review): by its name this marks the tensor data as a copy -
 // confirm against the DLPack specification.
 #define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)


 // Bit 2. NOTE(review): by its name this concerns padding of sub-byte data
 // types - confirm against the DLPack specification.
 #define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL)


 // A versioned and managed C Tensor object that manages the memory of a
 // DLTensor.
 typedef struct DLManagedTensorVersioned {

     // The API and ABI version of the current managed Tensor.
     DLPackVersion version;

     // The context of the original host framework.
     void* manager_ctx;

     // Destructor.
     void (*deleter)(struct DLManagedTensorVersioned* self);

     // Additional bitmask flags information about the tensor.
     uint64_t flags;

     // DLTensor which is being memory managed.
     DLTensor dl_tensor;

 } DLManagedTensorVersioned;


 //----------------------------------------------------------------------

 // DLPack `__c_dlpack_exchange_api__` fast exchange protocol definitions

 //----------------------------------------------------------------------

 // Function pointer type: request a producer library to create a new tensor.
 // NOTE(review): judging by the parameter names, `prototype` describes the
 // tensor to create, `out` receives the result, and `SetError` is a callback
 // invoked with `error_ctx` to report a failure - confirm against the DLPack
 // specification. The meaning of the int return value is not visible here.
 typedef int (*DLPackManagedTensorAllocator)(  //

         DLTensor* prototype,

         DLManagedTensorVersioned** out,

         void* error_ctx,  //

         void (*SetError)(void* error_ctx,

                          const char* kind,

                          const char* message)  //

 );


 // Function pointer type: exports a PyObject* Tensor/NDArray (`py_object`) to
 // a DLManagedTensorVersioned written through `out`.
 // The meaning of the int return value is not visible here.
 typedef int (*DLPackManagedTensorFromPyObjectNoSync)(  //

         void* py_object,                               //

         DLManagedTensorVersioned** out                 //

 );


 // Function pointer type: exports a PyObject* Tensor/NDArray (`py_object`) to
 // a caller-provided DLTensor (`out`).
 // The meaning of the int return value is not visible here.
 typedef int (*DLPackDLTensorFromPyObjectNoSync)(  //

         void* py_object,                          //

         DLTensor* out                             //

 );


 // Function pointer type: obtain the current work stream of the device
 // identified by `device_type` and `device_id`; the stream is written through
 // `out_current_stream`.
 // The meaning of the int return value is not visible here.
 typedef int (*DLPackCurrentWorkStream)(  //

         DLDeviceType device_type,        //

         int32_t device_id,               //

         void** out_current_stream        //

 );


 // Function pointer type: imports a DLManagedTensorVersioned (`tensor`) to a
 // PyObject* Tensor/NDArray written through `out_py_object`.
 // The meaning of the int return value is not visible here.
 typedef int (*DLPackManagedTensorToPyObjectNoSync)(  //

         DLManagedTensorVersioned* tensor,            //

         void** out_py_object                         //

 );


 // DLPackExchangeAPI stable header.
 typedef struct DLPackExchangeAPIHeader {

     // The provided DLPack version; the consumer must check major version
     // compatibility before using this struct.
     DLPackVersion version;

     // Optional pointer to an older DLPackExchangeAPI in the chain.
     struct DLPackExchangeAPIHeader* prev_api;

 } DLPackExchangeAPIHeader;


 // Framework-specific function pointers table for DLPack exchange.
 typedef struct DLPackExchangeAPI {

     // The header that remains stable across versions.
     DLPackExchangeAPIHeader header;

     // Producer function pointer for DLPackManagedTensorAllocator.
     // This function must not be NULL.
     DLPackManagedTensorAllocator managed_tensor_allocator;

     // Producer function pointer for DLPackManagedTensorFromPyObject.
     // This function must not be NULL.
     DLPackManagedTensorFromPyObjectNoSync managed_tensor_from_py_object_no_sync;

     // Producer function pointer for DLPackManagedTensorToPyObject.
     // This function must not be NULL.
     DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync;

     // Producer function pointer for DLPackDLTensorFromPyObject.
     // This function can be NULL (TODO confirm the exact condition against the
     // DLPack specification).
     DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync;

     // Producer function pointer for DLPackCurrentWorkStream.
     // This function must not be NULL.
     DLPackCurrentWorkStream current_work_stream;

 } DLPackExchangeAPI;


 // Close the extern "C" block that was opened before DLPackVersion.
 #ifdef __cplusplus

 }  // extern "C"

 #endif


 #include <fmt/core.h>

 #include <fmt/format.h>


 namespace fmt {


 template <>

 struct formatter<DLDeviceType> {

     template <typename FormatContext>

     auto format(const DLDeviceType& c,

                 FormatContext& ctx) const -> decltype(ctx.out()) {

         const char* text = nullptr;

         switch (c) {

             case kDLCPU:

                 text = "kDLCPU";

                 break;

             case kDLCUDA:

                 text = "kDLCUDA";

                 break;

             case kDLCUDAHost:

                 text = "kDLCUDAHost";

                 break;

             case kDLOpenCL:

                 text = "kDLOpenCL";

                 break;

             case kDLVulkan:

                 text = "kDLVulkan";

                 break;

             case kDLMetal:

                 text = "kDLMetal";

                 break;

             case kDLVPI:

                 text = "kDLVPI";

                 break;

             case kDLROCM:

                 text = "kDLROCM";

                 break;

             case kDLROCMHost:

                 text = "kDLROCMHost";

                 break;

             case kDLExtDev:

                 text = "kDLExtDev";

                 break;

             case kDLCUDAManaged:

                 text = "kDLCUDAManaged";

                 break;

             case kDLOneAPI:

                 text = "kDLOneAPI";

                 break;

             case kDLWebGPU:

                 text = "kDLWebGPU";

                 break;

             case kDLHexagon:

                 text = "kDLHexagon";

                 break;

             case kDLMAIA:

                 text = "kDLMAIA";

                 break;

             case kDLTrn:

                 text = "kDLTrn";

                 break;

         }

         return format_to(ctx.out(), text);

     }


     template <typename ParseContext>

     constexpr auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {

         return ctx.begin();

     }

 };


 }  // namespace fmt


 #endif  // DLPACK_DLPACK_H_

DLDataTypeCode
DLDataTypeCode
The type code options DLDataType.
Definition: DLPack.h:160

kDLFloat8_e5m2fnuz
@ kDLFloat8_e5m2fnuz
Definition: DLPack.h:189

kDLInt
@ kDLInt
signed integer
Definition: DLPack.h:162

kDLComplex
@ kDLComplex
complex number (C/C++/Python layout: compact struct per complex number)
Definition: DLPack.h:179

kDLFloat8_e4m3b11fnuz
@ kDLFloat8_e4m3b11fnuz
Definition: DLPack.h:185

kDLFloat8_e4m3
@ kDLFloat8_e4m3
Definition: DLPack.h:184

kDLFloat6_e2m3fn
@ kDLFloat6_e2m3fn
FP6 data types Setting bits != 6 is currently unspecified, and the producer must ensure it is set whi...
Definition: DLPack.h:196

kDLFloat8_e3m4
@ kDLFloat8_e3m4
FP8 data types.
Definition: DLPack.h:183

kDLFloat8_e5m2
@ kDLFloat8_e5m2
Definition: DLPack.h:188

kDLFloat4_e2m1fn
@ kDLFloat4_e2m1fn
FP4 data types Setting bits != 4 is currently unspecified, and the producer must ensure it is set whi...
Definition: DLPack.h:203

kDLFloat8_e8m0fnu
@ kDLFloat8_e8m0fnu
Definition: DLPack.h:190

kDLBfloat
@ kDLBfloat
bfloat16
Definition: DLPack.h:174

kDLFloat8_e4m3fn
@ kDLFloat8_e4m3fn
Definition: DLPack.h:186

kDLFloat8_e4m3fnuz
@ kDLFloat8_e4m3fnuz
Definition: DLPack.h:187

kDLFloat
@ kDLFloat
IEEE floating point.
Definition: DLPack.h:166

kDLUInt
@ kDLUInt
unsigned integer
Definition: DLPack.h:164

kDLOpaqueHandle
@ kDLOpaqueHandle
Opaque handle type, reserved for testing purposes. Frameworks need to agree on the handle data type f...
Definition: DLPack.h:172

kDLBool
@ kDLBool
boolean
Definition: DLPack.h:181

kDLFloat6_e3m2fn
@ kDLFloat6_e3m2fn
Definition: DLPack.h:197

DLPackCurrentWorkStream
int(* DLPackCurrentWorkStream)(DLDeviceType device_type, int32_t device_id, void **out_current_stream)
Obtain the current work stream of a device.
Definition: DLPack.h:512

DLPackManagedTensorToPyObjectNoSync
int(* DLPackManagedTensorToPyObjectNoSync)(DLManagedTensorVersioned *tensor, void **out_py_object)
Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray.
Definition: DLPack.h:535

DLPackManagedTensorAllocator
int(* DLPackManagedTensorAllocator)(DLTensor *prototype, DLManagedTensorVersioned **out, void *error_ctx, void(*SetError)(void *error_ctx, const char *kind, const char *message))
Request a producer library to create a new tensor.
Definition: DLPack.h:429

DLPackDLTensorFromPyObjectNoSync
int(* DLPackDLTensorFromPyObjectNoSync)(void *py_object, DLTensor *out)
Exports a PyObject* Tensor/NDArray to a provided DLTensor.
Definition: DLPack.h:486

DLDeviceType
DLDeviceType
The device type in DLDevice.
Definition: DLPack.h:92

kDLTrn
@ kDLTrn
AWS Trainium.
Definition: DLPack.h:140

kDLVulkan
@ kDLVulkan
Vulkan buffer for next generation graphics.
Definition: DLPack.h:105

kDLMetal
@ kDLMetal
Metal for Apple GPU.
Definition: DLPack.h:107

kDLCUDAManaged
@ kDLCUDAManaged
CUDA managed/unified memory allocated by cudaMallocManaged.
Definition: DLPack.h:125

kDLCUDA
@ kDLCUDA
CUDA GPU device.
Definition: DLPack.h:97

kDLCUDAHost
@ kDLCUDAHost
Pinned CUDA CPU memory by cudaMallocHost.
Definition: DLPack.h:101

kDLOneAPI
@ kDLOneAPI
Unified shared memory allocated on a oneAPI non-partitioned device. Call to oneAPI runtime is requi...
Definition: DLPack.h:132

kDLWebGPU
@ kDLWebGPU
GPU support for next generation WebGPU standard.
Definition: DLPack.h:134

kDLOpenCL
@ kDLOpenCL
OpenCL devices.
Definition: DLPack.h:103

kDLROCMHost
@ kDLROCMHost
Pinned ROCm CPU memory allocated by hipMallocHost.
Definition: DLPack.h:115

kDLHexagon
@ kDLHexagon
Qualcomm Hexagon DSP.
Definition: DLPack.h:136

kDLCPU
@ kDLCPU
CPU device.
Definition: DLPack.h:95

kDLMAIA
@ kDLMAIA
Microsoft MAIA devices.
Definition: DLPack.h:138

kDLROCM
@ kDLROCM
ROCm GPUs for AMD GPUs.
Definition: DLPack.h:111

kDLExtDev
@ kDLExtDev
Reserved extension device type, used to quickly test extension devices. The semantics can differ depen...
Definition: DLPack.h:121

kDLVPI
@ kDLVPI
Verilog simulator buffer.
Definition: DLPack.h:109

DLPackExchangeAPI
struct DLPackExchangeAPI DLPackExchangeAPI
Framework-specific function pointers table for DLPack exchange.

DLPackExchangeAPIHeader
struct DLPackExchangeAPIHeader DLPackExchangeAPIHeader
DLPackExchangeAPI stable header.

DLManagedTensorVersioned
struct DLManagedTensorVersioned DLManagedTensorVersioned
A versioned and managed C Tensor object, manage memory of DLTensor.

DLManagedTensor
struct DLManagedTensor DLManagedTensor
C Tensor object, manage memory of DLTensor. This data structure is intended to facilitate the borrowi...

DLPackManagedTensorFromPyObjectNoSync
int(* DLPackManagedTensorFromPyObjectNoSync)(void *py_object, DLManagedTensorVersioned **out)
Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
Definition: DLPack.h:456

fmt
Definition: IJsonConvertible.h:86

DLDataType
The data type the tensor can hold. The data type is assumed to follow the native endian-ness....
Definition: DLPack.h:226

DLDataType::lanes
uint16_t lanes
Number of lanes in the type, used for vector types.
Definition: DLPack.h:238

DLDataType::bits
uint8_t bits
Number of bits, common choices are 8, 16, 32.
Definition: DLPack.h:236

DLDataType::code
uint8_t code
Type code of base types. We keep it uint8_t instead of DLDataTypeCode for minimal memory footprint,...
Definition: DLPack.h:232

DLDevice
A Device for Tensor and operator.
Definition: DLPack.h:146

DLDevice::device_type
DLDeviceType device_type
The device type used in the device.
Definition: DLPack.h:148

DLDevice::device_id
int32_t device_id
The device index. For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
Definition: DLPack.h:154

DLManagedTensorVersioned
A versioned and managed C Tensor object, manage memory of DLTensor.
Definition: DLPack.h:366

DLManagedTensorVersioned::version
DLPackVersion version
The API and ABI version of the current managed Tensor.
Definition: DLPack.h:370

DLManagedTensorVersioned::manager_ctx
void * manager_ctx
the context of the original host framework.
Definition: DLPack.h:377

DLManagedTensorVersioned::deleter
void(* deleter)(struct DLManagedTensorVersioned *self)
Destructor.
Definition: DLPack.h:386

DLManagedTensorVersioned::dl_tensor
DLTensor dl_tensor
DLTensor which is being memory managed.
Definition: DLPack.h:400

DLManagedTensorVersioned::flags
uint64_t flags
Additional bitmask flags information about the tensor.
Definition: DLPack.h:398

DLManagedTensor
C Tensor object, manage memory of DLTensor. This data structure is intended to facilitate the borrowi...
Definition: DLPack.h:319

DLManagedTensor::deleter
void(* deleter)(struct DLManagedTensor *self)
Destructor - this should be called to destruct the manager_ctx which backs the DLManagedTensor....
Definition: DLPack.h:332

DLManagedTensor::dl_tensor
DLTensor dl_tensor
DLTensor which is being memory managed.
Definition: DLPack.h:321

DLManagedTensor::manager_ctx
void * manager_ctx
the context of the original host framework of DLManagedTensor in which DLManagedTensor is used in the...
Definition: DLPack.h:325

DLPackExchangeAPIHeader
DLPackExchangeAPI stable header.
Definition: DLPack.h:544

DLPackExchangeAPIHeader::version
DLPackVersion version
The provided DLPack version the consumer must check major version compatibility before using this str...
Definition: DLPack.h:549

DLPackExchangeAPIHeader::prev_api
struct DLPackExchangeAPIHeader * prev_api
Optional pointer to an older DLPackExchangeAPI in the chain.
Definition: DLPack.h:560

DLPackExchangeAPI
Framework-specific function pointers table for DLPack exchange.
Definition: DLPack.h:634

DLPackExchangeAPI::header
DLPackExchangeAPIHeader header
The header that remains stable across versions.
Definition: DLPack.h:638

DLPackExchangeAPI::managed_tensor_to_py_object_no_sync
DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync
Producer function pointer for DLPackManagedTensorToPyObject. This function must not be NULL.
Definition: DLPack.h:656

DLPackExchangeAPI::current_work_stream
DLPackCurrentWorkStream current_work_stream
Producer function pointer for DLPackCurrentWorkStream. This function must not be NULL.
Definition: DLPack.h:668

DLPackExchangeAPI::managed_tensor_allocator
DLPackManagedTensorAllocator managed_tensor_allocator
Producer function pointer for DLPackManagedTensorAllocator This function must not be NULL.
Definition: DLPack.h:644

DLPackExchangeAPI::managed_tensor_from_py_object_no_sync
DLPackManagedTensorFromPyObjectNoSync managed_tensor_from_py_object_no_sync
Producer function pointer for DLPackManagedTensorFromPyObject. This function must not be NULL.
Definition: DLPack.h:650

DLPackExchangeAPI::dltensor_from_py_object_no_sync
DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync
Producer function pointer for DLPackDLTensorFromPyObject This function can be NULL when the producer ...
Definition: DLPack.h:662

DLPackVersion
The DLPack version.
Definition: DLPack.h:79

DLPackVersion::minor
uint32_t minor
DLPack minor version.
Definition: DLPack.h:83

DLPackVersion::major
uint32_t major
DLPack major version.
Definition: DLPack.h:81

DLTensor
Plain C Tensor object, does not manage memory.
Definition: DLPack.h:244

DLTensor::ndim
int32_t ndim
Number of dimensions.
Definition: DLPack.h:278

DLTensor::strides
int64_t * strides
strides of the tensor (in number of elements, not bytes), can not be NULL if ndim !...
Definition: DLPack.h:300

DLTensor::device
DLDevice device
The device of the tensor.
Definition: DLPack.h:276

DLTensor::byte_offset
uint64_t byte_offset
The offset in bytes to the beginning pointer to data.
Definition: DLPack.h:302

DLTensor::data
void * data
The data pointer points to the allocated data. This will be CUDA device pointer or cl_mem handle in O...
Definition: DLPack.h:274

DLTensor::shape
int64_t * shape
The shape of the tensor.
Definition: DLPack.h:286

DLTensor::dtype
DLDataType dtype
The data type of the pointer.
Definition: DLPack.h:280

fmt::formatter< DLDeviceType >::format
auto format(const DLDeviceType &c, FormatContext &ctx) const -> decltype(ctx.out())
Definition: DLPack.h:683

fmt::formatter< DLDeviceType >::parse
constexpr auto parse(ParseContext &ctx) -> decltype(ctx.begin())
Definition: DLPack.h:740