24 #ifndef DLPACK_DLPACK_H_
25 #define DLPACK_DLPACK_H_
31 #define DLPACK_EXTERN_C extern "C"
33 #define DLPACK_EXTERN_C
37 #define DLPACK_MAJOR_VERSION 1
40 #define DLPACK_MINOR_VERSION 2
45 #define DLPACK_DLL __declspec(dllexport)
47 #define DLPACK_DLL __declspec(dllimport)
90 typedef enum : int32_t {
338 #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
346 #define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
354 #define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL)
433 void (*SetError)(
void* error_ctx,
515 void** out_current_stream
675 #include <fmt/core.h>
676 #include <fmt/format.h>
682 template <
typename FormatContext>
684 FormatContext& ctx)
const -> decltype(ctx.out()) {
685 const char* text =
nullptr;
694 text =
"kDLCUDAHost";
712 text =
"kDLROCMHost";
718 text =
"kDLCUDAManaged";
736 return format_to(ctx.out(), text);
739 template <
typename ParseContext>
740 constexpr
auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
DLDataTypeCode
The type code options for DLDataType.
@ kDLComplex
complex number (C/C++/Python layout: compact struct per complex number)
@ kDLFloat6_e2m3fn
FP6 data types. Setting bits != 6 is currently unspecified, and the producer must ensure it is set whi...
@ kDLFloat8_e3m4
FP8 data types.
@ kDLFloat4_e2m1fn
FP4 data types. Setting bits != 4 is currently unspecified, and the producer must ensure it is set whi...
@ kDLFloat
IEEE floating point.
@ kDLUInt
unsigned integer
@ kDLOpaqueHandle
Opaque handle type, reserved for testing purposes. Frameworks need to agree on the handle data type f...
int(* DLPackCurrentWorkStream)(DLDeviceType device_type, int32_t device_id, void **out_current_stream)
Obtain the current work stream of a device.
int(* DLPackManagedTensorToPyObjectNoSync)(DLManagedTensorVersioned *tensor, void **out_py_object)
Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray.
int(* DLPackManagedTensorAllocator)(DLTensor *prototype, DLManagedTensorVersioned **out, void *error_ctx, void(*SetError)(void *error_ctx, const char *kind, const char *message))
Request a producer library to create a new tensor.
int(* DLPackDLTensorFromPyObjectNoSync)(void *py_object, DLTensor *out)
Exports a PyObject* Tensor/NDArray to a provided DLTensor.
DLDeviceType
The device type in DLDevice.
@ kDLVulkan
Vulkan buffer for next generation graphics.
@ kDLMetal
Metal for Apple GPU.
@ kDLCUDAManaged
CUDA managed/unified memory allocated by cudaMallocManaged.
@ kDLCUDA
CUDA GPU device.
@ kDLCUDAHost
Pinned CUDA CPU memory allocated by cudaMallocHost.
@ kDLOneAPI
Unified shared memory allocated on a oneAPI non-partitioned device. Call to oneAPI runtime is requi...
@ kDLWebGPU
GPU support for next generation WebGPU standard.
@ kDLOpenCL
OpenCL devices.
@ kDLROCMHost
Pinned ROCm CPU memory allocated by hipMallocHost.
@ kDLHexagon
Qualcomm Hexagon DSP.
@ kDLMAIA
Microsoft MAIA devices.
@ kDLROCM
ROCm GPUs for AMD GPUs.
@ kDLExtDev
Reserved extension device type, used to quickly test an extension device. The semantics can differ depen...
@ kDLVPI
Verilog simulator buffer.
struct DLPackExchangeAPI DLPackExchangeAPI
Framework-specific function pointers table for DLPack exchange.
struct DLPackExchangeAPIHeader DLPackExchangeAPIHeader
DLPackExchangeAPI stable header.
struct DLManagedTensorVersioned DLManagedTensorVersioned
A versioned and managed C Tensor object that manages the memory of a DLTensor.
struct DLManagedTensor DLManagedTensor
C Tensor object that manages the memory of a DLTensor. This data structure is intended to facilitate the borrowi...
int(* DLPackManagedTensorFromPyObjectNoSync)(void *py_object, DLManagedTensorVersioned **out)
Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
The data type the tensor can hold. The data type is assumed to follow the native endian-ness....
uint16_t lanes
Number of lanes in the type, used for vector types.
uint8_t bits
Number of bits, common choices are 8, 16, 32.
uint8_t code
Type code of base types. We keep it uint8_t instead of DLDataTypeCode for minimal memory footprint,...
A Device for Tensor and operator.
DLDeviceType device_type
The device type used in the device.
int32_t device_id
The device index. For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
A versioned and managed C Tensor object that manages the memory of a DLTensor.
DLPackVersion version
The API and ABI version of the current managed Tensor.
void * manager_ctx
The context of the original host framework.
void(* deleter)(struct DLManagedTensorVersioned *self)
Destructor.
DLTensor dl_tensor
DLTensor which is being memory managed.
uint64_t flags
Additional bitmask flags information about the tensor.
C Tensor object that manages the memory of a DLTensor. This data structure is intended to facilitate the borrowi...
void(* deleter)(struct DLManagedTensor *self)
Destructor - this should be called to destruct the manager_ctx which backs the DLManagedTensor....
DLTensor dl_tensor
DLTensor which is being memory managed.
void * manager_ctx
the context of the original host framework of DLManagedTensor in which DLManagedTensor is used in the...
Framework-specific function pointers table for DLPack exchange.
DLPackExchangeAPIHeader header
The header that remains stable across versions.
DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync
Producer function pointer for DLPackManagedTensorToPyObjectNoSync. This function must not be NULL.
DLPackCurrentWorkStream current_work_stream
Producer function pointer for DLPackCurrentWorkStream. This function must not be NULL.
DLPackManagedTensorAllocator managed_tensor_allocator
Producer function pointer for DLPackManagedTensorAllocator. This function must not be NULL.
DLPackManagedTensorFromPyObjectNoSync managed_tensor_from_py_object_no_sync
Producer function pointer for DLPackManagedTensorFromPyObjectNoSync. This function must not be NULL.
DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync
Producer function pointer for DLPackDLTensorFromPyObjectNoSync. This function can be NULL when the producer ...
uint32_t minor
DLPack minor version.
uint32_t major
DLPack major version.
Plain C Tensor object, does not manage memory.
int32_t ndim
Number of dimensions.
int64_t * strides
strides of the tensor (in number of elements, not bytes), can not be NULL if ndim !...
DLDevice device
The device of the tensor.
uint64_t byte_offset
The offset in bytes to the beginning pointer to data.
void * data
The data pointer points to the allocated data. This will be a CUDA device pointer or cl_mem handle in O...
int64_t * shape
The shape of the tensor.
DLDataType dtype
The data type of the pointer.