#include <Indexer.h>

Collaboration diagram for cloudViewer::core::Indexer:

Public Member Functions
	Indexer ()

	Indexer (const Indexer &)=default

Indexer &	operator= (const Indexer &)=default

	Indexer (const std::vector< Tensor > &input_tensors, const Tensor &output_tensor, DtypePolicy dtype_policy=DtypePolicy::ALL_SAME, const SizeVector &reduction_dims={})

	Indexer (const std::vector< Tensor > &input_tensors, const std::vector< Tensor > &output_tensors, DtypePolicy dtype_policy=DtypePolicy::ALL_SAME, const SizeVector &reduction_dims={})

bool	CanUse32BitIndexing () const
	Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1. More...

IndexerIterator	SplitTo32BitIndexing () const

std::unique_ptr< Indexer >	SplitLargestDim ()

Indexer	GetPerOutputIndexer (int64_t output_idx) const

bool	ShouldAccumulate () const

bool	IsFinalOutput () const

void	ShrinkDim (int64_t dim, int64_t start, int64_t size)

int64_t	NumReductionDims () const
	Returns the number of reduction dimensions. More...

int64_t	NumDims () const
	Returns number of dimensions of the Indexer. More...

const int64_t *	GetPrimaryShape () const

int64_t *	GetPrimaryShape ()

const int64_t *	GetPrimaryStrides () const

int64_t	NumWorkloads () const

int64_t	NumOutputElements () const
	Returns the number of output elements. More...

int64_t	NumInputs () const
	Number of input Tensors. More...

int64_t	NumOutputs () const
	Number of output Tensors. More...

TensorRef &	GetInput (int64_t i)
	Returns input TensorRef. More...

const TensorRef &	GetInput (int64_t i) const

TensorRef &	GetOutput (int64_t i)
	Returns output TensorRef. More...

const TensorRef &	GetOutput (int64_t i) const

TensorRef &	GetOutput ()

const TensorRef &	GetOutput () const

bool	IsReductionDim (int64_t dim) const
	Returns true if the `dim` -th dimension is reduced. More...

CLOUDVIEWER_HOST_DEVICE char *	GetInputPtr (int64_t input_idx, int64_t workload_idx) const

template<typename T >
CLOUDVIEWER_HOST_DEVICE T *	GetInputPtr (int64_t input_idx, int64_t workload_idx) const

CLOUDVIEWER_HOST_DEVICE char *	GetOutputPtr (int64_t workload_idx) const

template<typename T >
CLOUDVIEWER_HOST_DEVICE T *	GetOutputPtr (int64_t workload_idx) const

CLOUDVIEWER_HOST_DEVICE char *	GetOutputPtr (int64_t output_idx, int64_t workload_idx) const

template<typename T >
CLOUDVIEWER_HOST_DEVICE T *	GetOutputPtr (int64_t output_idx, int64_t workload_idx) const

Protected Member Functions
void	CoalesceDimensions ()

void	ReorderDimensions (const SizeVector &reduction_dims)

void	UpdatePrimaryStrides ()
	Update primary_strides_ based on primary_shape_. More...

void	UpdateContiguousFlags ()
	Update inputs_contiguous_ and outputs_contiguous_. More...

CLOUDVIEWER_HOST_DEVICE char *	GetWorkloadDataPtr (const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const

template<typename T >
CLOUDVIEWER_HOST_DEVICE T *	GetWorkloadDataPtr (const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const

Static Protected Member Functions
static void	BroadcastRestride (TensorRef &src, int64_t dst_ndims, const int64_t *dst_shape)

static void	ReductionRestride (TensorRef &dst, int64_t src_ndims, const int64_t *src_shape, const SizeVector &reduction_dims)

Protected Attributes
int64_t	num_inputs_ = 0
	Number of input Tensors (the number of output Tensors is stored in num_outputs_). More...

int64_t	num_outputs_ = 0

TensorRef	inputs_ [MAX_INPUTS]
	Array of input TensorRefs. More...

TensorRef	outputs_ [MAX_OUTPUTS]
	Array of output TensorRefs. More...

bool	inputs_contiguous_ [MAX_INPUTS]
	Array of contiguous flags for all input TensorRefs. More...

bool	outputs_contiguous_ [MAX_OUTPUTS]
	Array of contiguous flags for all output TensorRefs. More...

int64_t	primary_shape_ [MAX_DIMS]

int64_t	primary_strides_ [MAX_DIMS]

int64_t	ndims_ = 0
	Indexer's global number of dimensions. More...

bool	final_output_ = true

bool	accumulate_ = false

Detailed Description

Indexing engine for elementwise ops with broadcasting support.

Fancy indexing is supported by restriding the input tensor and treating the operation as an elementwise op.

After constructing Indexer on the host, the indexing methods can be used from both host and device.

Definition at line 262 of file Indexer.h.

Constructor & Destructor Documentation

◆ Indexer() [1/4]

cloudViewer::core::Indexer::Indexer ( )

inline

Definition at line 264 of file Indexer.h.

Referenced by SplitLargestDim().

◆ Indexer() [2/4]

cloudViewer::core::Indexer::Indexer ( const Indexer & )

default

◆ Indexer() [3/4]

cloudViewer::core::Indexer::Indexer	(	const std::vector< Tensor > &	input_tensors,
		const Tensor &	output_tensor,
		DtypePolicy	dtype_policy = `DtypePolicy::ALL_SAME`,
		const SizeVector &	reduction_dims = `{}`
	)

Only a single output is supported for simplicity. To extend this function to support multiple outputs, one may check for shape compatibility of all outputs.

Definition at line 35 of file Indexer.cpp.

◆ Indexer() [4/4]

cloudViewer::core::Indexer::Indexer	(	const std::vector< Tensor > &	input_tensors,
		const std::vector< Tensor > &	output_tensors,
		DtypePolicy	dtype_policy = `DtypePolicy::ALL_SAME`,
		const SizeVector &	reduction_dims = `{}`
	)

Member Function Documentation

◆ BroadcastRestride()

void cloudViewer::core::Indexer::BroadcastRestride	(	TensorRef &	src,
		int64_t	dst_ndims,
		const int64_t *	dst_shape
	)

staticprotected

Broadcast src to dst by setting the shape to 1 for omitted dimensions and setting the stride to 0 for broadcast dimensions.

Note that other approaches may also work. E.g. one could set src's shape to exactly the same as dst's shape. In general, if a dimension is of size 1, the stride has no effect in computing offsets; likewise, if a dimension has stride 0, the shape has no effect in computing offsets.

[After]
src.shape_:   [ 1, 2, 1, 1, 3]
src.strides_: [ 0, 3, 0, 3, 1]

Parameters

src	The source TensorRef to be broadcasted.
dst_ndims	Number of dimensions to be broadcasted to.
dst_shape	Shape to be broadcasted to.

Definition at line 575 of file Indexer.cpp.

References cloudViewer::core::TensorRef::byte_strides_, cloudViewer::core::TensorRef::ndims_, and cloudViewer::core::TensorRef::shape_.

Referenced by Indexer().

◆ CanUse32BitIndexing()

bool cloudViewer::core::Indexer::CanUse32BitIndexing ( ) const

Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1.

Definition at line 198 of file Indexer.cpp.

References inputs_, max(), ndims_, num_inputs_, num_outputs_, NumWorkloads(), outputs_, and primary_shape_.

◆ CoalesceDimensions()

void cloudViewer::core::Indexer::CoalesceDimensions ( )

protected

Merge adjacent dimensions if either dim is 1 or if: shape[n] * stride[n] == stride[n + 1]

Definition at line 425 of file Indexer.cpp.

References cloudViewer::core::TensorRef::byte_strides_, inputs_, cloudViewer::core::TensorRef::ndims_, ndims_, num_inputs_, num_outputs_, outputs_, primary_shape_, stride, UpdateContiguousFlags(), and UpdatePrimaryStrides().

Referenced by Indexer(), and ShrinkDim().

◆ GetInput() [1/2]

TensorRef& cloudViewer::core::Indexer::GetInput ( int64_t i )

inline

Returns input TensorRef.

Definition at line 345 of file Indexer.h.

References inputs_, LogError, and num_inputs_.

◆ GetInput() [2/2]

const TensorRef& cloudViewer::core::Indexer::GetInput ( int64_t i ) const

inline

Definition at line 352 of file Indexer.h.

References inputs_, LogError, and num_inputs_.

◆ GetInputPtr() [1/2]

CLOUDVIEWER_HOST_DEVICE char* cloudViewer::core::Indexer::GetInputPtr	(	int64_t	input_idx,
		int64_t	workload_idx
	)		const

inline

Get input Tensor data pointer based on workload_idx.

Parameters

input_idx	Input tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Definition at line 406 of file Indexer.h.

References GetWorkloadDataPtr(), inputs_, inputs_contiguous_, and num_inputs_.

Referenced by cloudViewer::core::AdvancedIndexer::GetIndexedOffset(), cloudViewer::core::AdvancedIndexer::GetInputPtr(), and cloudViewer::core::kernel::CPUArgReductionEngine::LaunchArgReductionParallelDim().

◆ GetInputPtr() [2/2]

template<typename T >

CLOUDVIEWER_HOST_DEVICE T* cloudViewer::core::Indexer::GetInputPtr	(	int64_t	input_idx,
		int64_t	workload_idx
	)		const

inline

Get input Tensor data pointer based on workload_idx.

Parameters

input_idx	Input tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Note: Assumes that sizeof(T) matches the input's dtype size, but does not check this constraint for performance reasons.

Definition at line 424 of file Indexer.h.

References inputs_, inputs_contiguous_, and num_inputs_.

◆ GetOutput() [1/4]

TensorRef& cloudViewer::core::Indexer::GetOutput ( )

inline

Returns output TensorRef. Only works if there's only one output. Equivalent to GetOutput(0).

Definition at line 378 of file Indexer.h.

References LogError, and num_outputs_.

Referenced by GetOutput().

◆ GetOutput() [2/4]

const TensorRef& cloudViewer::core::Indexer::GetOutput ( ) const

inline

Definition at line 385 of file Indexer.h.

References GetOutput(), LogError, and num_outputs_.

◆ GetOutput() [3/4]

TensorRef& cloudViewer::core::Indexer::GetOutput ( int64_t i )

inline

Returns output TensorRef.

Definition at line 361 of file Indexer.h.

References LogError, num_outputs_, and outputs_.

◆ GetOutput() [4/4]

const TensorRef& cloudViewer::core::Indexer::GetOutput ( int64_t i ) const

inline

Definition at line 368 of file Indexer.h.

References LogError, num_outputs_, and outputs_.

◆ GetOutputPtr() [1/4]

CLOUDVIEWER_HOST_DEVICE char* cloudViewer::core::Indexer::GetOutputPtr	(	int64_t	output_idx,
		int64_t	workload_idx
	)		const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

output_idx	Output tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Definition at line 461 of file Indexer.h.

References GetWorkloadDataPtr(), outputs_, and outputs_contiguous_.

◆ GetOutputPtr() [2/4]

template<typename T >

CLOUDVIEWER_HOST_DEVICE T* cloudViewer::core::Indexer::GetOutputPtr	(	int64_t	output_idx,
		int64_t	workload_idx
	)		const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

output_idx	Output tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Definition at line 474 of file Indexer.h.

References outputs_, and outputs_contiguous_.

◆ GetOutputPtr() [3/4]

CLOUDVIEWER_HOST_DEVICE char* cloudViewer::core::Indexer::GetOutputPtr ( int64_t workload_idx ) const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

workload_idx The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Definition at line 438 of file Indexer.h.

References GetWorkloadDataPtr(), outputs_, and outputs_contiguous_.

Referenced by cloudViewer::core::AdvancedIndexer::GetOutputPtr(), and cloudViewer::core::kernel::CPUArgReductionEngine::LaunchArgReductionParallelDim().

◆ GetOutputPtr() [4/4]

template<typename T >

CLOUDVIEWER_HOST_DEVICE T* cloudViewer::core::Indexer::GetOutputPtr ( int64_t workload_idx ) const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

workload_idx The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Note: Assumes that sizeof(T) matches the output's dtype size, but does not check this constraint for performance reasons.

Definition at line 451 of file Indexer.h.

References outputs_, and outputs_contiguous_.

◆ GetPerOutputIndexer()

Indexer cloudViewer::core::Indexer::GetPerOutputIndexer ( int64_t output_idx ) const

Get a sub-indexer that loops through all inputs corresponding to a single output.

Definition at line 303 of file Indexer.cpp.

References cloudViewer::core::TensorRef::byte_strides_, cloudViewer::core::TensorRef::data_ptr_, GetPrimaryShape(), inputs_, IsReductionDim(), cloudViewer::core::MAX_DIMS, ndims_, num_inputs_, num_outputs_, outputs_, primary_shape_, cloudViewer::core::TensorRef::shape_, stride, UpdateContiguousFlags(), and UpdatePrimaryStrides().

◆ GetPrimaryShape() [1/2]

int64_t* cloudViewer::core::Indexer::GetPrimaryShape ( )

inline

Definition at line 317 of file Indexer.h.

References primary_shape_.

◆ GetPrimaryShape() [2/2]

const int64_t* cloudViewer::core::Indexer::GetPrimaryShape ( ) const

inline

Returns Indexer's primary shape; one can iterate the Indexer with this shape.

Definition at line 316 of file Indexer.h.

References primary_shape_.

Referenced by GetPerOutputIndexer().

◆ GetPrimaryStrides()

const int64_t* cloudViewer::core::Indexer::GetPrimaryStrides ( ) const

inline

Returns Indexer's primary strides; one can iterate the Indexer with these strides. They are always set to be the default strides from primary_shape_.

Definition at line 321 of file Indexer.h.

References primary_strides_.

◆ GetWorkloadDataPtr() [1/2]

CLOUDVIEWER_HOST_DEVICE char* cloudViewer::core::Indexer::GetWorkloadDataPtr	(	const TensorRef &	tr,
		bool	tr_contiguous,
		int64_t	workload_idx
	)		const

inlineprotected

Get data pointer from a TensorRef with workload_idx. Note: can be optimized by computing all input ptrs and output ptr together.

Definition at line 542 of file Indexer.h.

References cloudViewer::core::TensorRef::byte_strides_, cloudViewer::core::TensorRef::data_ptr_, cloudViewer::core::TensorRef::dtype_byte_size_, ndims_, offset, and primary_strides_.

Referenced by GetInputPtr(), and GetOutputPtr().

◆ GetWorkloadDataPtr() [2/2]

template<typename T >

CLOUDVIEWER_HOST_DEVICE T* cloudViewer::core::Indexer::GetWorkloadDataPtr	(	const TensorRef &	tr,
		bool	tr_contiguous,
		int64_t	workload_idx
	)		const

inlineprotected

Get data pointer from a TensorRef with workload_idx. Note: can be optimized by computing all input ptrs and output ptr together.

Note: Assumes that sizeof(T) matches the data's dtype size, but does not check this constraint for performance reasons.

Definition at line 572 of file Indexer.h.

References cloudViewer::core::TensorRef::byte_strides_, cloudViewer::core::TensorRef::data_ptr_, ndims_, offset, and primary_strides_.

◆ IsFinalOutput()

bool cloudViewer::core::Indexer::IsFinalOutput ( ) const

inline

Definition at line 299 of file Indexer.h.

References final_output_.

◆ IsReductionDim()

bool cloudViewer::core::Indexer::IsReductionDim ( int64_t dim ) const

inline

Returns true if the dim-th dimension is reduced.

Definition at line 394 of file Indexer.h.

References cloudViewer::core::TensorRef::byte_strides_, outputs_, and primary_shape_.

Referenced by GetPerOutputIndexer(), and SplitLargestDim().

◆ NumDims()

int64_t cloudViewer::core::Indexer::NumDims ( ) const

inline

Returns number of dimensions of the Indexer.

Definition at line 312 of file Indexer.h.

References ndims_.

◆ NumInputs()

int64_t cloudViewer::core::Indexer::NumInputs ( ) const

inline

Number of input Tensors.

Definition at line 339 of file Indexer.h.

References num_inputs_.

◆ NumOutputElements()

int64_t cloudViewer::core::Indexer::NumOutputElements ( ) const

Returns the number of output elements.

Definition at line 414 of file Indexer.cpp.

References ndims_, outputs_, and primary_shape_.

Referenced by cloudViewer::core::kernel::CPUReductionEngine::Run(), and cloudViewer::core::kernel::CPUArgReductionEngine::Run().

◆ NumOutputs()

int64_t cloudViewer::core::Indexer::NumOutputs ( ) const

inline

Number of output Tensors.

Definition at line 342 of file Indexer.h.

References num_outputs_.

◆ NumReductionDims()

int64_t cloudViewer::core::Indexer::NumReductionDims ( ) const

Returns the number of reduction dimensions.

Definition at line 395 of file Indexer.cpp.

References count, ndims_, and outputs_.

◆ NumWorkloads()

int64_t cloudViewer::core::Indexer::NumWorkloads ( ) const

Returns the total number of workloads (e.g. computations) needed for the op. The scheduler schedules these workloads to run on parallel threads.

For non-reduction ops, NumWorkloads() is the same as number of output elements (e.g. for broadcasting ops).

For reduction ops, NumWorkloads() is the same as the number of input elements. Currently we don't allow mixing broadcasting and reduction in one op kernel.

Definition at line 406 of file Indexer.cpp.

References ndims_, and primary_shape_.

Referenced by CanUse32BitIndexing(), cloudViewer::core::kernel::CPUArgReductionEngine::LaunchArgReductionParallelDim(), cloudViewer::core::AdvancedIndexer::NumWorkloads(), and cloudViewer::core::ParallelForSYCL().

◆ operator=()

Indexer& cloudViewer::core::Indexer::operator= ( const Indexer & )

default

◆ ReductionRestride()

void cloudViewer::core::Indexer::ReductionRestride	(	TensorRef &	dst,
		int64_t	src_ndims,
		const int64_t *	src_shape,
		const SizeVector &	reduction_dims
	)

staticprotected

Symmetrical to BroadcastRestride. Sets the reduced dimensions' stride to 0 at output. Currently only supports the keepdim=true case.

Definition at line 602 of file Indexer.cpp.

References cloudViewer::core::TensorRef::byte_strides_, LogError, cloudViewer::core::TensorRef::ndims_, and cloudViewer::core::TensorRef::shape_.

Referenced by Indexer().

◆ ReorderDimensions()

void cloudViewer::core::Indexer::ReorderDimensions ( const SizeVector & reduction_dims )

protected

Definition at line 491 of file Indexer.cpp.

References cloudViewer::core::TensorRef::byte_strides_, inputs_, ndims_, num_inputs_, num_outputs_, outputs_, cloudViewer::core::TensorRef::Permute(), cloudViewer::core::SmallVectorTemplateCommon< T, typename >::rbegin(), cloudViewer::core::SmallVectorTemplateCommon< T, typename >::rend(), and std::swap().

Referenced by Indexer().

◆ ShouldAccumulate()

bool cloudViewer::core::Indexer::ShouldAccumulate ( ) const

inline

Definition at line 297 of file Indexer.h.

References accumulate_.

◆ ShrinkDim()

void cloudViewer::core::Indexer::ShrinkDim	(	int64_t	dim,
		int64_t	start,
		int64_t	size
	)

Shrink iteration to a specific range in a specific dimension.

Parameters

dim	The dimension to shrink.
start	Starting index (inclusive) for dimension `dim`. No dimension wrapping is available.
size	The size to iterate in dimension `dim`.

Definition at line 364 of file Indexer.cpp.

References CoalesceDimensions(), cloudViewer::core::TensorRef::data_ptr_, inputs_, LogError, ndims_, num_inputs_, num_outputs_, outputs_, primary_shape_, size, UpdateContiguousFlags(), and UpdatePrimaryStrides().

Referenced by SplitLargestDim().

◆ SplitLargestDim()

std::unique_ptr< Indexer > cloudViewer::core::Indexer::SplitLargestDim ( )

Split the indexer such that the largest-span-dimension is split into two halves. The returned new indexer iterates the first half while the current indexer iterates the second half.

Definition at line 238 of file Indexer.cpp.

References accumulate_, copy, Indexer(), inputs_, IsReductionDim(), LogError, ndims_, num_inputs_, num_outputs_, outputs_, primary_shape_, ShrinkDim(), and size.

◆ SplitTo32BitIndexing()

IndexerIterator cloudViewer::core::Indexer::SplitTo32BitIndexing ( ) const

Returns an iterator of Indexers, each of which can be indexed in 32 bits.

Definition at line 234 of file Indexer.cpp.

◆ UpdateContiguousFlags()

void cloudViewer::core::Indexer::UpdateContiguousFlags ( )

protected

Update inputs_contiguous_ and outputs_contiguous_.

Definition at line 565 of file Indexer.cpp.

References inputs_, inputs_contiguous_, cloudViewer::core::TensorRef::IsContiguous(), num_inputs_, num_outputs_, outputs_, and outputs_contiguous_.

Referenced by CoalesceDimensions(), GetPerOutputIndexer(), Indexer(), and ShrinkDim().

◆ UpdatePrimaryStrides()

void cloudViewer::core::Indexer::UpdatePrimaryStrides ( )

protected

Update primary_strides_ based on primary_shape_.

Definition at line 556 of file Indexer.cpp.

References ndims_, primary_shape_, primary_strides_, and stride.

Referenced by CoalesceDimensions(), GetPerOutputIndexer(), Indexer(), and ShrinkDim().

Member Data Documentation

◆ accumulate_

bool cloudViewer::core::Indexer::accumulate_ = false

protected

If the kernel should accumulate into the output. Only relevant for CUDA reductions.

Definition at line 637 of file Indexer.h.

Referenced by ShouldAccumulate(), and SplitLargestDim().

◆ final_output_

bool cloudViewer::core::Indexer::final_output_ = true

protected

Whether this iterator produces the actual output, as opposed to something that will be accumulated further. Only relevant for CUDA reductions.

Definition at line 633 of file Indexer.h.

Referenced by IsFinalOutput().

◆ inputs_

TensorRef cloudViewer::core::Indexer::inputs_[MAX_INPUTS]

protected

Array of input TensorRefs.

Definition at line 599 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetInput(), GetInputPtr(), GetPerOutputIndexer(), Indexer(), ReorderDimensions(), ShrinkDim(), SplitLargestDim(), and UpdateContiguousFlags().

◆ inputs_contiguous_

bool cloudViewer::core::Indexer::inputs_contiguous_[MAX_INPUTS]

protected

Array of contiguous flags for all input TensorRefs.

Definition at line 605 of file Indexer.h.

Referenced by GetInputPtr(), and UpdateContiguousFlags().

◆ ndims_

int64_t cloudViewer::core::Indexer::ndims_ = 0

protected

Indexer's global number of dimensions.

Definition at line 628 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetPerOutputIndexer(), GetWorkloadDataPtr(), Indexer(), NumDims(), NumOutputElements(), NumReductionDims(), NumWorkloads(), ReorderDimensions(), ShrinkDim(), SplitLargestDim(), and UpdatePrimaryStrides().

◆ num_inputs_

int64_t cloudViewer::core::Indexer::num_inputs_ = 0

protected

Number of input Tensors (the number of output Tensors is stored in num_outputs_).

Definition at line 595 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetInput(), GetInputPtr(), GetPerOutputIndexer(), Indexer(), NumInputs(), ReorderDimensions(), ShrinkDim(), SplitLargestDim(), and UpdateContiguousFlags().

◆ num_outputs_

int64_t cloudViewer::core::Indexer::num_outputs_ = 0

protected

Definition at line 596 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetOutput(), GetPerOutputIndexer(), Indexer(), NumOutputs(), ReorderDimensions(), ShrinkDim(), SplitLargestDim(), and UpdateContiguousFlags().

◆ outputs_

TensorRef cloudViewer::core::Indexer::outputs_[MAX_OUTPUTS]

protected

Array of output TensorRefs.

Definition at line 602 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetOutput(), GetOutputPtr(), GetPerOutputIndexer(), Indexer(), IsReductionDim(), NumOutputElements(), NumReductionDims(), ReorderDimensions(), ShrinkDim(), SplitLargestDim(), and UpdateContiguousFlags().

◆ outputs_contiguous_

bool cloudViewer::core::Indexer::outputs_contiguous_[MAX_OUTPUTS]

protected

Array of contiguous flags for all output TensorRefs.

Definition at line 608 of file Indexer.h.

Referenced by GetOutputPtr(), and UpdateContiguousFlags().

◆ primary_shape_

int64_t cloudViewer::core::Indexer::primary_shape_[MAX_DIMS]

protected

Indexer's global shape. The shape's number of elements is the same as NumWorkloads() for the Indexer.

For broadcasting, primary_shape_ is the same as the output shape.
For reduction, primary_shape_ is the same as the input shape.
Currently we don't allow broadcasting mixed with reduction. But if broadcasting mixed with reduction is allowed, primary_shape_ is a mix of input shape and output shape. First, fill in all omitted dimensions (in inputs for broadcasting) and reduction dimensions (as if keepdim=true always) with size 1. For each axis, the primary dimension is the non-1 dimension (if both are 1, then the primary dimension is 1 in that axis).

Definition at line 621 of file Indexer.h.

Referenced by CanUse32BitIndexing(), CoalesceDimensions(), GetPerOutputIndexer(), GetPrimaryShape(), Indexer(), IsReductionDim(), NumOutputElements(), NumWorkloads(), ShrinkDim(), SplitLargestDim(), and UpdatePrimaryStrides().

◆ primary_strides_

int64_t cloudViewer::core::Indexer::primary_strides_[MAX_DIMS]

protected

The default strides for primary_shape_ for internal use only. Used to compute the actual strides and ultimately the index offsets.

Definition at line 625 of file Indexer.h.

Referenced by GetPrimaryStrides(), GetWorkloadDataPtr(), and UpdatePrimaryStrides().

The documentation for this class was generated from the following files:

/root/ACloudViewer/libs/cloudViewer/core/Indexer.h
/root/ACloudViewer/libs/cloudViewer/core/Indexer.cpp

Public Member Functions

Protected Member Functions

Static Protected Member Functions

Protected Attributes

Detailed Description

Constructor & Destructor Documentation

◆ Indexer() [1/4]

◆ Indexer() [2/4]

◆ Indexer() [3/4]

◆ Indexer() [4/4]

Member Function Documentation

◆ BroadcastRestride()

◆ CanUse32BitIndexing()

◆ CoalesceDimensions()

◆ GetInput() [1/2]

◆ GetInput() [2/2]

◆ GetInputPtr() [1/2]

◆ GetInputPtr() [2/2]

◆ GetOutput() [1/4]

◆ GetOutput() [2/4]

◆ GetOutput() [3/4]

◆ GetOutput() [4/4]

◆ GetOutputPtr() [1/4]

◆ GetOutputPtr() [2/4]

◆ GetOutputPtr() [3/4]

◆ GetOutputPtr() [4/4]

◆ GetPerOutputIndexer()

◆ GetPrimaryShape() [1/2]

◆ GetPrimaryShape() [2/2]

◆ GetPrimaryStrides()

◆ GetWorkloadDataPtr() [1/2]

◆ GetWorkloadDataPtr() [2/2]

◆ IsFinalOutput()

◆ IsReductionDim()

◆ NumDims()

◆ NumInputs()

◆ NumOutputElements()

◆ NumOutputs()

◆ NumReductionDims()

◆ NumWorkloads()

◆ operator=()

◆ ReductionRestride()

◆ ReorderDimensions()

◆ ShouldAccumulate()

◆ ShrinkDim()

◆ SplitLargestDim()

◆ SplitTo32BitIndexing()

◆ UpdateContiguousFlags()

◆ UpdatePrimaryStrides()

Member Data Documentation

◆ accumulate_

◆ final_output_

◆ inputs_

◆ inputs_contiguous_

◆ ndims_

◆ num_inputs_

◆ num_outputs_

◆ outputs_

◆ outputs_contiguous_

◆ primary_shape_

◆ primary_strides_