8 #include <benchmark/benchmark.h>
17 int64_t large_dim = (1ULL << 27) + 10;
22 for (
auto _ : state) {
30 #ifdef BUILD_CUDA_MODULE
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
BENCHMARK_CAPTURE(BinaryEW, Add__CPU_Int8__100, 100, BinaryOpCode::Add, Int8, Device("CPU:0")) -> Unit(benchmark::kMillisecond)
void Zeros(benchmark::State &state, const Device &device)
Generic file read and write utility for the Python interface.