63 const char* rhs_ptr =
reinterpret_cast<const char*
>(&rhs);
64 buffer_.reserve(buffer_.size() +
sizeof(T));
65 for (
size_t byte = 0;
byte <
sizeof(T);
byte++) {
66 char val = *(rhs_ptr + byte);
67 buffer_.push_back(val);
73 buffer_.insert(buffer_.end(), rhs.begin(), rhs.end());
79 size_t len = strlen(rhs);
80 buffer_.reserve(buffer_.size() + len);
81 for (
size_t byte = 0;
byte < len;
byte++) {
82 buffer_.push_back(rhs[
byte]);
87 template <
typename InputIt>
89 buffer_.insert(buffer_.end(), first, last);
95 for (
size_t i = 0; i <
count; ++i) {
102 buffer_.insert(buffer_.end(), other.buffer_.begin(),
103 other.buffer_.end());
/// Returns a mutable iterator positioned at the first char stored in
/// buffer_ (equal to End() when the buffer is empty).
107 std::vector<char>::iterator
Begin() {
return buffer_.begin(); }
/// Returns the past-the-end iterator of buffer_; it marks the end of the
/// stored chars and must not be dereferenced.
109 std::vector<char>::iterator
End() {
return buffer_.end(); }
/// Returns the number of chars currently stored in buffer_.
111 size_t Size()
const {
return buffer_.size(); }
/// Returns a writable pointer to the contiguous storage of buffer_.
/// As with any std::vector, the pointer can be invalidated by a later
/// operation that grows the buffer.
115 char*
Data() {
return buffer_.data(); }
/// Read-only overload: returns a const pointer to the contiguous storage
/// of buffer_.
117 const char*
Data()
const {
return buffer_.data(); }
/// Read-only element access: returns a const reference to the char at
/// position \p index of buffer_.
/// Forwards to std::vector::operator[], which performs no bounds check, so
/// \p index must be less than Size().
121 const char&
operator[](
size_t index)
const {
return buffer_[index]; }
/// Underlying char storage; Begin()/End()/Size()/Data()/operator[] all
/// forward to this vector.
124 std::vector<char> buffer_;
129 return ((
reinterpret_cast<char*
>(&x))[0]) ?
'<' :
'>';
162 std::stringstream shape_ss;
163 if (shape.
size() == 0) {
165 }
else if (shape.
size() == 1) {
169 shape_ss << shape[0];
170 for (
size_t i = 1; i < shape.size(); i++) {
172 shape_ss << shape[i];
174 if (shape.size() == 1) {
185 "{{'descr': '{}{}{}', 'fortran_order': False, 'shape': {}, }}",
190 property_dict.
Append(property_dict_body);
192 size_t padding_count = 16 - (10 + property_dict.
Size()) % 16 - 1;
193 property_dict.
Append(padding_count,
' ');
194 property_dict.
Append(
'\n');
197 header.
Append<
unsigned char>(0x93);
199 header.
Append<uint8_t>(0x01);
200 header.
Append<uint8_t>(0x00);
201 header.
Append<uint16_t>(property_dict.
Size());
202 header.
Append(property_dict);
208 const std::string& header) {
218 loc1 = header.find(
"fortran_order");
219 if (loc1 == std::string::npos) {
223 fortran_order = (header.substr(loc1, 4) ==
"True" ? true :
false);
226 loc1 = header.find(
"(");
227 loc2 = header.find(
")");
228 if (loc1 == std::string::npos || loc2 == std::string::npos) {
232 std::regex num_regex(
"[0-9][0-9]*");
236 std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
237 while (std::regex_search(str_shape, sm, num_regex)) {
239 str_shape = sm.suffix().str();
245 loc1 = header.find(
"descr");
246 if (loc1 == std::string::npos) {
252 (header[loc1] ==
'<' || header[loc1] ==
'|' ? true :
false);
253 if (!little_endian) {
257 type = header[loc1 + 1];
259 std::string str_ws = header.substr(loc1 + 2);
260 loc2 = str_ws.find(
"'");
261 word_size = atoi(str_ws.substr(0, loc2).c_str());
263 return std::make_tuple(shape,
type, word_size, fortran_order);
282 if (*
reinterpret_cast<const unsigned char*
>(&preamble[0]) !=
283 static_cast<unsigned char>(0x93) ||
284 preamble[1] !=
'N' || preamble[2] !=
'U' || preamble[3] !=
'M' ||
285 preamble[4] !=
'P' || preamble[5] !=
'Y') {
287 preamble[1], preamble[2], preamble[3], preamble[4],
290 if (preamble[6] !=
static_cast<uint8_t
>(0x01) ||
291 preamble[7] !=
static_cast<uint8_t
>(0x00)) {
293 "Not supported Numpy format version: {}.{}. Only version 1.0 "
295 preamble[6], preamble[7]);
297 uint16_t header_len = *
reinterpret_cast<const uint16_t*
>(&preamble[8]);
298 return static_cast<size_t>(header_len);
305 const size_t preamble_len = 10;
307 if (fread(preamble.
Data(),
sizeof(
char), preamble_len, fp) !=
314 if (fread(header.
Data(),
sizeof(
char), header_len, fp) != header_len) {
317 if (header[header_len - 1] !=
'\n') {
324 static std::tuple<core::SizeVector, char, int64_t, bool>
327 std::string header(
reinterpret_cast<const char*
>(buffer + 10), header_len);
332 size_t footer_len = 22;
334 fseek(fp, -
static_cast<int64_t
>(footer_len),
SEEK_END);
335 if (fread(footer.
Data(),
sizeof(
char), footer_len, fp) != footer_len) {
340 uint16_t disk_no = *
reinterpret_cast<uint16_t*
>(&footer[4 ]);
341 uint16_t disk_start = *
reinterpret_cast<uint16_t*
>(&footer[6 ]);
342 uint16_t nrecs_on_disk = *
reinterpret_cast<uint16_t*
>(&footer[8 ]);
343 uint16_t nrecs = *
reinterpret_cast<uint16_t*
>(&footer[10]);
344 uint32_t global_header_size = *
reinterpret_cast<uint32_t*
>(&footer[12]);
345 uint32_t global_header_offset = *
reinterpret_cast<uint32_t*
>(&footer[16]);
346 uint16_t comment_len = *
reinterpret_cast<uint16_t*
>(&footer[20]);
349 if (disk_no != 0 || disk_start != 0 || comment_len != 0) {
352 if (nrecs_on_disk != nrecs) {
356 return std::make_tuple(
static_cast<size_t>(nrecs), global_header_size,
357 global_header_offset);
361 const std::string& tensor_name,
367 const int64_t element_byte_size = dtype.
ByteSize();
370 const std::string mode = append ?
"r+b" :
"wb";
371 if (!cfile.
Open(file_name, mode)) {
378 size_t global_header_offset = 0;
387 size_t global_header_size;
388 std::tie(nrecs, global_header_size, global_header_offset) =
390 fseek(fp, global_header_offset,
SEEK_SET);
391 global_header.
Resize(global_header_size);
392 size_t res = fread(global_header.
Data(),
sizeof(
char),
393 global_header_size, fp);
394 if (res != global_header_size) {
397 fseek(fp, global_header_offset,
SEEK_SET);
402 size_t nels = std::accumulate(shape.
begin(), shape.
end(), 1,
403 std::multiplies<size_t>());
404 size_t nbytes = nels * element_byte_size + npy_header.
Size();
407 uint32_t crc = crc32(0L,
reinterpret_cast<uint8_t*
>(npy_header.
Data()),
409 crc = crc32(crc,
static_cast<const uint8_t*
>(data),
410 nels * element_byte_size);
413 std::string var_name = tensor_name +
".npy";
417 local_header.
Append(
"PK");
418 local_header.
Append<uint16_t>(0x0403);
419 local_header.
Append<uint16_t>(20);
420 local_header.
Append<uint16_t>(0);
421 local_header.
Append<uint16_t>(0);
422 local_header.
Append<uint16_t>(0);
423 local_header.
Append<uint16_t>(0);
424 local_header.
Append<uint32_t>(crc);
425 local_header.
Append<uint32_t>(nbytes);
426 local_header.
Append<uint32_t>(nbytes);
427 local_header.
Append<uint16_t>(var_name.size());
428 local_header.
Append<uint16_t>(0);
429 local_header.
Append(var_name);
432 global_header.
Append(
"PK");
433 global_header.
Append<uint16_t>(0x0201);
434 global_header.
Append<uint16_t>(20);
436 global_header.
Append<uint16_t>(0);
437 global_header.
Append<uint16_t>(0);
438 global_header.
Append<uint16_t>(0);
439 global_header.
Append<uint32_t>(0);
442 global_header.
Append<uint32_t>(global_header_offset);
443 global_header.
Append(var_name);
448 footer.
Append<uint16_t>(0x0605);
449 footer.
Append<uint16_t>(0);
450 footer.
Append<uint16_t>(0);
451 footer.
Append<uint16_t>(nrecs + 1);
452 footer.
Append<uint16_t>(nrecs + 1);
453 footer.
Append<uint32_t>(global_header.
Size());
456 footer.
Append<uint32_t>(global_header_offset + nbytes +
457 local_header.
Size());
458 footer.
Append<uint16_t>(0);
461 fwrite(local_header.
Data(),
sizeof(
char), local_header.
Size(), fp);
462 fwrite(npy_header.
Data(),
sizeof(
char), npy_header.
Size(), fp);
463 fwrite(data, element_byte_size, nels, fp);
464 fwrite(global_header.
Data(),
sizeof(
char), global_header.
Size(), fp);
465 fwrite(footer.
Data(),
sizeof(
char), footer.
Size(), fp);
470 if (!cfile.
Open(file_name,
"wb")) {
479 footer.
Append<uint16_t>(0x0605);
480 footer.
Append<uint16_t>(0);
481 footer.
Append<uint16_t>(0);
482 footer.
Append<uint16_t>(0);
483 footer.
Append<uint16_t>(0);
484 footer.
Append<uint32_t>(0);
485 footer.
Append<uint32_t>(0);
486 footer.
Append<uint16_t>(0);
487 if (footer.
Size() != 22) {
492 fwrite(footer.
Data(),
sizeof(
char), footer.
Size(), fp);
500 word_size_(t.
GetDtype().ByteSize()),
501 fortran_order_(false) {
511 word_size_(word_size),
512 fortran_order_(fortran_order) {
516 template <
typename T>
518 return reinterpret_cast<T*
>(blob_->GetDataPtr());
521 template <
typename T>
523 return reinterpret_cast<const T*
>(blob_->GetDataPtr());
529 if (type_ ==
'i' && word_size_ == 1)
return core::Int8;
530 if (type_ ==
'i' && word_size_ == 2)
return core::Int16;
531 if (type_ ==
'i' && word_size_ == 4)
return core::Int32;
532 if (type_ ==
'i' && word_size_ == 8)
return core::Int64;
533 if (type_ ==
'u' && word_size_ == 1)
return core::UInt8;
534 if (type_ ==
'u' && word_size_ == 2)
return core::UInt16;
535 if (type_ ==
'u' && word_size_ == 4)
return core::UInt32;
536 if (type_ ==
'u' && word_size_ == 8)
return core::UInt64;
551 if (fortran_order_) {
557 "Cannot load Numpy array with Numpy dtype={} and "
563 const_cast<void*
>(GetDataPtr<void>()), dtype, blob_);
567 void Save(std::string file_name)
const {
569 if (!cfile.
Open(file_name,
"wb")) {
577 fwrite(header.
Data(),
sizeof(
char), header.
Size(), fp);
579 fwrite(GetDataPtr<void>(),
static_cast<size_t>(
GetDtype().ByteSize()),
584 std::shared_ptr<core::Blob> blob_ =
nullptr;
600 std::tie(shape,
type, word_size, fortran_order) =
604 size_t nread = fread(arr.
GetDataPtr<
char>(), 1,
605 static_cast<size_t>(arr.
NumBytes()), fp);
606 if (nread !=
static_cast<size_t>(arr.
NumBytes())) {
614 uint32_t num_compressed_bytes,
615 uint32_t num_uncompressed_bytes) {
616 CharVector buffer_compressed(num_compressed_bytes);
617 CharVector buffer_uncompressed(num_uncompressed_bytes);
618 size_t nread = fread(buffer_compressed.
Data(), 1, num_compressed_bytes, fp);
619 if (nread != num_compressed_bytes) {
626 d_stream.zalloc = Z_NULL;
627 d_stream.zfree = Z_NULL;
628 d_stream.opaque = Z_NULL;
629 d_stream.avail_in = 0;
630 d_stream.next_in = Z_NULL;
631 err = inflateInit2(&d_stream, -MAX_WBITS);
633 d_stream.avail_in = num_compressed_bytes;
635 reinterpret_cast<unsigned char*
>(buffer_compressed.
Data());
636 d_stream.avail_out = num_uncompressed_bytes;
638 reinterpret_cast<unsigned char*
>(buffer_uncompressed.
Data());
640 err = inflate(&d_stream, Z_FINISH);
641 err = inflateEnd(&d_stream);
650 std::tie(shape,
type, word_size, fortran_order) =
664 if (!cfile.
Open(file_name,
"rb")) {
675 std::unordered_map<std::string, core::Tensor>
ReadNpz(
676 const std::string& file_name) {
678 if (!cfile.
Open(file_name,
"rb")) {
684 std::unordered_map<std::string, core::Tensor> tensor_map;
690 size_t local_header_bytes =
691 fread(local_header.
Data(),
sizeof(
char), 30, fp);
694 if (local_header_bytes == 22) {
695 const char empty_zip_bytes[22] = {
696 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00,
697 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
699 if (std::memcmp(empty_zip_bytes, local_header.
Data(), 22) == 0) {
706 if (local_header_bytes != 30) {
711 if (local_header[2] != 0x03 || local_header[3] != 0x04) {
716 uint16_t tensor_name_len =
717 *
reinterpret_cast<uint16_t*
>(&local_header[26]);
718 CharVector tensor_name_buf(tensor_name_len,
' ');
719 if (fread(tensor_name_buf.
Data(),
sizeof(
char), tensor_name_len, fp) !=
725 std::string tensor_name(tensor_name_buf.
Begin(), tensor_name_buf.
End());
726 tensor_name.erase(tensor_name.end() - 4, tensor_name.end());
729 uint16_t extra_field_len =
730 *
reinterpret_cast<uint16_t*
>(&local_header[28]);
731 if (extra_field_len > 0) {
733 if (fread(buff.
Data(),
sizeof(
char), extra_field_len, fp) !=
739 uint16_t compressed_method =
740 *
reinterpret_cast<uint16_t*
>(&local_header[8]);
741 uint32_t num_compressed_bytes =
742 *
reinterpret_cast<uint32_t*
>(&local_header[18]);
743 uint32_t num_uncompressed_bytes =
744 *
reinterpret_cast<uint32_t*
>(&local_header[22]);
746 if (compressed_method == 0) {
749 tensor_map[tensor_name] =
751 num_uncompressed_bytes)
760 const std::unordered_map<std::string, core::Tensor>& tensor_map) {
761 if (tensor_map.empty()) {
765 std::unordered_map<std::string, core::Tensor> contiguous_tensor_map;
766 for (
auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {
767 contiguous_tensor_map[it->first] =
775 bool is_first_tensor =
true;
776 for (
auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {
778 if (is_first_tensor) {
780 is_first_tensor =
false;
filament::Texture::InternalFormat format
std::string ToString() const
DtypeCode GetDtypeCode() const
int64_t NumElements() const
void push_back(const T &Elt)
Tensor Contiguous() const
SizeVector GetShape() const
Tensor To(Dtype dtype, bool copy=false) const
std::shared_ptr< Blob > GetBlob() const
char & operator[](size_t index)
const char & operator[](size_t index) const
CharVector & Append(const T &rhs)
CharVector & Append(InputIt first, InputIt last)
CharVector & Append(const std::string &rhs)
CharVector & Append(size_t count, const T &value)
const char * Data() const
CharVector & Append(const CharVector &other)
std::vector< char >::iterator Begin()
void Resize(size_t count)
CharVector & Append(const char *rhs)
CharVector(size_t size, const char value)
std::vector< char >::iterator End()
NumpyArray(const core::Tensor &t)
NumpyArray(const core::SizeVector &shape, char type, int64_t word_size, bool fortran_order)
bool IsFortranOrder() const
core::SizeVector GetShape() const
core::Tensor ToTensor() const
const T * GetDataPtr() const
int64_t NumElements() const
void Save(std::string file_name) const
core::Dtype GetDtype() const
std::string GetError()
Returns the last encountered error for this file.
bool Open(const std::string &filename, const std::string &mode)
Open a file.
FILE * GetFILE()
Returns the underlying C FILE pointer.
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
std::unordered_map< std::string, core::Tensor > ReadNpz(const std::string &file_name)
static std::tuple< size_t, size_t, size_t > ParseZipFooter(FILE *fp)
core::Tensor ReadNpy(const std::string &file_name)
void WriteNpz(const std::string &file_name, const std::unordered_map< std::string, core::Tensor > &tensor_map)
static char BigEndianChar()
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromBuffer(const char *buffer)
static CharVector CreateNumpyHeader(const core::SizeVector &shape, const core::Dtype &dtype)
static NumpyArray CreateNumpyArrayFromCompressedFile(FILE *fp, uint32_t num_compressed_bytes, uint32_t num_uncompressed_bytes)
static char DtypeToChar(const core::Dtype &dtype)
static NumpyArray CreateNumpyArrayFromFile(FILE *fp)
static void WriteNpzOneTensor(const std::string &file_name, const std::string &tensor_name, const core::Tensor &tensor, bool append)
static void WriteNpzEmpty(const std::string &file_name)
static size_t ParseNpyPreamble(const char *preamble)
void WriteNpy(const std::string &file_name, const core::Tensor &tensor)
static std::tuple< core::SizeVector, char, int64_t, bool > ParsePropertyDict(const std::string &header)
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromFile(FILE *fp)
Generic file read and write utility for python interface.