ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
NumpyIO.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 // Contains source code from: https://github.com/rogersce/cnpy.
9 //
10 // The MIT License
11 //
12 // Copyright (c) Carl Rogers, 2011
13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included in
22 // all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 // THE SOFTWARE.
31 
33 
34 #include <FileSystem.h>
35 #include <Logging.h>
36 #include <zlib.h>
37 
38 #include <memory>
39 #include <numeric>
40 #include <regex>
41 #include <sstream>
42 #include <string>
43 #include <vector>
44 
45 #include "cloudViewer/core/Blob.h"
47 #include "cloudViewer/core/Dtype.h"
49 
50 namespace cloudViewer {
51 namespace t {
52 namespace io {
53 
// Growable byte buffer used to assemble NPY/NPZ headers and to hold raw
// chunks read from a file.
//
// Multi-byte values are appended as their in-memory object representation,
// i.e. in the byte order of the host. No byte swapping is performed.
class CharVector {
public:
    // Creates an empty buffer. Required by the header builders in this file,
    // which default-construct a CharVector and then Append() to it.
    CharVector() {}

    // Creates a buffer of `size` zero-initialized bytes.
    CharVector(size_t size) : buffer_(size) {}

    // Creates a buffer of `size` bytes, each set to `value`.
    CharVector(size_t size, const char value) : buffer_(size, value) {}

    // Appends the sizeof(T) bytes of `rhs` exactly as they are laid out in
    // memory (host byte order).
    template <typename T>
    CharVector& Append(const T& rhs) {
        const char* first = reinterpret_cast<const char*>(&rhs);
        buffer_.insert(buffer_.end(), first, first + sizeof(T));
        return *this;
    }

    // Appends the characters of `rhs`; no terminator is added.
    CharVector& Append(const std::string& rhs) {
        buffer_.insert(buffer_.end(), rhs.begin(), rhs.end());
        return *this;
    }

    // Appends the characters of the null-terminated string `rhs`, excluding
    // the terminating '\0'.
    CharVector& Append(const char* rhs) {
        // char_traits::length comes from <string>, so no <cstring> is needed.
        const size_t len = std::char_traits<char>::length(rhs);
        buffer_.insert(buffer_.end(), rhs, rhs + len);
        return *this;
    }

    // Appends the range [first, last).
    template <typename InputIt>
    CharVector& Append(InputIt first, InputIt last) {
        buffer_.insert(buffer_.end(), first, last);
        return *this;
    }

    // Appends `value` `count` times, using the single-value Append overload
    // that matches T.
    template <typename T>
    CharVector& Append(size_t count, const T& value) {
        for (size_t i = 0; i < count; ++i) {
            Append(value);
        }
        return *this;
    }

    // Appends all bytes of another buffer.
    CharVector& Append(const CharVector& other) {
        buffer_.insert(buffer_.end(), other.buffer_.begin(),
                       other.buffer_.end());
        return *this;
    }

    std::vector<char>::iterator Begin() { return buffer_.begin(); }

    std::vector<char>::iterator End() { return buffer_.end(); }

    // Number of bytes currently stored.
    size_t Size() const { return buffer_.size(); }

    void Resize(size_t count) { buffer_.resize(count); }

    char* Data() { return buffer_.data(); }

    const char* Data() const { return buffer_.data(); }

    // Unchecked element access.
    char& operator[](size_t index) { return buffer_[index]; }

    const char& operator[](size_t index) const { return buffer_[index]; }

private:
    std::vector<char> buffer_;
};
126 
// Returns the NumPy byte-order character of the host: '<' when the lowest
// addressed byte of an int holding 1 is non-zero (little endian), '>'
// otherwise. Despite the name, this is not a "is big endian" predicate.
static char BigEndianChar() {
    const int probe = 1;
    const char lowest_byte = *reinterpret_cast<const char*>(&probe);
    if (lowest_byte != 0) {
        return '<';
    }
    return '>';
}
131 
132 static char DtypeToChar(const core::Dtype& dtype) {
133  // Not all dtypes are supported.
134  // 'f': float, double, long double
135  // 'i': int, char, short, long, long long
136  // 'u': unsigned char, unsigned short, unsigned long, unsigned long long,
137  // unsigned int
138  // 'b': bool
139  // 'c': std::complex<float>, std::complex<double>),
140  // std::complex<long double>)
141  // '?': object
142  if (dtype == core::Float32) return 'f';
143  if (dtype == core::Float64) return 'f';
144  if (dtype == core::Int8) return 'i';
145  if (dtype == core::Int16) return 'i';
146  if (dtype == core::Int32) return 'i';
147  if (dtype == core::Int64) return 'i';
148  if (dtype == core::UInt8) return 'u';
149  if (dtype == core::UInt16) return 'u';
150  if (dtype == core::UInt32) return 'u';
151  if (dtype == core::UInt64) return 'u';
152  if (dtype == core::Bool) return 'b';
153  utility::LogError("Unsupported dtype: {}", dtype.ToString());
154  return '\0';
155 }
156 
158  const core::Dtype& dtype) {
159  // {} -> "()"
160  // {1} -> "(1,)"
161  // {1, 2} -> "(1, 2)"
162  std::stringstream shape_ss;
163  if (shape.size() == 0) {
164  shape_ss << "()";
165  } else if (shape.size() == 1) {
166  shape_ss << fmt::format("({},)", shape[0]);
167  } else {
168  shape_ss << "(";
169  shape_ss << shape[0];
170  for (size_t i = 1; i < shape.size(); i++) {
171  shape_ss << ", ";
172  shape_ss << shape[i];
173  }
174  if (shape.size() == 1) {
175  shape_ss << ",";
176  }
177  shape_ss << ")";
178  }
179 
180  // Pad with spaces so that preamble+dict is modulo 16 bytes.
181  // - Preamble is 10 bytes.
182  // - Dict needs to end with '\n'.
183  // - Header dict size includes the padding size and '\n'.
184  std::string property_dict_body = fmt::format(
185  "{{'descr': '{}{}{}', 'fortran_order': False, 'shape': {}, }}",
186  BigEndianChar(), DtypeToChar(dtype), dtype.ByteSize(),
187  shape_ss.str());
188 
189  CharVector property_dict;
190  property_dict.Append(property_dict_body);
191  // {0, 1, ..., 15}
192  size_t padding_count = 16 - (10 + property_dict.Size()) % 16 - 1;
193  property_dict.Append(padding_count, ' ');
194  property_dict.Append('\n');
195 
196  CharVector header;
197  header.Append<unsigned char>(0x93); // Magic value
198  header.Append("NUMPY"); // Magic value
199  header.Append<uint8_t>(0x01); // Major version
200  header.Append<uint8_t>(0x00); // Minor version
201  header.Append<uint16_t>(property_dict.Size());
202  header.Append(property_dict);
203 
204  return header; // Use move since CharVector is inherited.
205 }
206 
207 static std::tuple<core::SizeVector, char, int64_t, bool> ParsePropertyDict(
208  const std::string& header) {
209  core::SizeVector shape;
210  char type;
211  int64_t word_size;
212  bool fortran_order;
213 
214  size_t loc1;
215  size_t loc2;
216 
217  // Fortran order.
218  loc1 = header.find("fortran_order");
219  if (loc1 == std::string::npos) {
220  utility::LogError("Failed to find header keyword: 'fortran_order'");
221  }
222  loc1 += 16;
223  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
224 
225  // Shape.
226  loc1 = header.find("(");
227  loc2 = header.find(")");
228  if (loc1 == std::string::npos || loc2 == std::string::npos) {
229  utility::LogError("Failed to find header keyword: '(' or ')'");
230  }
231 
232  std::regex num_regex("[0-9][0-9]*");
233  std::smatch sm;
234  shape.clear();
235 
236  std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
237  while (std::regex_search(str_shape, sm, num_regex)) {
238  shape.push_back(std::stoi(sm[0].str()));
239  str_shape = sm.suffix().str();
240  }
241 
242  // Endian, word size, data type.
243  // byte order code | stands for not applicable.
244  // not sure when this applies except for byte array.
245  loc1 = header.find("descr");
246  if (loc1 == std::string::npos) {
247  utility::LogError("Failed to find header keyword: 'descr'");
248  }
249 
250  loc1 += 9;
251  bool little_endian =
252  (header[loc1] == '<' || header[loc1] == '|' ? true : false);
253  if (!little_endian) {
254  utility::LogError("Only big endian is supported.");
255  }
256 
257  type = header[loc1 + 1];
258 
259  std::string str_ws = header.substr(loc1 + 2);
260  loc2 = str_ws.find("'");
261  word_size = atoi(str_ws.substr(0, loc2).c_str());
262 
263  return std::make_tuple(shape, type, word_size, fortran_order);
264 }
265 
266 // Returns header length, which is the length of the string of property dict.
267 // The preamble must be at least 10 bytes.
268 // Ref: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
269 //
270 // - bytes[0] to bytes[5] : \x93NUMPY # Magic string
271 // - bytes[6] : \x01 # Major version, unsigned
272 // - bytes[7] : \x00 # Minor version, unsigned
273 // - bytes[8] to bytes[9] : HEADER_LEN little-endian uint16_t
274 // - bytes[10] to bytes[10+HEADER_LEN]: Dict, padded, terminated by '\n'
275 // - (10 + HEADER_LEN) % 64 == 0 : Guaranteed
276 //
277 // - We only support Version 1.0 for now.
278 // - Version 2.0+ supports up to 4GiB HEADER_LEN and the HEADER_LEN is
279 // replaced from uint16_t to uint32_t.
280 // - Version 3.0 uses utf8-encoded header string.
281 static size_t ParseNpyPreamble(const char* preamble) {
282  if (*reinterpret_cast<const unsigned char*>(&preamble[0]) !=
283  static_cast<unsigned char>(0x93) ||
284  preamble[1] != 'N' || preamble[2] != 'U' || preamble[3] != 'M' ||
285  preamble[4] != 'P' || preamble[5] != 'Y') {
286  utility::LogError("Invalid Numpy preamble {}{}{}{}{}{}.", preamble[0],
287  preamble[1], preamble[2], preamble[3], preamble[4],
288  preamble[5]);
289  }
290  if (preamble[6] != static_cast<uint8_t>(0x01) ||
291  preamble[7] != static_cast<uint8_t>(0x00)) {
293  "Not supported Numpy format version: {}.{}. Only version 1.0 "
294  "is supported.",
295  preamble[6], preamble[7]);
296  }
297  uint16_t header_len = *reinterpret_cast<const uint16_t*>(&preamble[8]);
298  return static_cast<size_t>(header_len);
299 }
300 
301 // Returns {shape, type(char), word_size, fortran_order}.
302 // This will advance the file pointer to the end of the header.
303 static std::tuple<core::SizeVector, char, int64_t, bool> ParseNpyHeaderFromFile(
304  FILE* fp) {
305  const size_t preamble_len = 10; // Version 1.0 assumed.
306  CharVector preamble(preamble_len);
307  if (fread(preamble.Data(), sizeof(char), preamble_len, fp) !=
308  preamble_len) {
309  utility::LogError("Header preamble cannot be read.");
310  }
311  const size_t header_len = ParseNpyPreamble(preamble.Data());
312 
313  CharVector header(header_len, 0);
314  if (fread(header.Data(), sizeof(char), header_len, fp) != header_len) {
315  utility::LogError("Failed to read header dictionary.");
316  }
317  if (header[header_len - 1] != '\n') {
318  utility::LogError("Numpy header not terminated by null character.");
319  }
320 
321  return ParsePropertyDict(std::string(header.Data(), header_len));
322 }
323 
324 static std::tuple<core::SizeVector, char, int64_t, bool>
325 ParseNpyHeaderFromBuffer(const char* buffer) {
326  const size_t header_len = ParseNpyPreamble(buffer);
327  std::string header(reinterpret_cast<const char*>(buffer + 10), header_len);
328  return ParsePropertyDict(header);
329 }
330 
331 static std::tuple<size_t, size_t, size_t> ParseZipFooter(FILE* fp) {
332  size_t footer_len = 22;
333  CharVector footer(footer_len);
334  fseek(fp, -static_cast<int64_t>(footer_len), SEEK_END);
335  if (fread(footer.Data(), sizeof(char), footer_len, fp) != footer_len) {
336  utility::LogError("Footer fread failed.");
337  }
338 
339  // clang-format off
340  uint16_t disk_no = *reinterpret_cast<uint16_t*>(&footer[4 ]);
341  uint16_t disk_start = *reinterpret_cast<uint16_t*>(&footer[6 ]);
342  uint16_t nrecs_on_disk = *reinterpret_cast<uint16_t*>(&footer[8 ]);
343  uint16_t nrecs = *reinterpret_cast<uint16_t*>(&footer[10]);
344  uint32_t global_header_size = *reinterpret_cast<uint32_t*>(&footer[12]);
345  uint32_t global_header_offset = *reinterpret_cast<uint32_t*>(&footer[16]);
346  uint16_t comment_len = *reinterpret_cast<uint16_t*>(&footer[20]);
347  // clang-format on
348 
349  if (disk_no != 0 || disk_start != 0 || comment_len != 0) {
350  utility::LogError("Unsupported zip footer.");
351  }
352  if (nrecs_on_disk != nrecs) {
353  utility::LogError("Unsupported zip footer.");
354  }
355 
356  return std::make_tuple(static_cast<size_t>(nrecs), global_header_size,
357  global_header_offset);
358 }
359 
// Writes `tensor` as one stored (uncompressed) "<tensor_name>.npy" entry of
// the zip archive `file_name`.
//
// - append == false: the file is opened with "wb" and the entry is written at
//   offset 0.
// - append == true : the file is opened with "r+b"; the existing global header
//   is read into memory, the new entry is written where the global header
//   used to start, and the extended global header and a new footer are
//   written after it.
//
// The tensor's data pointer is written as-is, as nels * element_byte_size
// contiguous bytes.
// NOTE(review): this presumably requires a contiguous CPU tensor — confirm
// against callers.
// NOTE(review): the declaration of `cfile` (listing line 369) is missing from
// this extract.
// NOTE(review): none of the fseek/fwrite results below are checked.
360 static void WriteNpzOneTensor(const std::string& file_name,
361  const std::string& tensor_name,
362  const core::Tensor& tensor,
363  bool append) {
364  const void* data = tensor.GetDataPtr();
365  const core::SizeVector shape = tensor.GetShape();
366  const core::Dtype dtype = tensor.GetDtype();
367  const int64_t element_byte_size = dtype.ByteSize();
368 
370  const std::string mode = append ? "r+b" : "wb";
371  if (!cfile.Open(file_name, mode)) {
372  utility::LogError("Failed to open file {}, error: {}.", file_name,
373  cfile.GetError());
374  }
375  FILE* fp = cfile.GetFILE();
376 
377  size_t nrecs = 0;
378  size_t global_header_offset = 0;
379  CharVector global_header;
380 
381  if (append) {
382  // Zip file exists. We need to add a new npy file to it. First read the
383  // footer. This gives us the offset and size of the global header, then
384  // read and store the global header. Below, we will write the new
385  // data at the start of the global header, then append the global header
386  // and footer below it.
387  size_t global_header_size;
388  std::tie(nrecs, global_header_size, global_header_offset) =
389  ParseZipFooter(fp);
390  fseek(fp, global_header_offset, SEEK_SET);
391  global_header.Resize(global_header_size);
392  size_t res = fread(global_header.Data(), sizeof(char),
393  global_header_size, fp);
394  if (res != global_header_size) {
395  utility::LogError("Header read error while saving to npz.");
396  }
397  // Rewind to the start of the old global header; the new entry overwrites it.
397  fseek(fp, global_header_offset, SEEK_SET);
398  }
399 
400  CharVector npy_header = CreateNumpyHeader(shape, dtype);
401 
// NOTE(review): the initial value `1` is an int, and std::accumulate
// accumulates in the type of its initial value, so the element count is
// computed in int and can overflow for large tensors; size_t{1} would avoid
// that.
402  size_t nels = std::accumulate(shape.begin(), shape.end(), 1,
403  std::multiplies<size_t>());
// Size of the stored entry: npy header plus raw element bytes.
404  size_t nbytes = nels * element_byte_size + npy_header.Size();
405 
406  // Get the CRC of the data to be added (npy header followed by the payload).
407  uint32_t crc = crc32(0L, reinterpret_cast<uint8_t*>(npy_header.Data()),
408  npy_header.Size());
409  crc = crc32(crc, static_cast<const uint8_t*>(data),
410  nels * element_byte_size);
411 
412  // The ".npy" suffix will be removed when npz is read.
413  std::string var_name = tensor_name + ".npy";
414 
415  // Build the local header (30 fixed bytes followed by the entry name).
// NOTE(review): nbytes is narrowed to uint32_t and the name length to
// uint16_t by the explicit Append<> template arguments.
416  CharVector local_header;
417  local_header.Append("PK"); // First part of sig
418  local_header.Append<uint16_t>(0x0403); // Second part of sig
419  local_header.Append<uint16_t>(20); // Min version to extract
420  local_header.Append<uint16_t>(0); // General purpose bit flag
421  local_header.Append<uint16_t>(0); // Compression method
422  local_header.Append<uint16_t>(0); // File last mod time
423  local_header.Append<uint16_t>(0); // File last mod date
424  local_header.Append<uint32_t>(crc); // CRC
425  local_header.Append<uint32_t>(nbytes); // Compressed size
426  local_header.Append<uint32_t>(nbytes); // Uncompressed size
427  local_header.Append<uint16_t>(var_name.size()); // Variable's name length
428  local_header.Append<uint16_t>(0); // Extra field length
429  local_header.Append(var_name);
430 
431  // Build global header: appended to the records read from the existing
// archive (if any).
432  global_header.Append("PK"); // First part of sig
433  global_header.Append<uint16_t>(0x0201); // Second part of sig
434  global_header.Append<uint16_t>(20); // Version made by
// Bytes 4..29 of the local header (everything after its signature, up to the
// name) are reused verbatim.
435  global_header.Append(local_header.Begin() + 4, local_header.Begin() + 30);
436  global_header.Append<uint16_t>(0); // File comment length
437  global_header.Append<uint16_t>(0); // Disk number where file starts
438  global_header.Append<uint16_t>(0); // Internal file attributes
439  global_header.Append<uint32_t>(0); // External file attributes
440  // Relative offset of local file header, since it begins where the global
441  // header used to begin.
442  global_header.Append<uint32_t>(global_header_offset);
443  global_header.Append(var_name);
444 
445  // Build footer.
446  CharVector footer;
447  footer.Append("PK"); // First part of sig
448  footer.Append<uint16_t>(0x0605); // Second part of sig
449  footer.Append<uint16_t>(0); // Number of this disk
450  footer.Append<uint16_t>(0); // Disk where footer starts
451  footer.Append<uint16_t>(nrecs + 1); // Number of records on this disk
452  footer.Append<uint16_t>(nrecs + 1); // Total number of records
453  footer.Append<uint32_t>(global_header.Size()); // Nbytes of global headers
454  // Offset of start of global headers, since global header now starts after
455  // newly written array.
456  footer.Append<uint32_t>(global_header_offset + nbytes +
457  local_header.Size());
458  footer.Append<uint16_t>(0); // Zip file comment length.
459 
460  // Write everything: local header, npy header, payload, global header, footer.
461  fwrite(local_header.Data(), sizeof(char), local_header.Size(), fp);
462  fwrite(npy_header.Data(), sizeof(char), npy_header.Size(), fp);
463  fwrite(data, element_byte_size, nels, fp);
464  fwrite(global_header.Data(), sizeof(char), global_header.Size(), fp);
465  fwrite(footer.Data(), sizeof(char), footer.Size(), fp);
466 }
467 
// Writes an empty zip archive to `file_name`: a file consisting only of the
// 22-byte zip footer with zero records.
// NOTE(review): the declaration of `cfile` (listing line 469) is missing from
// this extract.
// NOTE(review): the fwrite result is not checked.
468 static void WriteNpzEmpty(const std::string& file_name) {
470  if (!cfile.Open(file_name, "wb")) {
471  utility::LogError("Failed to open file {}, error: {}.", file_name,
472  cfile.GetError());
473  }
474  FILE* fp = cfile.GetFILE();
475 
476  // Build footer.
477  CharVector footer;
478  footer.Append("PK"); // First part of sig
479  footer.Append<uint16_t>(0x0605); // Second part of sig
480  footer.Append<uint16_t>(0); // Number of this disk
481  footer.Append<uint16_t>(0); // Disk where footer starts
482  footer.Append<uint16_t>(0); // Number of records on this disk
483  footer.Append<uint16_t>(0); // Total number of records
484  footer.Append<uint32_t>(0); // Nbytes of global headers
485  footer.Append<uint32_t>(0); // Offset of start of global headers
486  footer.Append<uint16_t>(0); // Zip file comment length.
// Sanity check on the field widths appended above: 2+2+2+2+2+2+4+4+2 = 22.
487  if (footer.Size() != 22) {
488  utility::LogError("Internal error: empty zip file must have size 22.");
489  }
490 
491  // Write everything.
492  fwrite(footer.Data(), sizeof(char), footer.Size(), fp);
493 }
494 
// In-memory representation of a .npy array: shape, NumPy kind character,
// element byte size, fortran-order flag and a shared blob holding the data.
//
// NOTE(review): several lines of this class are missing from this extract
// (listing lines 497, 505, 526, 550, 555-556, 562, 568). The index at the end
// of the page lists the missing heads as
//   NumpyArray(const core::Tensor &t)                               (497)
//   NumpyArray(const core::SizeVector &shape, char type,
//              int64_t word_size, bool fortran_order)               (505)
//   core::Dtype GetDtype() const                                    (526)
//   core::Tensor ToTensor() const                                   (550)
495 class NumpyArray {
496 public:
// Constructor from a tensor (head missing, see the note above): records
// shape, kind and byte size, and takes the blob of a contiguous CPU:0 version
// of the tensor.
498  : shape_(t.GetShape()),
499  type_(DtypeToChar(t.GetDtype())),
500  word_size_(t.GetDtype().ByteSize()),
501  fortran_order_(false) {
502  blob_ = t.To(core::Device("CPU:0")).Contiguous().GetBlob();
503  }
504 
// Constructor from header fields (head missing, see the note above):
// allocates a CPU:0 blob of NumBytes() bytes; the contents are left for the
// caller to fill in.
506  char type,
507  int64_t word_size,
508  bool fortran_order)
509  : shape_(shape),
510  type_(type),
511  word_size_(word_size),
512  fortran_order_(fortran_order) {
513  blob_ = std::make_shared<core::Blob>(NumBytes(), core::Device("CPU:0"));
514  }
515 
// Blob data pointer reinterpreted as T*.
516  template <typename T>
517  T* GetDataPtr() {
518  return reinterpret_cast<T*>(blob_->GetDataPtr());
519  }
520 
// Const overload of the accessor above.
521  template <typename T>
522  const T* GetDataPtr() const {
523  return reinterpret_cast<const T*>(blob_->GetDataPtr());
524  }
525 
// GetDtype() (head missing, see the note above): maps (kind, byte size) to a
// core::Dtype. 'b' maps to Bool regardless of byte size; any other
// combination yields core::Undefined.
527  if (type_ == 'f' && word_size_ == 4) return core::Float32;
528  if (type_ == 'f' && word_size_ == 8) return core::Float64;
529  if (type_ == 'i' && word_size_ == 1) return core::Int8;
530  if (type_ == 'i' && word_size_ == 2) return core::Int16;
531  if (type_ == 'i' && word_size_ == 4) return core::Int32;
532  if (type_ == 'i' && word_size_ == 8) return core::Int64;
533  if (type_ == 'u' && word_size_ == 1) return core::UInt8;
534  if (type_ == 'u' && word_size_ == 2) return core::UInt16;
535  if (type_ == 'u' && word_size_ == 4) return core::UInt32;
536  if (type_ == 'u' && word_size_ == 8) return core::UInt64;
537  if (type_ == 'b') return core::Bool;
538 
539  return core::Undefined;
540  }
541 
542  core::SizeVector GetShape() const { return shape_; }
543 
544  bool IsFortranOrder() const { return fortran_order_; }
545 
// Payload size in bytes: element count times element byte size.
546  int64_t NumBytes() const { return NumElements() * word_size_; }
547 
548  int64_t NumElements() const { return shape_.NumElements(); }
549 
// ToTensor() (head missing, see the note above): rejects fortran-order
// arrays, then wraps the blob in a tensor.
// NOTE(review): the condition guarding the second LogError and the start of
// the tensor construction (listing lines 555-556 and 562) are missing;
// presumably the check is for an undefined dtype — confirm against the full
// source.
551  if (fortran_order_) {
552  utility::LogError("Cannot load Numpy array with fortran_order.");
553  }
554  core::Dtype dtype = GetDtype();
557  "Cannot load Numpy array with Numpy dtype={} and "
558  "word_size={}.",
559  type_, word_size_);
560  }
561  // t.blob_ is the same as blob_, no need for memory copy.
563  const_cast<void*>(GetDataPtr<void>()), dtype, blob_);
564  return t;
565  }
566 
// Writes the array to `file_name` as a .npy file: header first, then
// NumElements() elements of GetDtype().ByteSize() bytes each.
// NOTE(review): the declaration of `cfile` (listing line 568) is missing from
// this extract; the fseek/fwrite results are not checked.
567  void Save(std::string file_name) const {
569  if (!cfile.Open(file_name, "wb")) {
570  utility::LogError("Failed to open file {}, error: {}.", file_name,
571  cfile.GetError());
572  }
573  FILE* fp = cfile.GetFILE();
574 
575  CharVector header = CreateNumpyHeader(shape_, GetDtype());
576  fseek(fp, 0, SEEK_SET);
577  fwrite(header.Data(), sizeof(char), header.Size(), fp);
578  fseek(fp, 0, SEEK_END);
579  fwrite(GetDataPtr<void>(), static_cast<size_t>(GetDtype().ByteSize()),
580  static_cast<size_t>(shape_.NumElements()), fp);
581  }
582 
583 private:
// Owns the array data.
584  std::shared_ptr<core::Blob> blob_ = nullptr;
585  core::SizeVector shape_;
// NumPy kind character ('f', 'i', 'u', 'b').
586  char type_;
// Size of one element in bytes.
587  int64_t word_size_;
588  bool fortran_order_;
589 };
590 
// Reads one .npy array (header followed by raw data) from the current
// position of `fp` and returns it as a NumpyArray.
// NOTE(review): the function head (listing line 591) and the right-hand side
// of the std::tie assignment (listing line 601) are missing from this
// extract. The index at the end of the page lists the head as
// "static NumpyArray CreateNumpyArrayFromFile(FILE *fp)"; the missing
// right-hand side is presumably a call to ParseNpyHeaderFromFile(fp) —
// confirm against the full source.
592  if (!fp) {
593  utility::LogError("Unable to open file ptr.");
594  }
595 
596  core::SizeVector shape;
597  char type;
598  int64_t word_size;
599  bool fortran_order;
600  std::tie(shape, type, word_size, fortran_order) =
602 
// Allocate the array from the parsed header, then read exactly NumBytes()
// bytes of payload into it.
603  NumpyArray arr(shape, type, word_size, fortran_order);
604  size_t nread = fread(arr.GetDataPtr<char>(), 1,
605  static_cast<size_t>(arr.NumBytes()), fp);
606  if (nread != static_cast<size_t>(arr.NumBytes())) {
607  utility::LogError("Failed to read array data.");
608  }
609  return arr;
610 }
611 
613  FILE* fp,
614  uint32_t num_compressed_bytes,
615  uint32_t num_uncompressed_bytes) {
616  CharVector buffer_compressed(num_compressed_bytes);
617  CharVector buffer_uncompressed(num_uncompressed_bytes);
618  size_t nread = fread(buffer_compressed.Data(), 1, num_compressed_bytes, fp);
619  if (nread != num_compressed_bytes) {
620  utility::LogError("Failed to read compressed data.");
621  }
622 
623  int err;
624  z_stream d_stream;
625 
626  d_stream.zalloc = Z_NULL;
627  d_stream.zfree = Z_NULL;
628  d_stream.opaque = Z_NULL;
629  d_stream.avail_in = 0;
630  d_stream.next_in = Z_NULL;
631  err = inflateInit2(&d_stream, -MAX_WBITS);
632 
633  d_stream.avail_in = num_compressed_bytes;
634  d_stream.next_in =
635  reinterpret_cast<unsigned char*>(buffer_compressed.Data());
636  d_stream.avail_out = num_uncompressed_bytes;
637  d_stream.next_out =
638  reinterpret_cast<unsigned char*>(buffer_uncompressed.Data());
639 
640  err = inflate(&d_stream, Z_FINISH);
641  err = inflateEnd(&d_stream);
642  if (err != Z_OK) {
643  utility::LogError("Failed to decompress data.");
644  }
645 
646  core::SizeVector shape;
647  char type;
648  size_t word_size;
649  bool fortran_order;
650  std::tie(shape, type, word_size, fortran_order) =
651  ParseNpyHeaderFromBuffer(buffer_uncompressed.Data());
652 
653  NumpyArray array(shape, type, word_size, fortran_order);
654 
655  size_t offset = num_uncompressed_bytes - array.NumBytes();
656  memcpy(array.GetDataPtr<char>(), buffer_uncompressed.Data() + offset,
657  array.NumBytes());
658 
659  return array;
660 }
661 
// Reads the .npy file `file_name` and returns its content as a tensor.
// A failure to open the file is reported through utility::LogError.
// NOTE(review): the declaration of `cfile` (listing line 663) is missing from
// this extract.
662 core::Tensor ReadNpy(const std::string& file_name) {
664  if (!cfile.Open(file_name, "rb")) {
665  utility::LogError("Failed to open file {}, error: {}.", file_name,
666  cfile.GetError());
667  }
668  return CreateNumpyArrayFromFile(cfile.GetFILE()).ToTensor();
669 }
670 
671 void WriteNpy(const std::string& file_name, const core::Tensor& tensor) {
672  NumpyArray(tensor).Save(file_name);
673 }
674 
// Reads every array stored in the .npz (zip) archive `file_name` and returns
// them as a map from entry name (with the last four characters, ".npy",
// removed) to tensor.
//
// The archive is scanned sequentially from the start: each 30-byte local
// header is followed by the entry name, an optional extra field and the entry
// data. Scanning stops at the first record that is not a local header.
// NOTE(review): the declaration of `cfile` (listing line 677) is missing from
// this extract.
// NOTE(review): the header fields below are read through reinterpret_cast,
// i.e. in host byte order.
675 std::unordered_map<std::string, core::Tensor> ReadNpz(
676  const std::string& file_name) {
678  if (!cfile.Open(file_name, "rb")) {
679  utility::LogError("Failed to open file {}, error: {}.", file_name,
680  cfile.GetError());
681  }
682  FILE* fp = cfile.GetFILE();
683 
684  std::unordered_map<std::string, core::Tensor> tensor_map;
685 
686  // It's possible to check tensor_name and load only one selected numpy array,
687  // here we load all of them.
688  while (true) {
689  CharVector local_header(30);
690  size_t local_header_bytes =
691  fread(local_header.Data(), sizeof(char), 30, fp);
692 
693  // An empty zip file has exactly 22 bytes, so a short read of 22 bytes is
// only accepted when it matches the empty-archive footer byte for byte.
694  if (local_header_bytes == 22) {
695  const char empty_zip_bytes[22] = {
696  0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00,
697  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
698  0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
699  if (std::memcmp(empty_zip_bytes, local_header.Data(), 22) == 0) {
700  break;
701  } else {
702  utility::LogError("Invalid empty .npz file.");
703  }
704  }
705 
706  if (local_header_bytes != 30) {
707  utility::LogError("Failed to read local header in npz.");
708  }
709 
710  // If we've reached the global header, stop reading. Only bytes 2 and 3 of
// the signature are compared; they are 0x03, 0x04 for a local header.
711  if (local_header[2] != 0x03 || local_header[3] != 0x04) {
712  break;
713  }
714 
715  // Read tensor name (length is the uint16 at offset 26).
716  uint16_t tensor_name_len =
717  *reinterpret_cast<uint16_t*>(&local_header[26]);
718  CharVector tensor_name_buf(tensor_name_len, ' ');
719  if (fread(tensor_name_buf.Data(), sizeof(char), tensor_name_len, fp) !=
720  tensor_name_len) {
721  utility::LogError("Failed to read tensor name in npz.");
722  }
723 
724  // Erase the trailing ".npy".
// NOTE(review): the last four characters are removed without checking that
// the name has at least four characters or actually ends in ".npy".
725  std::string tensor_name(tensor_name_buf.Begin(), tensor_name_buf.End());
726  tensor_name.erase(tensor_name.end() - 4, tensor_name.end());
727 
728  // Read extra field (length is the uint16 at offset 28); the content is
// read only to advance the file position and is then discarded.
729  uint16_t extra_field_len =
730  *reinterpret_cast<uint16_t*>(&local_header[28]);
731  if (extra_field_len > 0) {
732  CharVector buff(extra_field_len);
733  if (fread(buff.Data(), sizeof(char), extra_field_len, fp) !=
734  extra_field_len) {
735  utility::LogError("Failed to read extra field in npz.");
736  }
737  }
738 
// Compression method (offset 8) and compressed / uncompressed sizes
// (offsets 18 and 22).
739  uint16_t compressed_method =
740  *reinterpret_cast<uint16_t*>(&local_header[8]);
741  uint32_t num_compressed_bytes =
742  *reinterpret_cast<uint32_t*>(&local_header[18]);
743  uint32_t num_uncompressed_bytes =
744  *reinterpret_cast<uint32_t*>(&local_header[22]);
745 
// Method 0 means the entry is stored uncompressed and is read as a plain
// .npy stream; the sizes from the zip header are not used on that path.
// NOTE(review): every non-zero method is sent to the inflate-based reader.
746  if (compressed_method == 0) {
747  tensor_map[tensor_name] = CreateNumpyArrayFromFile(fp).ToTensor();
748  } else {
749  tensor_map[tensor_name] =
750  CreateNumpyArrayFromCompressedFile(fp, num_compressed_bytes,
751  num_uncompressed_bytes)
752  .ToTensor();
753  }
754  }
755 
756  return tensor_map;
757 }
758 
759 void WriteNpz(const std::string& file_name,
760  const std::unordered_map<std::string, core::Tensor>& tensor_map) {
761  if (tensor_map.empty()) {
762  WriteNpzEmpty(file_name);
763  }
764 
765  std::unordered_map<std::string, core::Tensor> contiguous_tensor_map;
766  for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {
767  contiguous_tensor_map[it->first] =
768  it->second.To(core::Device("CPU:0")).Contiguous();
769  }
770 
771  // TODO: WriteNpzOneTensor is called multiple times in order to write
772  // multiple tensors. This requires opening/closing the npz file for multiple
773  // times, which is not optimal.
774  // TODO: Support writing in compressed mode: np.savez_compressed().
775  bool is_first_tensor = true;
776  for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {
777  core::Tensor tensor = it->second.To(core::Device("CPU:0")).Contiguous();
778  if (is_first_tensor) {
779  WriteNpzOneTensor(file_name, it->first, tensor, /*append=*/false);
780  is_first_tensor = false;
781  } else {
782  WriteNpzOneTensor(file_name, it->first, tensor, /*append=*/true);
783  }
784  }
785 }
786 
787 } // namespace io
788 } // namespace t
789 } // namespace cloudViewer
filament::Texture::InternalFormat format
int size
int count
int offset
char type
std::string ToString() const
Definition: Dtype.h:65
int64_t ByteSize() const
Definition: Dtype.h:59
DtypeCode GetDtypeCode() const
Definition: Dtype.h:61
Tensor Contiguous() const
Definition: Tensor.cpp:772
Dtype GetDtype() const
Definition: Tensor.h:1164
SizeVector GetShape() const
Definition: Tensor.h:1127
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739
std::shared_ptr< Blob > GetBlob() const
Definition: Tensor.h:1168
char & operator[](size_t index)
Definition: NumpyIO.cpp:119
const char & operator[](size_t index) const
Definition: NumpyIO.cpp:121
CharVector & Append(const T &rhs)
Definition: NumpyIO.cpp:61
CharVector & Append(InputIt first, InputIt last)
Definition: NumpyIO.cpp:88
CharVector & Append(const std::string &rhs)
Definition: NumpyIO.cpp:72
CharVector & Append(size_t count, const T &value)
Definition: NumpyIO.cpp:94
const char * Data() const
Definition: NumpyIO.cpp:117
CharVector & Append(const CharVector &other)
Definition: NumpyIO.cpp:101
std::vector< char >::iterator Begin()
Definition: NumpyIO.cpp:107
void Resize(size_t count)
Definition: NumpyIO.cpp:113
CharVector & Append(const char *rhs)
Definition: NumpyIO.cpp:77
CharVector(size_t size, const char value)
Definition: NumpyIO.cpp:58
std::vector< char >::iterator End()
Definition: NumpyIO.cpp:109
NumpyArray(const core::Tensor &t)
Definition: NumpyIO.cpp:497
NumpyArray(const core::SizeVector &shape, char type, int64_t word_size, bool fortran_order)
Definition: NumpyIO.cpp:505
core::SizeVector GetShape() const
Definition: NumpyIO.cpp:542
core::Tensor ToTensor() const
Definition: NumpyIO.cpp:550
const T * GetDataPtr() const
Definition: NumpyIO.cpp:522
int64_t NumElements() const
Definition: NumpyIO.cpp:548
void Save(std::string file_name) const
Definition: NumpyIO.cpp:567
core::Dtype GetDtype() const
Definition: NumpyIO.cpp:526
std::string GetError()
Returns the last encountered error for this file.
Definition: FileSystem.cpp:748
bool Open(const std::string &filename, const std::string &mode)
Open a file.
Definition: FileSystem.cpp:739
FILE * GetFILE()
Returns the underlying C FILE pointer.
Definition: FileSystem.h:264
#define LogError(...)
Definition: Logging.h:60
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
Definition: ShapeUtil.cpp:214
const Dtype Undefined
Definition: Dtype.cpp:41
const Dtype Int8
Definition: Dtype.cpp:44
const Dtype Bool
Definition: Dtype.cpp:52
const Dtype Int64
Definition: Dtype.cpp:47
const Dtype UInt64
Definition: Dtype.cpp:51
const Dtype UInt32
Definition: Dtype.cpp:50
const Dtype UInt8
Definition: Dtype.cpp:48
const Dtype Int16
Definition: Dtype.cpp:45
const Dtype Float64
Definition: Dtype.cpp:43
const Dtype UInt16
Definition: Dtype.cpp:49
const Dtype Int32
Definition: Dtype.cpp:46
const Dtype Float32
Definition: Dtype.cpp:42
std::unordered_map< std::string, core::Tensor > ReadNpz(const std::string &file_name)
Definition: NumpyIO.cpp:675
static std::tuple< size_t, size_t, size_t > ParseZipFooter(FILE *fp)
Definition: NumpyIO.cpp:331
core::Tensor ReadNpy(const std::string &file_name)
Definition: NumpyIO.cpp:662
void WriteNpz(const std::string &file_name, const std::unordered_map< std::string, core::Tensor > &tensor_map)
Definition: NumpyIO.cpp:759
static char BigEndianChar()
Definition: NumpyIO.cpp:127
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromBuffer(const char *buffer)
Definition: NumpyIO.cpp:325
static CharVector CreateNumpyHeader(const core::SizeVector &shape, const core::Dtype &dtype)
Definition: NumpyIO.cpp:157
static NumpyArray CreateNumpyArrayFromCompressedFile(FILE *fp, uint32_t num_compressed_bytes, uint32_t num_uncompressed_bytes)
Definition: NumpyIO.cpp:612
static char DtypeToChar(const core::Dtype &dtype)
Definition: NumpyIO.cpp:132
static NumpyArray CreateNumpyArrayFromFile(FILE *fp)
Definition: NumpyIO.cpp:591
static void WriteNpzOneTensor(const std::string &file_name, const std::string &tensor_name, const core::Tensor &tensor, bool append)
Definition: NumpyIO.cpp:360
static void WriteNpzEmpty(const std::string &file_name)
Definition: NumpyIO.cpp:468
static size_t ParseNpyPreamble(const char *preamble)
Definition: NumpyIO.cpp:281
void WriteNpy(const std::string &file_name, const core::Tensor &tensor)
Definition: NumpyIO.cpp:671
static std::tuple< core::SizeVector, char, int64_t, bool > ParsePropertyDict(const std::string &header)
Definition: NumpyIO.cpp:207
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromFile(FILE *fp)
Definition: NumpyIO.cpp:303
Generic file read and write utility for python interface.
#define SEEK_SET
Definition: qioapi.cpp:38
#define SEEK_END
Definition: qioapi.cpp:34