cpp_api/api/NumpyIO_8cpp_source.html

 // ----------------------------------------------------------------------------

 // -                        CloudViewer: www.cloudViewer.org                  -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.cloudViewer.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 // Contains source code from: https://github.com/rogersce/cnpy.

 //

 // The MIT License

 //

 // Copyright (c) Carl Rogers, 2011

 //

 // Permission is hereby granted, free of charge, to any person obtaining a copy

 // of this software and associated documentation files (the "Software"), to deal

 // in the Software without restriction, including without limitation the rights

 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

 // copies of the Software, and to permit persons to whom the Software is

 // furnished to do so, subject to the following conditions:

 //

 // The above copyright notice and this permission notice shall be included in

 // all copies or substantial portions of the Software.

 //

 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

 // THE SOFTWARE.


 #include "cloudViewer/t/io/NumpyIO.h"


 #include <FileSystem.h>

 #include <Logging.h>

 #include <zlib.h>


 #include <memory>

 #include <numeric>

 #include <regex>

 #include <sstream>

 #include <string>

 #include <vector>


 #include "cloudViewer/core/Blob.h"

 #include "cloudViewer/core/Dispatch.h"

 #include "cloudViewer/core/Dtype.h"

 #include "cloudViewer/core/SizeVector.h"


 namespace cloudViewer {

 namespace t {

 namespace io {


 // Growable byte buffer used to assemble file headers piece by piece. Every
 // Append() overload adds bytes at the end and returns *this so that calls can
 // be chained.
 class CharVector {
 public:
     CharVector() {}
     CharVector(size_t size) : buffer_(size) {}
     CharVector(size_t size, const char value) : buffer_(size, value) {}

     // Appends the sizeof(T) bytes of rhs's object representation, in the
     // order in which they are stored in memory.
     template <typename T>
     CharVector& Append(const T& rhs) {
         const char* const first_byte = reinterpret_cast<const char*>(&rhs);
         buffer_.insert(buffer_.end(), first_byte, first_byte + sizeof(T));
         return *this;
     }

     // Appends the characters of rhs.
     CharVector& Append(const std::string& rhs) {
         buffer_.insert(buffer_.end(), rhs.data(), rhs.data() + rhs.size());
         return *this;
     }

     // Appends the characters of the C string rhs, without the terminating
     // null character.
     CharVector& Append(const char* rhs) {
         const size_t num_chars = strlen(rhs);
         buffer_.insert(buffer_.end(), rhs, rhs + num_chars);
         return *this;
     }

     // Appends the range [first, last).
     template <typename InputIt>
     CharVector& Append(InputIt first, InputIt last) {
         buffer_.insert(buffer_.end(), first, last);
         return *this;
     }

     // Appends value count times.
     template <typename T>
     CharVector& Append(size_t count, const T& value) {
         for (; count > 0; --count) {
             Append(value);
         }
         return *this;
     }

     // Appends all bytes of another CharVector.
     CharVector& Append(const CharVector& other) {
         buffer_.insert(buffer_.end(), other.Data(),
                        other.Data() + other.Size());
         return *this;
     }

     std::vector<char>::iterator Begin() { return buffer_.begin(); }

     std::vector<char>::iterator End() { return buffer_.end(); }

     size_t Size() const { return buffer_.size(); }

     void Resize(size_t count) { buffer_.resize(count); }

     char* Data() { return buffer_.data(); }

     const char* Data() const { return buffer_.data(); }

     char& operator[](size_t index) { return buffer_[index]; }

     const char& operator[](size_t index) const { return buffer_[index]; }

 private:
     std::vector<char> buffer_;
 };


 // Returns '<' when the byte of an int stored at the lowest address is
 // non-zero for the value 1 (little-endian host), otherwise '>'.
 static char BigEndianChar() {
     const int probe = 1;
     const char lowest_address_byte = *reinterpret_cast<const char*>(&probe);
     if (lowest_address_byte != 0) {
         return '<';
     }
     return '>';
 }


 // Maps a dtype to the kind character written into the 'descr' entry of a
 // Numpy header:
 //   'f': Float32, Float64
 //   'i': Int8, Int16, Int32, Int64
 //   'u': UInt8, UInt16, UInt32, UInt64
 //   'b': Bool
 // Any other dtype is reported through utility::LogError; '\0' is the
 // fallback return value.
 static char DtypeToChar(const core::Dtype& dtype) {
     if (dtype == core::Float32 || dtype == core::Float64) {
         return 'f';
     }
     if (dtype == core::Int8 || dtype == core::Int16 || dtype == core::Int32 ||
         dtype == core::Int64) {
         return 'i';
     }
     if (dtype == core::UInt8 || dtype == core::UInt16 ||
         dtype == core::UInt32 || dtype == core::UInt64) {
         return 'u';
     }
     if (dtype == core::Bool) {
         return 'b';
     }
     utility::LogError("Unsupported dtype: {}", dtype.ToString());
     return '\0';
 }


 // Builds a complete NPY version 1.0 header for an array of the given shape
 // and dtype: magic string, version bytes, 16-bit dict length, and the
 // space-padded property dict terminated by '\n'.
 //
 // The 'descr' entry is <host byte-order char><dtype kind char><byte size>,
 // and 'fortran_order' is always written as False.
 static CharVector CreateNumpyHeader(const core::SizeVector& shape,
                                     const core::Dtype& dtype) {
     // Format the shape as a Python tuple literal:
     // {}     -> "()"
     // {1}    -> "(1,)"
     // {1, 2} -> "(1, 2)"
     std::stringstream shape_ss;
     if (shape.size() == 0) {
         shape_ss << "()";
     } else if (shape.size() == 1) {
         // A one-element tuple needs the trailing comma.
         shape_ss << fmt::format("({},)", shape[0]);
     } else {
         shape_ss << "(";
         shape_ss << shape[0];
         for (size_t i = 1; i < shape.size(); i++) {
             shape_ss << ", ";
             shape_ss << shape[i];
         }
         shape_ss << ")";
     }

     // Pad with spaces so that preamble + dict is a multiple of 16 bytes.
     // - Preamble is 10 bytes.
     // - Dict needs to end with '\n'.
     // - Header dict size includes the padding size and '\n'.
     std::string property_dict_body = fmt::format(
             "{{'descr': '{}{}{}', 'fortran_order': False, 'shape': {}, }}",
             BigEndianChar(), DtypeToChar(dtype), dtype.ByteSize(),
             shape_ss.str());

     CharVector property_dict;
     property_dict.Append(property_dict_body);
     // padding_count is in {0, 1, ..., 15}; the final byte of the 16-byte
     // granule is reserved for the terminating '\n'.
     size_t padding_count = 16 - (10 + property_dict.Size()) % 16 - 1;
     property_dict.Append(padding_count, ' ');
     property_dict.Append('\n');

     CharVector header;
     header.Append<unsigned char>(0x93);  // Magic value
     header.Append("NUMPY");              // Magic value
     header.Append<uint8_t>(0x01);        // Major version
     header.Append<uint8_t>(0x00);        // Minor version
     header.Append<uint16_t>(property_dict.Size());  // HEADER_LEN
     header.Append(property_dict);

     return header;
 }


 // Parses the property dict of a Numpy header, e.g.
 //   {'descr': '<f4', 'fortran_order': False, 'shape': (2, 3), }
 //
 // Returns {shape, type(char), word_size, fortran_order}, where type is the
 // dtype kind character and word_size the number that follows it in 'descr'.
 //
 // Values are located at fixed offsets from their keys, so the dict must use
 // the exact quoting and spacing shown above. Malformed input is reported
 // through utility::LogError. Like the other checks in this file, the code
 // after each LogError call assumes that LogError does not return.
 static std::tuple<core::SizeVector, char, int64_t, bool> ParsePropertyDict(
         const std::string& header) {
     core::SizeVector shape;
     char type;
     int64_t word_size;
     bool fortran_order;

     size_t loc1;
     size_t loc2;

     // Fortran order. The value starts 16 characters after the start of the
     // key: len("fortran_order") + len("': ").
     loc1 = header.find("fortran_order");
     if (loc1 == std::string::npos) {
         utility::LogError("Failed to find header keyword: 'fortran_order'");
     }
     loc1 += 16;
     if (loc1 > header.size()) {
         utility::LogError("Failed to parse header keyword: 'fortran_order'");
     }
     fortran_order = (header.compare(loc1, 4, "True") == 0);

     // Shape: the digits between the first '(' and the first ')'.
     loc1 = header.find("(");
     loc2 = header.find(")");
     if (loc1 == std::string::npos || loc2 == std::string::npos ||
         loc2 < loc1) {
         utility::LogError("Failed to find header keyword: '(' or ')'");
     }

     std::regex num_regex("[0-9][0-9]*");
     std::smatch sm;
     shape.clear();

     std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
     while (std::regex_search(str_shape, sm, num_regex)) {
         // stoll rather than stoi: dimensions may exceed the range of int.
         shape.push_back(std::stoll(sm[0].str()));
         str_shape = sm.suffix().str();
     }

     // Endian, word size, data type.
     // byte order code | stands for not applicable.
     // not sure when this applies except for byte array.
     loc1 = header.find("descr");
     if (loc1 == std::string::npos) {
         utility::LogError("Failed to find header keyword: 'descr'");
     }

     // The value starts 9 characters after the start of the key:
     // len("descr") + len("': '").
     loc1 += 9;
     // At least the byte-order and the kind characters must be present.
     if (loc1 + 2 > header.size()) {
         utility::LogError("Failed to parse header keyword: 'descr'");
     }
     bool little_endian =
             (header[loc1] == '<' || header[loc1] == '|' ? true : false);
     if (!little_endian) {
         utility::LogError("Only little endian is supported.");
     }

     type = header[loc1 + 1];

     // The word size is the number between the kind character and the
     // closing quote.
     std::string str_ws = header.substr(loc1 + 2);
     loc2 = str_ws.find("'");
     word_size = atoi(str_ws.substr(0, loc2).c_str());

     return std::make_tuple(shape, type, word_size, fortran_order);
 }


 // Validates the 10-byte preamble of an NPY version 1.0 file and returns the
 // header length, which is the length of the string of property dict.
 // The caller must provide at least 10 readable bytes.
 // Ref: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
 //
 // - bytes[0] to bytes[5]               : \x93NUMPY # Magic string
 // - bytes[6]                           : \x01      # Major version, unsigned
 // - bytes[7]                           : \x00      # Minor version, unsigned
 // - bytes[8] to bytes[9]               : HEADER_LEN little-endian uint16_t
 // - bytes[10] to bytes[10+HEADER_LEN-1]: Dict, padded, terminated by '\n'
 // - The padding makes (10 + HEADER_LEN) a multiple of an alignment size;
 //   this function does not check or rely on that.
 //
 // - We only support Version 1.0 for now.
 // - Version 2.0+ supports up to 4GiB HEADER_LEN and the HEADER_LEN is
 //   replaced from uint16_t to uint32_t.
 // - Version 3.0 uses utf8-encoded header string.
 //
 // A wrong magic string or an unsupported version is reported through
 // utility::LogError.
 static size_t ParseNpyPreamble(const char* preamble) {
     if (*reinterpret_cast<const unsigned char*>(&preamble[0]) !=
                 static_cast<unsigned char>(0x93) ||
         preamble[1] != 'N' || preamble[2] != 'U' || preamble[3] != 'M' ||
         preamble[4] != 'P' || preamble[5] != 'Y') {
         utility::LogError("Invalid Numpy preamble {}{}{}{}{}{}.", preamble[0],
                           preamble[1], preamble[2], preamble[3], preamble[4],
                           preamble[5]);
     }
     if (preamble[6] != static_cast<uint8_t>(0x01) ||
         preamble[7] != static_cast<uint8_t>(0x00)) {
         // Convert to int so that the version is printed as a number instead
         // of as a (usually unprintable) character.
         utility::LogError(
                 "Not supported Numpy format version: {}.{}. Only version 1.0 "
                 "is supported.",
                 static_cast<int>(static_cast<unsigned char>(preamble[6])),
                 static_cast<int>(static_cast<unsigned char>(preamble[7])));
     }
     // Assemble HEADER_LEN from its two bytes explicitly: the field is
     // little endian regardless of the host, and reading it byte by byte
     // avoids accessing a char buffer through a uint16_t pointer.
     const size_t low_byte = static_cast<unsigned char>(preamble[8]);
     const size_t high_byte = static_cast<unsigned char>(preamble[9]);
     return low_byte | (high_byte << 8);
 }


 // Reads and parses an NPY version 1.0 header from the current position of
 // fp. Returns {shape, type(char), word_size, fortran_order}.
 // This will advance the file pointer to the end of the header.
 // Read failures and malformed headers are reported through
 // utility::LogError.
 static std::tuple<core::SizeVector, char, int64_t, bool> ParseNpyHeaderFromFile(
         FILE* fp) {
     const size_t preamble_len = 10;  // Version 1.0 assumed.
     CharVector preamble(preamble_len);
     if (fread(preamble.Data(), sizeof(char), preamble_len, fp) !=
         preamble_len) {
         utility::LogError("Header preamble cannot be read.");
     }
     const size_t header_len = ParseNpyPreamble(preamble.Data());

     CharVector header(header_len, 0);
     if (fread(header.Data(), sizeof(char), header_len, fp) != header_len) {
         utility::LogError("Failed to read header dictionary.");
     }
     // The dict must end with '\n'. The emptiness test comes first so that
     // header_len - 1 cannot wrap around.
     if (header_len == 0 || header[header_len - 1] != '\n') {
         utility::LogError("Numpy header not terminated by newline character.");
     }

     return ParsePropertyDict(std::string(header.Data(), header_len));
 }


 // Parses an NPY version 1.0 header located at the start of an in-memory
 // buffer. Returns {shape, type(char), word_size, fortran_order}.
 // The buffer length is not known here, so no bounds are checked.
 static std::tuple<core::SizeVector, char, int64_t, bool>
 ParseNpyHeaderFromBuffer(const char* buffer) {
     const size_t preamble_len = 10;
     const size_t dict_len = ParseNpyPreamble(buffer);
     const char* const dict_begin = buffer + preamble_len;
     return ParsePropertyDict(std::string(dict_begin, dict_len));
 }


 // Reads the last 22 bytes of fp as a zip footer and returns
 // {number of records, size of the global header in bytes, offset of the
 // global header}.
 //
 // Only single-disk archives without a zip file comment are accepted;
 // anything else, as well as seek/read failures, is reported through
 // utility::LogError. The file position is left at the end of the file.
 static std::tuple<size_t, size_t, size_t> ParseZipFooter(FILE* fp) {
     size_t footer_len = 22;
     CharVector footer(footer_len);
     // Seeking fails, for instance, when the file is shorter than a footer.
     if (fseek(fp, -static_cast<int64_t>(footer_len), SEEK_END) != 0) {
         utility::LogError("Footer fseek failed.");
     }
     if (fread(footer.Data(), sizeof(char), footer_len, fp) != footer_len) {
         utility::LogError("Footer fread failed.");
     }

     // Bytes 0 to 3 hold the signature, which is not inspected here.
     // clang-format off
     uint16_t disk_no              = *reinterpret_cast<uint16_t*>(&footer[4 ]);
     uint16_t disk_start           = *reinterpret_cast<uint16_t*>(&footer[6 ]);
     uint16_t nrecs_on_disk        = *reinterpret_cast<uint16_t*>(&footer[8 ]);
     uint16_t nrecs                = *reinterpret_cast<uint16_t*>(&footer[10]);
     uint32_t global_header_size   = *reinterpret_cast<uint32_t*>(&footer[12]);
     uint32_t global_header_offset = *reinterpret_cast<uint32_t*>(&footer[16]);
     uint16_t comment_len          = *reinterpret_cast<uint16_t*>(&footer[20]);
     // clang-format on

     if (disk_no != 0 || disk_start != 0 || comment_len != 0) {
         utility::LogError("Unsupported zip footer.");
     }
     if (nrecs_on_disk != nrecs) {
         utility::LogError("Unsupported zip footer.");
     }

     return std::make_tuple(static_cast<size_t>(nrecs), global_header_size,
                            global_header_offset);
 }


 // Writes one tensor as an uncompressed "<tensor_name>.npy" entry of the zip
 // archive file_name.
 //
 // - append == false: the file is opened with "wb" and afterwards contains
 //   only this entry.
 // - append == true: the file is opened with "r+b". The existing global
 //   header is read back with the help of the zip footer, the new entry is
 //   written where the global header used to start, and the global header
 //   (extended by one record) and a new footer are written after it.
 //
 // The memory returned by tensor.GetDataPtr() is written as is, so the tensor
 // is expected to be a contiguous CPU tensor (WriteNpz converts its tensors
 // before calling). Sizes and offsets are stored in 32-bit zip fields.
 // Open/seek/read/write failures are reported through utility::LogError.
 static void WriteNpzOneTensor(const std::string& file_name,
                               const std::string& tensor_name,
                               const core::Tensor& tensor,
                               bool append) {
     const void* data = tensor.GetDataPtr();
     const core::SizeVector shape = tensor.GetShape();
     const core::Dtype dtype = tensor.GetDtype();
     const int64_t element_byte_size = dtype.ByteSize();

     utility::filesystem::CFile cfile;
     const std::string mode = append ? "r+b" : "wb";
     if (!cfile.Open(file_name, mode)) {
         utility::LogError("Failed to open file {}, error: {}.", file_name,
                           cfile.GetError());
     }
     FILE* fp = cfile.GetFILE();

     size_t nrecs = 0;
     size_t global_header_offset = 0;
     CharVector global_header;

     if (append) {
         // Zip file exists. We need to add a new npy file to it. First read
         // the footer. This gives us the offset and size of the global header
         // then read and store the global header. Below, we will write the
         // new data at the start of the global header then append the global
         // header and footer below it.
         size_t global_header_size;
         std::tie(nrecs, global_header_size, global_header_offset) =
                 ParseZipFooter(fp);
         if (fseek(fp, static_cast<long>(global_header_offset), SEEK_SET) !=
             0) {
             utility::LogError("Failed to seek while saving to npz.");
         }
         global_header.Resize(global_header_size);
         size_t res = fread(global_header.Data(), sizeof(char),
                            global_header_size, fp);
         if (res != global_header_size) {
             utility::LogError("Header read error while saving to npz.");
         }
         // Rewind to where the new local entry will be written.
         if (fseek(fp, static_cast<long>(global_header_offset), SEEK_SET) !=
             0) {
             utility::LogError("Failed to seek while saving to npz.");
         }
     }

     CharVector npy_header = CreateNumpyHeader(shape, dtype);

     // The initial value must be a size_t: std::accumulate accumulates in the
     // type of the initial value, and an int would overflow for large
     // tensors.
     const size_t nels =
             std::accumulate(shape.begin(), shape.end(),
                             static_cast<size_t>(1), std::multiplies<size_t>());
     const size_t data_nbytes = nels * static_cast<size_t>(element_byte_size);
     const size_t nbytes = data_nbytes + npy_header.Size();

     // Get the CRC of the data to be added.
     uint32_t crc = crc32(0L, reinterpret_cast<uint8_t*>(npy_header.Data()),
                          npy_header.Size());
     // Skip empty payloads: crc32() returns the initial CRC value when given
     // a null buffer, which would discard the CRC of the npy header.
     if (data_nbytes > 0) {
         crc = crc32(crc, static_cast<const uint8_t*>(data), data_nbytes);
     }

     // The ".npy" suffix will be removed when npz is read.
     std::string var_name = tensor_name + ".npy";

     // Build the local header.
     CharVector local_header;
     local_header.Append("PK");                       // First part of sig
     local_header.Append<uint16_t>(0x0403);           // Second part of sig
     local_header.Append<uint16_t>(20);               // Min version to extract
     local_header.Append<uint16_t>(0);                // General purpose bit flag
     local_header.Append<uint16_t>(0);                // Compression method
     local_header.Append<uint16_t>(0);                // File last mod time
     local_header.Append<uint16_t>(0);                // File last mod date
     local_header.Append<uint32_t>(crc);              // CRC
     local_header.Append<uint32_t>(nbytes);           // Compressed size
     local_header.Append<uint32_t>(nbytes);           // Uncompressed size
     local_header.Append<uint16_t>(var_name.size());  // Variable's name length
     local_header.Append<uint16_t>(0);                // Extra field length
     local_header.Append(var_name);

     // Build global header.
     global_header.Append("PK");              // First part of sig
     global_header.Append<uint16_t>(0x0201);  // Second part of sig
     global_header.Append<uint16_t>(20);      // Version made by
     // Bytes 4 to 29 of the local header (min version up to and including
     // the extra field length) are repeated verbatim.
     global_header.Append(local_header.Begin() + 4, local_header.Begin() + 30);
     global_header.Append<uint16_t>(0);  // File comment length
     global_header.Append<uint16_t>(0);  // Disk number where file starts
     global_header.Append<uint16_t>(0);  // Internal file attributes
     global_header.Append<uint32_t>(0);  // External file attributes
     // Relative offset of local file header, since it begins where the global
     // header used to begin.
     global_header.Append<uint32_t>(global_header_offset);
     global_header.Append(var_name);

     // Build footer.
     CharVector footer;
     footer.Append("PK");                 // First part of sig
     footer.Append<uint16_t>(0x0605);     // Second part of sig
     footer.Append<uint16_t>(0);          // Number of this disk
     footer.Append<uint16_t>(0);          // Disk where footer starts
     footer.Append<uint16_t>(nrecs + 1);  // Number of records on this disk
     footer.Append<uint16_t>(nrecs + 1);  // Total number of records
     footer.Append<uint32_t>(global_header.Size());  // Nbytes of global headers
     // Offset of start of global headers, since global header now starts after
     // newly written array.
     footer.Append<uint32_t>(global_header_offset + nbytes +
                             local_header.Size());
     footer.Append<uint16_t>(0);  // Zip file comment length.

     // Write everything, failing loudly on short writes instead of silently
     // leaving a truncated archive behind.
     const auto write_or_fail = [fp, &file_name](const void* src,
                                                 size_t item_size,
                                                 size_t item_count) {
         if (fwrite(src, item_size, item_count, fp) != item_count) {
             utility::LogError("Failed to write to file {}.", file_name);
         }
     };
     write_or_fail(local_header.Data(), sizeof(char), local_header.Size());
     write_or_fail(npy_header.Data(), sizeof(char), npy_header.Size());
     write_or_fail(data, static_cast<size_t>(element_byte_size), nels);
     write_or_fail(global_header.Data(), sizeof(char), global_header.Size());
     write_or_fail(footer.Data(), sizeof(char), footer.Size());
 }


 // Writes an npz file without any array: a zip archive that consists only of
 // the 22-byte footer with all counts, sizes and offsets set to zero.
 // Open and write failures are reported through utility::LogError.
 static void WriteNpzEmpty(const std::string& file_name) {
     utility::filesystem::CFile cfile;
     if (!cfile.Open(file_name, "wb")) {
         utility::LogError("Failed to open file {}, error: {}.", file_name,
                           cfile.GetError());
     }
     FILE* fp = cfile.GetFILE();

     // Build footer.
     CharVector footer;
     footer.Append("PK");              // First part of sig
     footer.Append<uint16_t>(0x0605);  // Second part of sig
     footer.Append<uint16_t>(0);       // Number of this disk
     footer.Append<uint16_t>(0);       // Disk where footer starts
     footer.Append<uint16_t>(0);       // Number of records on this disk
     footer.Append<uint16_t>(0);       // Total number of records
     footer.Append<uint32_t>(0);       // Nbytes of global headers
     footer.Append<uint32_t>(0);       // Offset of start of global headers
     footer.Append<uint16_t>(0);       // Zip file comment length.
     if (footer.Size() != 22) {
         utility::LogError("Internal error: empty zip file must have size 22.");
     }

     // Write everything; a short write would leave an unreadable archive.
     if (fwrite(footer.Data(), sizeof(char), footer.Size(), fp) !=
         footer.Size()) {
         utility::LogError("Failed to write to file {}.", file_name);
     }
 }


 // In-memory representation of a Numpy array: the properties stored in the
 // npy header (shape, dtype kind character, word size, fortran order) plus a
 // blob that holds the element data.
 class NumpyArray {
 public:
     // Wraps the data of a tensor. The tensor is converted to a contiguous
     // CPU tensor first and the blob of the result is kept.
     // NOTE(review): GetDataPtr() returns the base pointer of the blob. This
     // presumes that the data of the contiguous tensor starts at the blob's
     // base, which may not hold for sliced tensors - TODO confirm.
     NumpyArray(const core::Tensor& t)
         : shape_(t.GetShape()),
           type_(DtypeToChar(t.GetDtype())),
           word_size_(t.GetDtype().ByteSize()),
           fortran_order_(false) {
         blob_ = t.To(core::Device("CPU:0")).Contiguous().GetBlob();
     }

     // Allocates a CPU blob of NumBytes() bytes for an array with the given
     // header properties. The caller fills the blob through GetDataPtr().
     NumpyArray(const core::SizeVector& shape,
                char type,
                int64_t word_size,
                bool fortran_order)
         : shape_(shape),
           type_(type),
           word_size_(word_size),
           fortran_order_(fortran_order) {
         blob_ = std::make_shared<core::Blob>(NumBytes(), core::Device("CPU:0"));
     }

     // Returns the blob's data pointer, typed as T*.
     template <typename T>
     T* GetDataPtr() {
         return reinterpret_cast<T*>(blob_->GetDataPtr());
     }

     template <typename T>
     const T* GetDataPtr() const {
         return reinterpret_cast<const T*>(blob_->GetDataPtr());
     }

     // Maps the (kind character, word size) pair to a dtype. Returns
     // core::Undefined for unsupported combinations. Bool is selected by the
     // kind character alone.
     core::Dtype GetDtype() const {
         if (type_ == 'f' && word_size_ == 4) return core::Float32;
         if (type_ == 'f' && word_size_ == 8) return core::Float64;
         if (type_ == 'i' && word_size_ == 1) return core::Int8;
         if (type_ == 'i' && word_size_ == 2) return core::Int16;
         if (type_ == 'i' && word_size_ == 4) return core::Int32;
         if (type_ == 'i' && word_size_ == 8) return core::Int64;
         if (type_ == 'u' && word_size_ == 1) return core::UInt8;
         if (type_ == 'u' && word_size_ == 2) return core::UInt16;
         if (type_ == 'u' && word_size_ == 4) return core::UInt32;
         if (type_ == 'u' && word_size_ == 8) return core::UInt64;
         if (type_ == 'b') return core::Bool;

         return core::Undefined;
     }

     core::SizeVector GetShape() const { return shape_; }

     bool IsFortranOrder() const { return fortran_order_; }

     int64_t NumBytes() const { return NumElements() * word_size_; }

     int64_t NumElements() const { return shape_.NumElements(); }

     // Creates a tensor with default strides that shares this array's blob.
     // Fortran-ordered arrays and unsupported dtypes are reported through
     // utility::LogError.
     core::Tensor ToTensor() const {
         if (fortran_order_) {
             utility::LogError("Cannot load Numpy array with fortran_order.");
         }
         core::Dtype dtype = GetDtype();
         if (dtype.GetDtypeCode() == core::Dtype::DtypeCode::Undefined) {
             utility::LogError(
                     "Cannot load Numpy array with Numpy dtype={} and "
                     "word_size={}.",
                     type_, word_size_);
         }
         // t.blob_ is the same as blob_, no need for memory copy.
         core::Tensor t(shape_, core::shape_util::DefaultStrides(shape_),
                        const_cast<void*>(GetDataPtr<void>()), dtype, blob_);
         return t;
     }

     // Writes the array as an npy file: the header followed by the element
     // data. Open and write failures are reported through
     // utility::LogError.
     void Save(std::string file_name) const {
         utility::filesystem::CFile cfile;
         if (!cfile.Open(file_name, "wb")) {
             utility::LogError("Failed to open file {}, error: {}.", file_name,
                               cfile.GetError());
         }
         FILE* fp = cfile.GetFILE();

         CharVector header = CreateNumpyHeader(shape_, GetDtype());
         fseek(fp, 0, SEEK_SET);
         if (fwrite(header.Data(), sizeof(char), header.Size(), fp) !=
             header.Size()) {
             utility::LogError("Failed to write to file {}.", file_name);
         }
         fseek(fp, 0, SEEK_END);
         const size_t num_elements = static_cast<size_t>(shape_.NumElements());
         if (fwrite(GetDataPtr<void>(),
                    static_cast<size_t>(GetDtype().ByteSize()), num_elements,
                    fp) != num_elements) {
             utility::LogError("Failed to write to file {}.", file_name);
         }
     }

 private:
     std::shared_ptr<core::Blob> blob_ = nullptr;
     core::SizeVector shape_;
     char type_;
     int64_t word_size_;
     bool fortran_order_;
 };


 // Reads one npy array (header followed by raw element data) from the
 // current position of fp. A null fp and read failures are reported through
 // utility::LogError.
 static NumpyArray CreateNumpyArrayFromFile(FILE* fp) {
     if (fp == nullptr) {
         utility::LogError("Unable to open file ptr.");
     }

     // {shape, type(char), word_size, fortran_order}
     const std::tuple<core::SizeVector, char, int64_t, bool> properties =
             ParseNpyHeaderFromFile(fp);

     NumpyArray arr(std::get<0>(properties), std::get<1>(properties),
                    std::get<2>(properties), std::get<3>(properties));

     // The element data directly follows the header.
     const size_t expected_bytes = static_cast<size_t>(arr.NumBytes());
     const size_t nread = fread(arr.GetDataPtr<char>(), 1, expected_bytes, fp);
     if (nread != expected_bytes) {
         utility::LogError("Failed to read array data.");
     }
     return arr;
 }


 // Reads num_compressed_bytes of raw deflate data from the current position
 // of fp, inflates them into a buffer of num_uncompressed_bytes and parses
 // the result as an npy file (header followed by element data).
 // Read, decompression and size-consistency failures are reported through
 // utility::LogError.
 static NumpyArray CreateNumpyArrayFromCompressedFile(
         FILE* fp,
         uint32_t num_compressed_bytes,
         uint32_t num_uncompressed_bytes) {
     CharVector buffer_compressed(num_compressed_bytes);
     CharVector buffer_uncompressed(num_uncompressed_bytes);
     size_t nread = fread(buffer_compressed.Data(), 1, num_compressed_bytes, fp);
     if (nread != num_compressed_bytes) {
         utility::LogError("Failed to read compressed data.");
     }

     z_stream d_stream;

     d_stream.zalloc = Z_NULL;
     d_stream.zfree = Z_NULL;
     d_stream.opaque = Z_NULL;
     d_stream.avail_in = 0;
     d_stream.next_in = Z_NULL;
     // Negative window bits: the input is raw deflate data.
     if (inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) {
         utility::LogError("Failed to decompress data.");
     }

     d_stream.avail_in = num_compressed_bytes;
     d_stream.next_in =
             reinterpret_cast<unsigned char*>(buffer_compressed.Data());
     d_stream.avail_out = num_uncompressed_bytes;
     d_stream.next_out =
             reinterpret_cast<unsigned char*>(buffer_uncompressed.Data());

     // With Z_FINISH the whole stream must be inflated in this single call,
     // which inflate() signals with Z_STREAM_END. The stream is released
     // before any error is reported.
     const int inflate_status = inflate(&d_stream, Z_FINISH);
     const int end_status = inflateEnd(&d_stream);
     if (inflate_status != Z_STREAM_END || end_status != Z_OK) {
         utility::LogError("Failed to decompress data.");
     }

     // The header parser needs at least the 10-byte preamble.
     if (num_uncompressed_bytes < 10) {
         utility::LogError("Failed to decompress data.");
     }

     core::SizeVector shape;
     char type;
     int64_t word_size;
     bool fortran_order;
     std::tie(shape, type, word_size, fortran_order) =
             ParseNpyHeaderFromBuffer(buffer_uncompressed.Data());

     NumpyArray array(shape, type, word_size, fortran_order);

     // The element data occupies the tail of the uncompressed buffer. Reject
     // headers that announce more data than was decompressed, otherwise the
     // offset below wraps around and memcpy reads out of bounds.
     if (static_cast<uint64_t>(array.NumBytes()) > num_uncompressed_bytes) {
         utility::LogError("Failed to read array data.");
     }
     size_t offset = num_uncompressed_bytes - array.NumBytes();
     memcpy(array.GetDataPtr<char>(), buffer_uncompressed.Data() + offset,
            array.NumBytes());

     return array;
 }


 // Opens file_name for binary reading and loads the npy array it contains as
 // a tensor. A failure to open the file is reported through
 // utility::LogError.
 core::Tensor ReadNpy(const std::string& file_name) {
     utility::filesystem::CFile cfile;
     const bool is_open = cfile.Open(file_name, "rb");
     if (!is_open) {
         utility::LogError("Failed to open file {}, error: {}.", file_name,
                           cfile.GetError());
     }
     const NumpyArray array = CreateNumpyArrayFromFile(cfile.GetFILE());
     return array.ToTensor();
 }


 // Saves tensor to file_name in npy format.
 void WriteNpy(const std::string& file_name, const core::Tensor& tensor) {
     const NumpyArray array(tensor);
     array.Save(file_name);
 }


 // Loads all arrays of the npz file file_name and returns them keyed by
 // their entry names, with a trailing ".npy" removed.
 //
 // Entries are read sequentially from the start of the file until a record
 // that is not a local file header is met. Entries with compression method 0
 // are read directly, all others are inflated. Malformed input is reported
 // through utility::LogError.
 std::unordered_map<std::string, core::Tensor> ReadNpz(
         const std::string& file_name) {
     utility::filesystem::CFile cfile;
     if (!cfile.Open(file_name, "rb")) {
         utility::LogError("Failed to open file {}, error: {}.", file_name,
                           cfile.GetError());
     }
     FILE* fp = cfile.GetFILE();

     std::unordered_map<std::string, core::Tensor> tensor_map;

     // It's possible to check tensor_name and only one selected numpy array,
     // here we load all of them.
     while (true) {
         CharVector local_header(30);
         size_t local_header_bytes =
                 fread(local_header.Data(), sizeof(char), 30, fp);

         // An empty zip file has exactly 22 bytes.
         if (local_header_bytes == 22) {
             const char empty_zip_bytes[22] = {
                     0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
             if (std::memcmp(empty_zip_bytes, local_header.Data(), 22) == 0) {
                 break;
             } else {
                 utility::LogError("Invalid empty .npz file.");
             }
         }

         if (local_header_bytes != 30) {
             utility::LogError("Failed to read local header in npz.");
         }

         // If we've reached the global header, stop reading.
         if (local_header[2] != 0x03 || local_header[3] != 0x04) {
             break;
         }

         // Read tensor name.
         uint16_t tensor_name_len =
                 *reinterpret_cast<uint16_t*>(&local_header[26]);
         CharVector tensor_name_buf(tensor_name_len, ' ');
         if (fread(tensor_name_buf.Data(), sizeof(char), tensor_name_len, fp) !=
             tensor_name_len) {
             utility::LogError("Failed to read tensor name in npz.");
         }

         // Erase the trailing ".npy". Names without that suffix (including
         // names shorter than four characters) are kept unchanged instead of
         // being truncated blindly.
         std::string tensor_name(tensor_name_buf.Begin(), tensor_name_buf.End());
         const std::string npy_suffix = ".npy";
         if (tensor_name.size() >= npy_suffix.size() &&
             tensor_name.compare(tensor_name.size() - npy_suffix.size(),
                                 npy_suffix.size(), npy_suffix) == 0) {
             tensor_name.erase(tensor_name.size() - npy_suffix.size());
         }

         // Read extra field. Its content is skipped.
         uint16_t extra_field_len =
                 *reinterpret_cast<uint16_t*>(&local_header[28]);
         if (extra_field_len > 0) {
             CharVector buff(extra_field_len);
             if (fread(buff.Data(), sizeof(char), extra_field_len, fp) !=
                 extra_field_len) {
                 utility::LogError("Failed to read extra field in npz.");
             }
         }

         uint16_t compressed_method =
                 *reinterpret_cast<uint16_t*>(&local_header[8]);
         uint32_t num_compressed_bytes =
                 *reinterpret_cast<uint32_t*>(&local_header[18]);
         uint32_t num_uncompressed_bytes =
                 *reinterpret_cast<uint32_t*>(&local_header[22]);

         if (compressed_method == 0) {
             tensor_map[tensor_name] = CreateNumpyArrayFromFile(fp).ToTensor();
         } else {
             tensor_map[tensor_name] =
                     CreateNumpyArrayFromCompressedFile(fp, num_compressed_bytes,
                                                        num_uncompressed_bytes)
                             .ToTensor();
         }
     }

     return tensor_map;
 }


 void WriteNpz(const std::string& file_name,

               const std::unordered_map<std::string, core::Tensor>& tensor_map) {

     if (tensor_map.empty()) {

         WriteNpzEmpty(file_name);

     }


     std::unordered_map<std::string, core::Tensor> contiguous_tensor_map;

     for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {

         contiguous_tensor_map[it->first] =

                 it->second.To(core::Device("CPU:0")).Contiguous();

     }


     // TODO: WriteNpzOneTensor is called multiple times in order to write

     // multiple tensors. This requires opening/closing the npz file for multiple

     // times, which is not optimal.

     // TODO: Support writing in compressed mode: np.savez_compressed().

     bool is_first_tensor = true;

     for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it) {

         core::Tensor tensor = it->second.To(core::Device("CPU:0")).Contiguous();

         if (is_first_tensor) {

             WriteNpzOneTensor(file_name, it->first, tensor, /*append=*/false);

             is_first_tensor = false;

         } else {

             WriteNpzOneTensor(file_name, it->first, tensor, /*append=*/true);

         }

     }

 }


 }  // namespace io

 }  // namespace t

 }  // namespace cloudViewer

Blob.h

Dispatch.h

Dtype.h

format
filament::Texture::InternalFormat format
Definition: FilamentResourceManager.cpp:181

size
int size
Definition: FileIOFactory.cpp:130

count
int count
Definition: FileIOFactory.cpp:132

offset
int offset
Definition: FileIOFactory.cpp:135

type
char type
Definition: FileIOFactory.cpp:131

FileSystem.h

NumpyIO.h

SizeVector.h

cloudViewer::core::Device
Definition: Device.h:18

cloudViewer::core::Dtype
Definition: Dtype.h:21

cloudViewer::core::Dtype::ToString
std::string ToString() const
Definition: Dtype.h:65

cloudViewer::core::Dtype::DtypeCode::Undefined
@ Undefined

cloudViewer::core::Dtype::ByteSize
int64_t ByteSize() const
Definition: Dtype.h:59

cloudViewer::core::Dtype::GetDtypeCode
DtypeCode GetDtypeCode() const
Definition: Dtype.h:61

cloudViewer::core::SizeVector
Definition: SizeVector.h:70

cloudViewer::core::SizeVector::NumElements
int64_t NumElements() const
Definition: SizeVector.cpp:108

cloudViewer::core::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:119

cloudViewer::core::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:650

cloudViewer::core::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:459

cloudViewer::core::SmallVectorTemplateCommon::begin
iterator begin()
Definition: SmallVector.h:301

cloudViewer::core::SmallVectorTemplateCommon::end
iterator end()
Definition: SmallVector.h:303

cloudViewer::core::Tensor
Definition: Tensor.h:32

cloudViewer::core::Tensor::Contiguous
Tensor Contiguous() const
Definition: Tensor.cpp:772

cloudViewer::core::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:1164

cloudViewer::core::Tensor::GetDataPtr
T * GetDataPtr()
Definition: Tensor.h:1144

cloudViewer::core::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:1127

cloudViewer::core::Tensor::To
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:739

cloudViewer::core::Tensor::GetBlob
std::shared_ptr< Blob > GetBlob() const
Definition: Tensor.h:1168

cloudViewer::t::io::CharVector
Definition: NumpyIO.cpp:54

cloudViewer::t::io::CharVector::operator[]
char & operator[](size_t index)
Definition: NumpyIO.cpp:119

cloudViewer::t::io::CharVector::operator[]
const char & operator[](size_t index) const
Definition: NumpyIO.cpp:121

cloudViewer::t::io::CharVector::Append
CharVector & Append(const T &rhs)
Definition: NumpyIO.cpp:61

cloudViewer::t::io::CharVector::Append
CharVector & Append(InputIt first, InputIt last)
Definition: NumpyIO.cpp:88

cloudViewer::t::io::CharVector::Append
CharVector & Append(const std::string &rhs)
Definition: NumpyIO.cpp:72

cloudViewer::t::io::CharVector::Append
CharVector & Append(size_t count, const T &value)
Definition: NumpyIO.cpp:94

cloudViewer::t::io::CharVector::Data
const char * Data() const
Definition: NumpyIO.cpp:117

cloudViewer::t::io::CharVector::CharVector
CharVector()
Definition: NumpyIO.cpp:56

cloudViewer::t::io::CharVector::Append
CharVector & Append(const CharVector &other)
Definition: NumpyIO.cpp:101

cloudViewer::t::io::CharVector::Begin
std::vector< char >::iterator Begin()
Definition: NumpyIO.cpp:107

cloudViewer::t::io::CharVector::Resize
void Resize(size_t count)
Definition: NumpyIO.cpp:113

cloudViewer::t::io::CharVector::CharVector
CharVector(size_t size)
Definition: NumpyIO.cpp:57

cloudViewer::t::io::CharVector::Size
size_t Size() const
Definition: NumpyIO.cpp:111

cloudViewer::t::io::CharVector::Append
CharVector & Append(const char *rhs)
Definition: NumpyIO.cpp:77

cloudViewer::t::io::CharVector::CharVector
CharVector(size_t size, const char value)
Definition: NumpyIO.cpp:58

cloudViewer::t::io::CharVector::End
std::vector< char >::iterator End()
Definition: NumpyIO.cpp:109

cloudViewer::t::io::CharVector::Data
char * Data()
Definition: NumpyIO.cpp:115

cloudViewer::t::io::NumpyArray
Definition: NumpyIO.cpp:495

cloudViewer::t::io::NumpyArray::NumpyArray
NumpyArray(const core::Tensor &t)
Definition: NumpyIO.cpp:497

cloudViewer::t::io::NumpyArray::NumBytes
int64_t NumBytes() const
Definition: NumpyIO.cpp:546

cloudViewer::t::io::NumpyArray::NumpyArray
NumpyArray(const core::SizeVector &shape, char type, int64_t word_size, bool fortran_order)
Definition: NumpyIO.cpp:505

cloudViewer::t::io::NumpyArray::IsFortranOrder
bool IsFortranOrder() const
Definition: NumpyIO.cpp:544

cloudViewer::t::io::NumpyArray::GetShape
core::SizeVector GetShape() const
Definition: NumpyIO.cpp:542

cloudViewer::t::io::NumpyArray::ToTensor
core::Tensor ToTensor() const
Definition: NumpyIO.cpp:550

cloudViewer::t::io::NumpyArray::GetDataPtr
const T * GetDataPtr() const
Definition: NumpyIO.cpp:522

cloudViewer::t::io::NumpyArray::NumElements
int64_t NumElements() const
Definition: NumpyIO.cpp:548

cloudViewer::t::io::NumpyArray::Save
void Save(std::string file_name) const
Definition: NumpyIO.cpp:567

cloudViewer::t::io::NumpyArray::GetDtype
core::Dtype GetDtype() const
Definition: NumpyIO.cpp:526

cloudViewer::t::io::NumpyArray::GetDataPtr
T * GetDataPtr()
Definition: NumpyIO.cpp:517

cloudViewer::utility::filesystem::CFile
Definition: FileSystem.h:219

cloudViewer::utility::filesystem::CFile::GetError
std::string GetError()
Returns the last encountered error for this file.
Definition: FileSystem.cpp:748

cloudViewer::utility::filesystem::CFile::Open
bool Open(const std::string &filename, const std::string &mode)
Open a file.
Definition: FileSystem.cpp:739

cloudViewer::utility::filesystem::CFile::GetFILE
FILE * GetFILE()
Returns the underlying C FILE pointer.
Definition: FileSystem.h:264

LogError
#define LogError(...)
Definition: Logging.h:60

Logging.h

cloudViewer::core::shape_util::DefaultStrides
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
Definition: ShapeUtil.cpp:214

cloudViewer::core::Undefined
const Dtype Undefined
Definition: Dtype.cpp:41

cloudViewer::core::Int8
const Dtype Int8
Definition: Dtype.cpp:44

cloudViewer::core::Bool
const Dtype Bool
Definition: Dtype.cpp:52

cloudViewer::core::Int64
const Dtype Int64
Definition: Dtype.cpp:47

cloudViewer::core::UInt64
const Dtype UInt64
Definition: Dtype.cpp:51

cloudViewer::core::UInt32
const Dtype UInt32
Definition: Dtype.cpp:50

cloudViewer::core::UInt8
const Dtype UInt8
Definition: Dtype.cpp:48

cloudViewer::core::Int16
const Dtype Int16
Definition: Dtype.cpp:45

cloudViewer::core::Float64
const Dtype Float64
Definition: Dtype.cpp:43

cloudViewer::core::UInt16
const Dtype UInt16
Definition: Dtype.cpp:49

cloudViewer::core::Int32
const Dtype Int32
Definition: Dtype.cpp:46

cloudViewer::core::Float32
const Dtype Float32
Definition: Dtype.cpp:42

cloudViewer::t::io::ReadNpz
std::unordered_map< std::string, core::Tensor > ReadNpz(const std::string &file_name)
Definition: NumpyIO.cpp:675

cloudViewer::t::io::ParseZipFooter
static std::tuple< size_t, size_t, size_t > ParseZipFooter(FILE *fp)
Definition: NumpyIO.cpp:331

cloudViewer::t::io::ReadNpy
core::Tensor ReadNpy(const std::string &file_name)
Definition: NumpyIO.cpp:662

cloudViewer::t::io::WriteNpz
void WriteNpz(const std::string &file_name, const std::unordered_map< std::string, core::Tensor > &tensor_map)
Definition: NumpyIO.cpp:759

cloudViewer::t::io::BigEndianChar
static char BigEndianChar()
Definition: NumpyIO.cpp:127

cloudViewer::t::io::ParseNpyHeaderFromBuffer
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromBuffer(const char *buffer)
Definition: NumpyIO.cpp:325

cloudViewer::t::io::CreateNumpyHeader
static CharVector CreateNumpyHeader(const core::SizeVector &shape, const core::Dtype &dtype)
Definition: NumpyIO.cpp:157

cloudViewer::t::io::CreateNumpyArrayFromCompressedFile
static NumpyArray CreateNumpyArrayFromCompressedFile(FILE *fp, uint32_t num_compressed_bytes, uint32_t num_uncompressed_bytes)
Definition: NumpyIO.cpp:612

cloudViewer::t::io::DtypeToChar
static char DtypeToChar(const core::Dtype &dtype)
Definition: NumpyIO.cpp:132

cloudViewer::t::io::CreateNumpyArrayFromFile
static NumpyArray CreateNumpyArrayFromFile(FILE *fp)
Definition: NumpyIO.cpp:591

cloudViewer::t::io::WriteNpzOneTensor
static void WriteNpzOneTensor(const std::string &file_name, const std::string &tensor_name, const core::Tensor &tensor, bool append)
Definition: NumpyIO.cpp:360

cloudViewer::t::io::WriteNpzEmpty
static void WriteNpzEmpty(const std::string &file_name)
Definition: NumpyIO.cpp:468

cloudViewer::t::io::ParseNpyPreamble
static size_t ParseNpyPreamble(const char *preamble)
Definition: NumpyIO.cpp:281

cloudViewer::t::io::WriteNpy
void WriteNpy(const std::string &file_name, const core::Tensor &tensor)
Definition: NumpyIO.cpp:671

cloudViewer::t::io::ParsePropertyDict
static std::tuple< core::SizeVector, char, int64_t, bool > ParsePropertyDict(const std::string &header)
Definition: NumpyIO.cpp:207

cloudViewer::t::io::ParseNpyHeaderFromFile
static std::tuple< core::SizeVector, char, int64_t, bool > ParseNpyHeaderFromFile(FILE *fp)
Definition: NumpyIO.cpp:303

cloudViewer
Generic file read and write utility for python interface.
Definition: AutoSegmentationTools.h:16

SEEK_SET
#define SEEK_SET
Definition: qioapi.cpp:38

SEEK_END
#define SEEK_END
Definition: qioapi.cpp:34