ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
Dataset.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 #include <FileSystem.h>
11 #include <Logging.h>
12 
13 #include <string>
14 
17 
18 namespace cloudViewer {
19 namespace data {
20 
21 std::string LocateDataRoot() {
22  std::string data_root = "";
23  if (const char* env_p = std::getenv("CLOUDVIEWER_DATA_ROOT")) {
24  data_root = std::string(env_p);
25  }
26  if (data_root.empty()) {
27  data_root =
28  utility::filesystem::GetHomeDirectory() + "/cloudViewer_data";
29  }
30  return data_root;
31 }
32 
/// Downloads prefix currently in effect for this translation unit. It is
/// overwritten by SetCustomDownloadsPrefix() and handed back by
/// GetCustomDownloadsPrefix(); the initial value ends with '/'.
static std::string InternalDownloadsPrefix{
        "https://github.com/isl-org/open3d_downloads/releases/download/"};
35 
36 void SetCustomDownloadsPrefix(const std::string& prefix) {
37  if (prefix.empty()) {
38  utility::LogError("prefix cannot be empty.");
39  }
40  if (prefix.back() != '/') {
41  InternalDownloadsPrefix = prefix + "/";
42  } else {
43  InternalDownloadsPrefix = prefix;
44  }
45 }
46 
47 std::string GetCustomDownloadsPrefix() { return InternalDownloadsPrefix; }
48 
50  return "https://github.com/isl-org/open3d_downloads/releases/download/";
51 }
52 
53 Dataset::Dataset(const std::string& prefix, const std::string& data_root)
54  : prefix_(prefix) {
55  if (data_root.empty()) {
57  } else {
58  data_root_ = data_root;
59  }
60  if (prefix_.empty()) {
61  utility::LogError("prefix cannot be empty.");
62  }
63 }
64 
65 void Dataset::CheckPathsExist(const std::vector<std::string>& paths) const {
66  const size_t num_expected_paths = paths.size();
67  size_t num_existing_paths = 0;
68  for (const auto& path : paths) {
70  num_existing_paths++;
71  } else {
72  utility::LogWarning("{} does not exist.", path);
73  }
74  }
75  if (num_existing_paths != num_expected_paths) {
77  "Expected {} files, but only found {} files. Please "
78  "re-download and re-extract the dataset.",
79  num_expected_paths, num_existing_paths);
80  }
81 }
82 
83 DownloadDataset::DownloadDataset(const std::string& prefix,
85  const std::string& data_root)
87  prefix, std::vector<DataDescriptor>{data_descriptor}, data_root) {
88 }
89 
91  const std::string& prefix,
92  const std::vector<DataDescriptor>& data_descriptors,
93  const std::string& data_root)
94  : Dataset(prefix, data_root), data_descriptors_(data_descriptors) {
95  // Download.
96  for (const auto& data_descriptor : data_descriptors) {
100  GetDownloadDir());
102  utility::LogError("Download failed integrity check.");
103  }
104  }
105  }
106 
107  // Extract.
108  // TODO: All dataset constructors should have full knowledge of the list of
109  // extracted files. If CheckPathsExist() fails, we shall trigger
110  // re-extraction automatically or throw an error. To enable this, we need to
111  // modify the dataset constructors to provide a list of extracted files for
112  // each dataset.
113  const std::string base_extract_dir = GetExtractDir();
114  if (!utility::filesystem::DirectoryExists(base_extract_dir)) {
115  for (const auto& data_descriptor : data_descriptors) {
116  const std::string download_name =
119  const std::string download_path =
120  GetDownloadDir() + "/" + download_name;
121 
122  std::string extract_dir = base_extract_dir;
123  if (!data_descriptor.extract_in_subdir_.empty()) {
124  extract_dir += "/" + data_descriptor.extract_in_subdir_;
125  }
126 
127  if (utility::IsSupportedCompressedFilePath(download_path)) {
128  utility::Extract(download_path, extract_dir);
129  } else {
131  utility::filesystem::Copy(download_path, extract_dir);
132  }
133  }
134  }
135 }
136 
138  const DataDescriptor& data_descriptor) const {
139  // Check directory.
141  return false;
142  }
143  // Check file exists.
144  const std::string download_path =
145  GetDownloadDir() + "/" +
148  if (!utility::filesystem::FileExists(download_path)) {
149  return false;
150  }
151  // Check MD5.
152  if (utility::GetMD5(download_path) != data_descriptor.md5_) {
153  return false;
154  }
155  return true;
156 }
157 
158 } // namespace data
159 } // namespace cloudViewer
Base CloudViewer dataset class.
Definition: Dataset.h:61
std::string prefix_
Dataset prefix.
Definition: Dataset.h:103
const std::string GetExtractDir() const
Get absolute path to extract directory. i.e. ${data_root}/extract/${prefix}.
Definition: Dataset.h:94
const std::string GetDownloadDir() const
Get absolute path to download directory. i.e. ${data_root}/download/${prefix}.
Definition: Dataset.h:88
std::string data_root_
CloudViewer data root.
Definition: Dataset.h:100
void CheckPathsExist(const std::vector< std::string > &paths) const
Check if the paths exist after extraction.
Definition: Dataset.cpp:65
Dataset(const std::string &prefix, const std::string &data_root="")
Parameterized Constructor.
Definition: Dataset.cpp:53
Dataset class with one or more downloaded files.
Definition: Dataset.h:152
bool HasDownloaded(const DataDescriptor &data_descriptor) const
Check if all files are downloaded and MD5 checksums are valid.
Definition: Dataset.cpp:137
DownloadDataset(const std::string &prefix, const DataDescriptor &data_descriptor, const std::string &data_root="")
Definition: Dataset.cpp:83
#define LogWarning(...)
Definition: Logging.h:72
#define LogError(...)
Definition: Logging.h:60
std::string CloudViewerDownloadsPrefix()
Definition: Dataset.cpp:49
static const DataDescriptor data_descriptor
std::string LocateDataRoot()
Definition: Dataset.cpp:21
static const std::vector< DataDescriptor > data_descriptors
std::string GetCustomDownloadsPrefix()
Definition: Dataset.cpp:47
void SetCustomDownloadsPrefix(const std::string &prefix)
Definition: Dataset.cpp:36
static const std::string path
Definition: PointCloud.cpp:59
bool Copy(const std::string &from, const std::string &to, bool include_parent_dir=false, const std::string &extname="")
Copy a file or directory.
Definition: FileSystem.cpp:249
bool MakeDirectoryHierarchy(const std::string &directory)
Definition: FileSystem.cpp:499
bool DirectoryExists(const std::string &directory)
Definition: FileSystem.cpp:473
bool FileExists(const std::string &filename)
Definition: FileSystem.cpp:524
std::string GetHomeDirectory()
Get the HOME directory for the user.
Definition: FileSystem.cpp:74
std::string GetFileNameWithoutDirectory(const std::string &filename)
Definition: FileSystem.cpp:301
bool IsSupportedCompressedFilePath(const std::string &file_path)
Returns true if the file is a supported compressed file path. It does not check if the file exists....
Definition: Extract.cpp:27
std::string GetMD5(const std::string &file_path)
Computes MD5 Hash for the given file.
Definition: Download.cpp:46
void Extract(const std::string &file_path, const std::string &extract_dir)
Function to extract compressed files.
Definition: Extract.cpp:33
std::string DownloadFromMirrors(const std::vector< std::string > &mirrors, const std::string &md5, const std::string &download_dir)
Download a file from list of mirror URLs. If a file already exists and the MD5 hash matches,...
Definition: Download.cpp:160
Generic file read and write utility for python interface.
Definition: Eigen.h:85
Information about a file to be downloaded.
Definition: Dataset.h:111
std::string md5_
MD5 checksum of the downloaded file.
Definition: Dataset.h:137
std::vector< std::string > urls_
List of URL mirrors.
Definition: Dataset.h:134