ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
vocab_tree.cc
Go to the documentation of this file.
1 // Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 //
14 // * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
15 // its contributors may be used to endorse or promote products derived
16 // from this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
22 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 // POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
31 
32 #include "exe/vocab_tree.h"
33 
34 #include <numeric>
35 
36 #include "base/database.h"
37 #include "exe/gui.h"
38 #include "feature/matching.h"
39 #include "feature/sift.h"
40 #include "feature/utils.h"
41 #include "retrieval/resources.h"
42 #include "retrieval/visual_index.h"
43 #include "util/download.h"
44 #include "util/misc.h"
45 #include "util/opengl_utils.h"
46 #include "util/option_manager.h"
47 
48 namespace colmap {
49 namespace {
50 
51 // Loads descriptors for training from the database. Loads all descriptors from
52 // the database if max_num_images < 0, otherwise the descriptors of a random
53 // subset of images are selected.
54 FeatureDescriptors LoadRandomDatabaseDescriptors(
55  const std::string& database_path, const int max_num_images) {
56  Database database(database_path);
57  DatabaseTransaction database_transaction(&database);
58 
59  const std::vector<Image> images = database.ReadAllImages();
60 
62 
63  std::vector<size_t> image_idxs;
64  size_t num_descriptors = 0;
65  if (max_num_images < 0) {
66  // All images in the database.
67  image_idxs.resize(images.size());
68  std::iota(image_idxs.begin(), image_idxs.end(), 0);
69  num_descriptors = database.NumDescriptors();
70  } else {
71  // Random subset of images in the database.
72  CHECK_LE(max_num_images, images.size());
73  RandomSampler random_sampler(max_num_images);
74  random_sampler.Initialize(images.size());
75  image_idxs = random_sampler.Sample();
76  for (const auto image_idx : image_idxs) {
77  const auto& image = images.at(image_idx);
78  num_descriptors += database.NumDescriptorsForImage(image.ImageId());
79  }
80  }
81 
82  descriptors.resize(num_descriptors, 128);
83 
84  size_t descriptor_row = 0;
85  for (const auto image_idx : image_idxs) {
86  const auto& image = images.at(image_idx);
87  const FeatureDescriptors image_descriptors =
88  database.ReadDescriptors(image.ImageId());
89  descriptors.block(descriptor_row, 0, image_descriptors.rows(), 128) =
90  image_descriptors;
91  descriptor_row += image_descriptors.rows();
92  }
93 
94  CHECK_EQ(descriptor_row, num_descriptors);
95 
96  return descriptors;
97 }
98 
99 std::vector<Image> ReadVocabTreeRetrievalImageList(const std::string& path,
100  Database* database) {
101  std::vector<Image> images;
102  if (path.empty()) {
103  images.reserve(database->NumImages());
104  for (const auto& image : database->ReadAllImages()) {
105  images.push_back(image);
106  }
107  } else {
108  DatabaseTransaction database_transaction(database);
109 
110  const auto image_names = ReadTextFileLines(path);
111  images.reserve(image_names.size());
112  for (const auto& image_name : image_names) {
113  const auto image = database->ReadImageWithName(image_name);
114  CHECK_NE(image.ImageId(), kInvalidImageId);
115  images.push_back(image);
116  }
117  }
118  return images;
119 }
120 
121 } // namespace
122 
123 int RunVocabTreeBuilder(int argc, char** argv) {
124  std::string vocab_tree_path = retrieval::kDefaultVocabTreeUri;
126  int max_num_images = -1;
127 
128  OptionManager options;
129  options.AddDatabaseOptions();
130  options.AddRequiredOption("vocab_tree_path", &vocab_tree_path);
131  options.AddDefaultOption("num_visual_words", &build_options.num_visual_words);
132  options.AddDefaultOption("num_checks", &build_options.num_checks);
133  options.AddDefaultOption("branching", &build_options.branching);
134  options.AddDefaultOption("num_iterations", &build_options.num_iterations);
135  options.AddDefaultOption("max_num_images", &max_num_images);
136  options.Parse(argc, argv);
137 
138  retrieval::VisualIndex<> visual_index;
139 
140  std::cout << "Loading descriptors..." << std::endl;
141  const auto descriptors =
142  LoadRandomDatabaseDescriptors(*options.database_path, max_num_images);
143  std::cout << " => Loaded a total of " << descriptors.rows() << " descriptors"
144  << std::endl;
145 
146  std::cout << "Building index for visual words..." << std::endl;
147  visual_index.Build(build_options, descriptors);
148  std::cout << " => Quantized descriptor space using "
149  << visual_index.NumVisualWords() << " visual words" << std::endl;
150 
151  std::cout << "Saving index to file..." << std::endl;
152  visual_index.Write(vocab_tree_path);
153 
154  return EXIT_SUCCESS;
155 }
156 
157 int RunVocabTreeRetriever(int argc, char** argv) {
158  std::string vocab_tree_path = retrieval::kDefaultVocabTreeUri;
159  std::string database_image_list_path;
160  std::string query_image_list_path;
161  std::string output_index_path;
163  int max_num_features = -1;
164 
165  OptionManager options;
166  options.AddDatabaseOptions();
167  options.AddRequiredOption("vocab_tree_path", &vocab_tree_path);
168  options.AddDefaultOption("database_image_list_path",
169  &database_image_list_path);
170  options.AddDefaultOption("query_image_list_path", &query_image_list_path);
171  options.AddDefaultOption("output_index_path", &output_index_path);
172  options.AddDefaultOption("max_num_images", &query_options.max_num_images);
173  options.AddDefaultOption("num_neighbors", &query_options.num_neighbors);
174  options.AddDefaultOption("num_checks", &query_options.num_checks);
175  options.AddDefaultOption("num_images_after_verification",
176  &query_options.num_images_after_verification);
177  options.AddDefaultOption("max_num_features", &max_num_features);
178  options.Parse(argc, argv);
179 
180  // Automatically download and cache if URI format is provided.
181  std::string resolved_vocab_tree_path =
182  MaybeDownloadAndCacheFile(vocab_tree_path).string();
183  retrieval::VisualIndex<> visual_index;
184  visual_index.Read(resolved_vocab_tree_path);
185 
186  Database database(*options.database_path);
187 
188  const auto database_images =
189  ReadVocabTreeRetrievalImageList(database_image_list_path, &database);
190  const auto query_images =
191  (!query_image_list_path.empty() || output_index_path.empty())
192  ? ReadVocabTreeRetrievalImageList(query_image_list_path, &database)
193  : std::vector<Image>();
194 
196  // Perform image indexing
198 
199  for (size_t i = 0; i < database_images.size(); ++i) {
200  Timer timer;
201  timer.Start();
202 
203  std::cout << StringPrintf("Indexing image [%d/%d]", i + 1,
204  database_images.size())
205  << std::flush;
206 
207  if (visual_index.ImageIndexed(database_images[i].ImageId())) {
208  std::cout << std::endl;
209  continue;
210  }
211 
212  auto keypoints = database.ReadKeypoints(database_images[i].ImageId());
213  auto descriptors = database.ReadDescriptors(database_images[i].ImageId());
214  if (max_num_features > 0 && descriptors.rows() > max_num_features) {
215  ExtractTopScaleFeatures(&keypoints, &descriptors, max_num_features);
216  }
217 
219  database_images[i].ImageId(), keypoints, descriptors);
220 
221  std::cout << StringPrintf(" in %.3fs", timer.ElapsedSeconds()) << std::endl;
222  }
223 
224  // Compute the TF-IDF weights, etc.
225  visual_index.Prepare();
226 
227  // Optionally save the indexing data for the database images (as well as the
228  // original vocabulary tree data) to speed up future indexing.
229  if (!output_index_path.empty()) {
230  visual_index.Write(output_index_path);
231  }
232 
233  if (query_images.empty()) {
234  return EXIT_SUCCESS;
235  }
236 
238  // Perform image queries
240 
241  std::unordered_map<image_t, const Image*> image_id_to_image;
242  image_id_to_image.reserve(database_images.size());
243  for (const auto& image : database_images) {
244  image_id_to_image.emplace(image.ImageId(), &image);
245  }
246 
247  for (size_t i = 0; i < query_images.size(); ++i) {
248  Timer timer;
249  timer.Start();
250 
251  std::cout << StringPrintf("Querying for image %s [%d/%d]",
252  query_images[i].Name().c_str(), i + 1,
253  query_images.size())
254  << std::flush;
255 
256  auto keypoints = database.ReadKeypoints(query_images[i].ImageId());
257  auto descriptors = database.ReadDescriptors(query_images[i].ImageId());
258  if (max_num_features > 0 && descriptors.rows() > max_num_features) {
259  ExtractTopScaleFeatures(&keypoints, &descriptors, max_num_features);
260  }
261 
262  std::vector<retrieval::ImageScore> image_scores;
263  visual_index.Query(query_options, keypoints, descriptors, &image_scores);
264 
265  std::cout << StringPrintf(" in %.3fs", timer.ElapsedSeconds()) << std::endl;
266  for (const auto& image_score : image_scores) {
267  const auto& image = *image_id_to_image.at(image_score.image_id);
268  std::cout << StringPrintf(" image_id=%d, image_name=%s, score=%f",
269  image_score.image_id, image.Name().c_str(),
270  image_score.score)
271  << std::endl;
272  }
273  }
274 
275  return EXIT_SUCCESS;
276 }
277 
278 } // namespace colmap
std::shared_ptr< core::Tensor > image
FeatureDescriptors ReadDescriptors(const image_t image_id) const
Definition: database.cc:448
FeatureKeypoints ReadKeypoints(const image_t image_id) const
Definition: database.cc:436
void AddRequiredOption(const std::string &name, T *option, const std::string &help_text="")
void AddDefaultOption(const std::string &name, T *option, const std::string &help_text="")
std::shared_ptr< std::string > database_path
void Parse(const int argc, char **argv)
void Start()
Definition: timer.cc:43
double ElapsedSeconds() const
Definition: timer.cc:82
bool ImageIndexed(const int image_id) const
Definition: visual_index.h:231
void Read(const std::string &path)
Definition: visual_index.h:544
void Add(const IndexOptions &options, const int image_id, const GeomType &geometries, const DescType &descriptors)
Definition: visual_index.h:187
void Build(const BuildOptions &options, const DescType &descriptors)
Definition: visual_index.h:515
void Query(const QueryOptions &options, const DescType &descriptors, std::vector< ImageScore > *image_scores) const
Definition: visual_index.h:237
void Write(const std::string &path)
Definition: visual_index.h:595
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718
static const std::string path
Definition: PointCloud.cpp:59
static const std::string kDefaultVocabTreeUri
Definition: resources.h:25
void ExtractTopScaleFeatures(FeatureKeypoints *keypoints, FeatureDescriptors *descriptors, const size_t num_features)
Definition: utils.cc:79
int RunVocabTreeRetriever(int argc, char **argv)
Definition: vocab_tree.cc:157
Eigen::Matrix< uint8_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > FeatureDescriptors
Definition: types.h:79
int RunVocabTreeBuilder(int argc, char **argv)
Definition: vocab_tree.cc:123
std::vector< std::string > ReadTextFileLines(const std::string &path)
Definition: misc.cc:308
std::string StringPrintf(const char *format,...)
Definition: string.cc:131
const image_t kInvalidImageId
Definition: types.h:76
std::filesystem::path MaybeDownloadAndCacheFile(const std::string &uri)
CorePointDescSet * descriptors