11 #include <Eigen/Dense>
16 #include <unordered_map>
17 #include <unordered_set>
// NOTE(review): fragment of a class-template declaration. The line naming the
// class and many member declarations are missing from this listing, so only
// the pieces visible below are described.
// Template parameters: descriptor scalar type, descriptor dimension, and
// embedding dimension (which also sizes the std::bitset used below).
30 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
// Row-major matrix with a dynamic number of rows and kDescDim columns.
// NOTE(review): the typedef name is cut off; the member index at the end of
// this listing shows this exact type under the name DescType.
34 typedef Eigen::Matrix<kDescType, Eigen::Dynamic, kDescDim, Eigen::RowMajor>
// Tail of a declaration taking per-descriptor word ids as a vector.
// NOTE(review): presumably ComputeHammingEmbedding (see member index) - confirm.
59 const Eigen::VectorXi& word_ids);
// Tail of a const declaration taking a word-id matrix and an output vector of
// image scores; matches the Query signature in the member index.
73 const Eigen::MatrixXi& word_ids,
74 std::vector<ImageScore>* image_scores)
const;
// Tail of a const declaration with a kEmbeddingDim-bit output parameter;
// matches ConvertToBinaryDescriptor in the member index.
79 std::bitset<kEmbeddingDim>* binary_descriptor)
const;
// Tail of a const declaration taking a set of image ids and an output vector
// of entry pointers; matches FindMatches in the member index.
84 const std::unordered_set<int>& image_ids,
85 std::vector<const EntryType*>* matches)
const;
// Binary (de)serialization through caller-provided file streams.
94 void Read(std::ifstream* ifs);
95 void Write(std::ofstream* ofs)
const;
// Recomputes per-word IDF weights and per-image normalization constants.
98 void ComputeWeightsAndNormalizationConstants();
// Container of inverted files using Eigen's aligned allocator.
// NOTE(review): the member name is cut off; the definitions below use
// inverted_files_, which is presumably this member.
101 std::vector<InvertedFile<kEmbeddingDim>,
102 Eigen::aligned_allocator<InvertedFile<kEmbeddingDim>>>
// Per-image normalization constant, keyed by image id (see Read and Query).
107 std::unordered_map<int, float> normalization_constants_;
// Out-of-class definition fragment whose initializer is the largest int value.
// NOTE(review): the line naming the variable is missing; the member index lists
// `static const int kInvalidWordId`, which is presumably what is defined here.
117 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
119 std::numeric_limits<int>::max();
// Member-function fragment that sizes proj_matrix_ to kEmbeddingDim x kDescDim
// and sets it to the identity pattern.
// NOTE(review): the signature line is missing; presumably the constructor -
// confirm against the original file.
121 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
123 proj_matrix_.resize(kEmbeddingDim, kDescDim);
124 proj_matrix_.setIdentity();
// Returns the number of inverted files as an int.
// NOTE(review): the signature line is missing; the member index lists
// `int NumVisualWords() const`, which presumably is this function.
127 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
129 return static_cast<int>(inverted_files_.size());
// Resizes the inverted-file container to num_words entries and calls Reset()
// on every one of them.
// @param num_words  Number of inverted files to hold; must be > 0 (CHECK_GT).
// NOTE(review): the line carrying the function name is missing; the member
// index lists `void Initialize(const int num_words)`.
132 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
134 const int num_words) {
135 CHECK_GT(num_words, 0);
136 inverted_files_.resize(num_words);
137 for (
auto& inverted_file : inverted_files_) {
138 inverted_file.Reset();
// Requires at least one visual word, sorts the entries of every inverted
// file, then recomputes the weights and normalization constants.
// NOTE(review): the signature line is missing and this function is not in the
// member index, so its name cannot be confirmed from this listing.
142 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
144 CHECK_GT(NumVisualWords(), 0);
146 for (
auto& inverted_file : inverted_files_) {
147 inverted_file.SortEntries();
// Runs after all inverted files have been sorted.
150 ComputeWeightsAndNormalizationConstants();
// Builds proj_matrix_ from the Q factor of a column-pivoting Householder QR
// decomposition of a kDescDim x kDescDim float matrix, keeping the top
// kEmbeddingDim rows of Q.
// NOTE(review): the signature and the lines that fill random_matrix are
// missing; presumably this is GenerateHammingEmbeddingProjection and the
// matrix is filled via RandomGaussian (both appear in the member index).
153 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
156 Eigen::MatrixXf random_matrix(kDescDim, kDescDim);
160 const Eigen::MatrixXf Q = random_matrix.colPivHouseholderQr().matrixQ();
161 proj_matrix_ = Q.topRows<kEmbeddingDim>();
// Groups descriptor row indices by visual word, projects each group's
// descriptors with proj_matrix_, and hands the projected rows to the word's
// inverted file via ComputeHammingEmbedding.
// NOTE(review): the signature line is missing; the member index lists
// `void ComputeHammingEmbedding(const DescType&, const Eigen::VectorXi&)`.
164 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
// Words with fewer than this many descriptors take the branch below.
171 const size_t kMinEntries = 5;
// One index list per visual word.
174 std::vector<std::vector<int>> indices_per_word(NumVisualWords());
// Bounds-checked access: a word id outside [0, NumVisualWords()) throws.
176 indices_per_word.at(word_ids(i)).push_back(i);
181 for (
int i = 0; i < NumVisualWords(); ++i) {
182 const auto& indices = indices_per_word[i];
// NOTE(review): the branch body is missing; presumably such words are skipped.
183 if (indices.size() < kMinEntries) {
// One projected descriptor per row, kEmbeddingDim columns.
187 Eigen::Matrix<float, Eigen::Dynamic, kEmbeddingDim> proj_desc(
188 indices.size(), kEmbeddingDim);
189 for (
size_t j = 0; j < indices.size(); ++j) {
// NOTE(review): listing line 191 is missing between the next two lines.
190 proj_desc.row(j) = proj_matrix_ *
descriptors.row(indices[j])
192 .template cast<float>();
195 inverted_files_[i].ComputeHammingEmbedding(proj_desc);
// Projects one descriptor with proj_matrix_ and adds it as an entry to the
// inverted file selected by word_id (bounds-checked via at()).
// The descriptor must have exactly kDescDim elements (CHECK_EQ).
// NOTE(review): the signature line and the trailing AddEntry arguments are
// missing; the member index lists AddEntry(image_id, word_id, feature_idx,
// descriptor, geometry).
199 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
206 CHECK_EQ(descriptor.size(), kDescDim);
// Transpose the row to a column and cast to float before projecting.
208 proj_matrix_ * descriptor.transpose().template cast<float>();
209 inverted_files_.at(word_id).AddEntry(image_id, feature_idx, proj_desc,
// Calls ClearEntries() on every inverted file.
// NOTE(review): the signature line is missing and this function is not in the
// member index, so its name cannot be confirmed from this listing.
213 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
215 for (
auto& inverted_file : inverted_files_) {
216 inverted_file.ClearEntries();
// Scores indexed images against a set of query descriptors.
// @param word_ids      Word ids per query feature, indexed as word_ids(i, n).
// @param image_scores  Output; cleared first, then one entry per image that
//                      received a score.
// NOTE(review): the line with the function name and first parameter is
// missing; the visible parameters match `Query` in the member index. Several
// loop headers and closing braces are also missing from this listing.
220 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
223 const Eigen::MatrixXi& word_ids,
224 std::vector<ImageScore>* image_scores)
const {
227 image_scores->clear();
// Query-side normalization: 1/sqrt(self-similarity) when it is positive,
// otherwise the weight stays at 1.
230 const float self_similarity = ComputeSelfSimilarity(word_ids);
231 float normalization_weight = 1.0f;
232 if (self_similarity > 0.0f) {
233 normalization_weight = 1.0f / std::sqrt(self_similarity);
// Maps an image id to its position in *image_scores.
236 std::unordered_map<int, int> score_map;
// Scratch buffer receiving the per-feature scores from one inverted file.
237 std::vector<ImageScore> inverted_file_scores;
242 descriptors.row(i).transpose().template cast<float>();
244 const int word_id = word_ids(i, n);
// NOTE(review): the branch body is missing; presumably invalid ids are skipped.
245 if (word_id == kInvalidWordId) {
249 inverted_files_.at(word_id).ScoreFeature(proj_descriptor,
250 &inverted_file_scores);
252 for (
const ImageScore& score : inverted_file_scores) {
253 const auto score_map_it = score_map.find(score.image_id);
// First score for this image: record its index and append the score.
254 if (score_map_it == score_map.end()) {
256 score_map.emplace(score.image_id,
257 static_cast<int>(image_scores->size()));
258 image_scores->push_back(score);
// Image already present: accumulate into its existing entry.
262 (*image_scores).at(score_map_it->second).score +=
// Scale by the query weight and the image's stored normalization constant;
// at() throws if the image id has no constant.
271 score.score *= normalization_weight *
272 normalization_constants_.at(score.image_id);
// Projects a descriptor with proj_matrix_ and asks the inverted file selected
// by word_id (bounds-checked via at()) to convert it to a binary descriptor.
// NOTE(review): the line with the function name and leading parameters is
// missing; the member index lists ConvertToBinaryDescriptor(word_id,
// descriptor, binary_descriptor).
276 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
281 std::bitset<kEmbeddingDim>* binary_descriptor)
const {
283 proj_matrix_ * descriptor.transpose().template cast<float>();
284 inverted_files_.at(word_id).ConvertToBinaryDescriptor(proj_desc,
// Returns the IDF weight stored in the inverted file for word_id.
// at() throws std::out_of_range for an id outside the container.
// NOTE(review): the line carrying the function name is missing; the member
// index lists `float GetIDFWeight(const int word_id) const`.
288 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
290 const int word_id)
const {
291 return inverted_files_.at(word_id).IDFWeight();
// Appends to *matches a pointer to every entry of word_id's inverted file
// whose image_id is contained in image_ids. *matches is not cleared here.
// NOTE(review): the line with the function name and first parameter is
// missing; the member index lists FindMatches(word_id, image_ids, matches).
294 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
297 const std::unordered_set<int>& image_ids,
298 std::vector<const EntryType*>* matches)
const {
// NOTE(review): the stored pointers refer to elements of whatever
// GetEntries() returns; assumes that is a reference to storage outliving
// this call - confirm.
300 const auto& entries = inverted_files_.at(word_id).GetEntries();
301 for (
const auto& entry : entries) {
302 if (image_ids.count(entry.image_id)) {
303 matches->emplace_back(&entry);
// Computes a self-similarity value from the given word ids using the squared
// IDF weight of each valid word, accumulated in double and returned as float.
// NOTE(review): the function-name line, the loop header, and the accumulation
// statement's left-hand side are missing; the member index lists
// `float ComputeSelfSimilarity(const Eigen::MatrixXi& word_ids) const`.
308 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
310 const Eigen::MatrixXi& word_ids)
const {
311 double self_similarity = 0.0;
// Linear (single-index) access into the word-id matrix.
313 const int word_id = word_ids(i);
// Entries equal to kInvalidWordId contribute nothing.
314 if (word_id != kInvalidWordId) {
315 const auto& inverted_file = inverted_files_.at(word_id);
// NOTE(review): presumably the right-hand side of `self_similarity +=`.
317 inverted_file.IDFWeight() * inverted_file.IDFWeight();
320 return static_cast<float>(self_similarity);
// Passes the caller's set to GetImageIds() of every inverted file.
// The set is not cleared here.
// NOTE(review): the line carrying the function name is missing; the member
// index lists `void GetImageIds(std::unordered_set<int>* image_ids) const`.
323 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
325 std::unordered_set<int>* image_ids)
const {
326 for (
const auto& inverted_file : inverted_files_) {
327 inverted_file.GetImageIds(image_ids);
// Loads the index from an already-open binary stream.
// Visible read order: int32 word count, int32 embedding length, projection
// matrix entries, every inverted file, int32 image count, then one
// (image id, normalization constant) pair per image. Values are read as raw
// bytes; no byte-order conversion or stream-state check after the reads is
// visible in this fragment.
// NOTE(review): the line carrying the function name is missing; the member
// index lists `void Read(std::ifstream* ifs)`.
331 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
333 std::ifstream* ifs) {
334 CHECK(ifs->is_open());
336 int32_t num_words = 0;
337 ifs->read(
reinterpret_cast<char*
>(&num_words),
sizeof(int32_t));
338 CHECK_GT(num_words, 0);
// Resizes and resets the inverted files before they are read below.
340 Initialize(num_words);
// NOTE(review): the declaration of N_t is on a missing line.
343 ifs->read(
reinterpret_cast<char*
>(&N_t),
sizeof(int32_t));
// The stored embedding length must match this instantiation's kEmbeddingDim.
344 CHECK_EQ(N_t, kEmbeddingDim)
345 <<
"The length of the binary strings should be " << kEmbeddingDim
346 <<
" but is " << N_t <<
". The indices are not compatible!";
// Projection matrix, one element per read, row index outermost.
348 for (
int i = 0; i < kEmbeddingDim; ++i) {
349 for (
int j = 0; j < kDescDim; ++j) {
// NOTE(review): the size argument of this read is on a missing line.
350 ifs->read(
reinterpret_cast<char*
>(&proj_matrix_(i, j)),
355 for (
auto& inverted_file : inverted_files_) {
356 inverted_file.Read(ifs);
359 int32_t num_images = 0;
360 ifs->read(
reinterpret_cast<char*
>(&num_images),
sizeof(int32_t));
361 CHECK_GE(num_images, 0);
// Existing constants are discarded before loading.
363 normalization_constants_.clear();
364 normalization_constants_.reserve(num_images);
365 for (int32_t i = 0; i < num_images; ++i) {
// NOTE(review): the declarations of image_id and value are on missing lines.
368 ifs->read(
reinterpret_cast<char*
>(&image_id),
sizeof(
int));
369 ifs->read(
reinterpret_cast<char*
>(&value),
sizeof(
float));
370 normalization_constants_[image_id] = value;
// Saves the index to an already-open binary stream.
// Visible write order: int32 word count, int32 embedding length, projection
// matrix entries, every inverted file, int32 image count, then one
// (image id, normalization constant) pair per image. Values are written as
// raw bytes.
// NOTE(review): the line carrying the function name is missing; the member
// index lists `void Write(std::ofstream* ofs) const`.
374 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
376 std::ofstream* ofs)
const {
377 CHECK(ofs->is_open());
379 int32_t num_words =
static_cast<int32_t
>(NumVisualWords());
380 ofs->write(
reinterpret_cast<const char*
>(&num_words),
sizeof(int32_t));
// Note: this check runs after the count has already been written.
381 CHECK_GT(num_words, 0);
383 const int32_t N_t =
static_cast<int32_t
>(kEmbeddingDim);
384 ofs->write(
reinterpret_cast<const char*
>(&N_t),
sizeof(int32_t));
// Projection matrix, one element per write, row index outermost.
386 for (
int i = 0; i < kEmbeddingDim; ++i) {
387 for (
int j = 0; j < kDescDim; ++j) {
// NOTE(review): the size argument of this write is on a missing line.
388 ofs->write(
reinterpret_cast<const char*
>(&proj_matrix_(i, j)),
393 for (
const auto& inverted_file : inverted_files_) {
394 inverted_file.Write(ofs);
// size() is implicitly converted to int32_t here.
397 const int32_t num_images = normalization_constants_.size();
398 ofs->write(
reinterpret_cast<const char*
>(&num_images),
sizeof(int32_t));
// Pairs are written in the unordered_map's iteration order.
400 for (
const auto& constant : normalization_constants_) {
401 ofs->write(
reinterpret_cast<const char*
>(&constant.first),
sizeof(int));
// NOTE(review): the size argument of this write is on a missing line.
402 ofs->write(
reinterpret_cast<const char*
>(&constant.second),
// Recomputes the IDF weight of every inverted file and rebuilds
// normalization_constants_ from per-image self-similarities.
// NOTE(review): the signature line is missing; the body matches the
// ComputeWeightsAndNormalizationConstants() declared earlier in this listing.
407 template <
typename kDescType,
int kDescDim,
int kEmbeddingDim>
// Collect the ids of all images referenced by any inverted file.
410 std::unordered_set<int> image_ids;
411 GetImageIds(&image_ids);
// Each inverted file gets the total number of distinct images.
413 for (
auto& inverted_file : inverted_files_) {
414 inverted_file.ComputeIDFWeight(image_ids.size());
// The constructor argument is a bucket-count hint, not an element count.
417 std::unordered_map<int, double> self_similarities(image_ids.size());
418 for (
const auto& inverted_file : inverted_files_) {
419 inverted_file.ComputeImageSelfSimilarities(&self_similarities);
422 normalization_constants_.clear();
423 normalization_constants_.reserve(image_ids.size());
424 for (
const auto& self_similarity : self_similarities) {
// Positive self-similarity: constant is 1/sqrt(value).
425 if (self_similarity.second > 0.0) {
426 normalization_constants_[self_similarity.first] =
427 static_cast<float>(1.0 / std::sqrt(self_similarity.second));
// NOTE(review): the `else` line is missing; presumably this is the
// non-positive branch, which stores 0.
429 normalization_constants_[self_similarity.first] = 0.0f;
void ComputeHammingEmbedding(const DescType &descriptors, const Eigen::VectorXi &word_ids)
Eigen::Matrix< float, Eigen::Dynamic, kDescDim > ProjMatrixType
Eigen::VectorXf ProjDescType
InvertedFile< kEmbeddingDim >::GeomType GeomType
void GetImageIds(std::unordered_set< int > *image_ids) const
void ConvertToBinaryDescriptor(const int word_id, const DescType &descriptor, std::bitset< kEmbeddingDim > *binary_descriptor) const
void Initialize(const int num_words)
InvertedFile< kEmbeddingDim >::EntryType EntryType
void Write(std::ofstream *ofs) const
Eigen::Matrix< kDescType, Eigen::Dynamic, kDescDim, Eigen::RowMajor > DescType
void Query(const DescType &descriptors, const Eigen::MatrixXi &word_ids, std::vector< ImageScore > *image_scores) const
float GetIDFWeight(const int word_id) const
static const int kInvalidWordId
int NumVisualWords() const
float ComputeSelfSimilarity(const Eigen::MatrixXi &word_ids) const
void AddEntry(const int image_id, const int word_id, typename DescType::Index feature_idx, const DescType &descriptor, const GeomType &geometry)
void Read(std::ifstream *ifs)
void GenerateHammingEmbeddingProjection()
void FindMatches(const int word_id, const std::unordered_set< int > &image_ids, std::vector< const EntryType * > *matches) const
T RandomGaussian(const T mean, const T stddev)
Eigen::MatrixXd::Index Index