15 #include <unordered_map>
16 #include <unordered_set>
33 template <
int kEmbeddingDim>
53 const std::vector<EntryType>&
GetEntries()
const;
87 std::bitset<kEmbeddingDim>* binary_descriptor)
const;
100 const Eigen::Matrix<float, Eigen::Dynamic, kEmbeddingDim>&
105 std::vector<ImageScore>* image_scores)
const;
108 void GetImageIds(std::unordered_set<int>* ids)
const;
116 std::unordered_map<int, double>* self_similarities)
const;
119 void Read(std::ifstream* ifs);
120 void Write(std::ofstream* ofs)
const;
130 std::vector<EntryType> entries_;
137 hamming_dist_weight_functor_;
144 template <
int kEmbeddingDim>
145 const HammingDistWeightFunctor<kEmbeddingDim>
146 InvertedFile<kEmbeddingDim>::hamming_dist_weight_functor_;
148 template <
int kEmbeddingDim>
150 : status_(UNUSABLE), idf_weight_(0.0f) {
151 static_assert(kEmbeddingDim % 8 == 0,
152 "Dimensionality of projected space needs to"
153 " be a multiple of 8.");
154 static_assert(kEmbeddingDim > 0,
155 "Dimensionality of projected space needs to be > 0.");
157 thresholds_.resize(kEmbeddingDim);
158 thresholds_.setZero();
161 template <
int kEmbeddingDim>
163 return entries_.size();
166 template <
int kEmbeddingDim>
167 const std::vector<typename InvertedFile<kEmbeddingDim>::EntryType>&
172 template <
int kEmbeddingDim>
174 return status_ & HAS_EMBEDDING;
177 template <
int kEmbeddingDim>
179 return status_ & ENTRIES_SORTED;
182 template <
int kEmbeddingDim>
184 return status_ & USABLE;
187 template <
int kEmbeddingDim>
192 CHECK_GE(image_id, 0);
193 CHECK_EQ(descriptor.size(), kEmbeddingDim);
198 ConvertToBinaryDescriptor(descriptor, &entry.
descriptor);
199 entries_.push_back(entry);
200 status_ &= ~ENTRIES_SORTED;
203 template <
int kEmbeddingDim>
205 std::sort(entries_.begin(), entries_.end(),
207 return entry1.image_id < entry2.image_id;
209 status_ |= ENTRIES_SORTED;
212 template <
int kEmbeddingDim>
215 status_ &= ~ENTRIES_SORTED;
218 template <
int kEmbeddingDim>
223 thresholds_.setZero();
226 template <
int kEmbeddingDim>
229 std::bitset<kEmbeddingDim>* binary_descriptor)
const {
230 CHECK_EQ(descriptor.size(), kEmbeddingDim);
231 for (
int i = 0; i < kEmbeddingDim; ++i) {
232 (*binary_descriptor)[i] = descriptor[i] > thresholds_[i];
236 template <
int kEmbeddingDim>
238 if (entries_.empty()) {
242 std::unordered_set<int> image_ids;
243 GetImageIds(&image_ids);
245 idf_weight_ = std::log(
static_cast<double>(num_total_images) /
246 static_cast<double>(image_ids.size()));
249 template <
int kEmbeddingDim>
254 template <
int kEmbeddingDim>
256 const Eigen::Matrix<float, Eigen::Dynamic, kEmbeddingDim>&
258 const int num_descriptors =
static_cast<int>(
descriptors.rows());
259 if (num_descriptors < 2) {
263 std::vector<float> elements(num_descriptors);
264 for (
int n = 0; n < kEmbeddingDim; ++n) {
265 for (
int i = 0; i < num_descriptors; ++i) {
268 thresholds_[n] =
Median(elements);
271 status_ |= HAS_EMBEDDING;
274 template <
int kEmbeddingDim>
277 std::vector<ImageScore>* image_scores)
const {
278 CHECK_EQ(descriptor.size(), kEmbeddingDim);
280 image_scores->clear();
286 if (entries_.size() == 0) {
290 const float squared_idf_weight = idf_weight_ * idf_weight_;
292 std::bitset<kEmbeddingDim> bin_descriptor;
293 ConvertToBinaryDescriptor(descriptor, &bin_descriptor);
296 image_score.
image_id = entries_.front().image_id;
297 image_score.
score = 0.0f;
298 int num_image_votes = 0;
302 for (
const auto& entry : entries_) {
303 if (image_score.
image_id < entry.image_id) {
304 if (num_image_votes > 0) {
314 std::sqrt(
static_cast<float>(num_image_votes));
315 image_score.
score *= squared_idf_weight;
316 image_scores->push_back(image_score);
319 image_score.
image_id = entry.image_id;
320 image_score.
score = 0.0f;
324 const size_t hamming_dist = (bin_descriptor ^ entry.descriptor).
count();
326 if (hamming_dist <= hamming_dist_weight_functor_.kMaxHammingDistance) {
327 image_score.
score += hamming_dist_weight_functor_(hamming_dist);
328 num_image_votes += 1;
333 if (num_image_votes > 0) {
334 image_score.
score /= std::sqrt(
static_cast<float>(num_image_votes));
335 image_score.
score *= squared_idf_weight;
336 image_scores->push_back(image_score);
340 template <
int kEmbeddingDim>
342 std::unordered_set<int>* ids)
const {
343 for (
const EntryType& entry : entries_) {
344 ids->insert(entry.image_id);
348 template <
int kEmbeddingDim>
350 std::unordered_map<int, double>* self_similarities)
const {
351 const double squared_idf_weight = idf_weight_ * idf_weight_;
352 for (
const auto& entry : entries_) {
353 (*self_similarities)[entry.image_id] += squared_idf_weight;
357 template <
int kEmbeddingDim>
359 CHECK(ifs->is_open());
361 ifs->read(
reinterpret_cast<char*
>(&status_),
sizeof(uint8_t));
362 ifs->read(
reinterpret_cast<char*
>(&idf_weight_),
sizeof(
float));
364 for (
int i = 0; i < kEmbeddingDim; ++i) {
365 ifs->read(
reinterpret_cast<char*
>(&thresholds_[i]),
sizeof(
float));
368 uint32_t num_entries = 0;
369 ifs->read(
reinterpret_cast<char*
>(&num_entries),
sizeof(uint32_t));
370 entries_.resize(num_entries);
372 for (uint32_t i = 0; i < num_entries; ++i) {
373 entries_[i].Read(ifs);
377 template <
int kEmbeddingDim>
379 CHECK(ofs->is_open());
381 ofs->write(
reinterpret_cast<const char*
>(&status_),
sizeof(uint8_t));
382 ofs->write(
reinterpret_cast<const char*
>(&idf_weight_),
sizeof(
float));
384 for (
int i = 0; i < kEmbeddingDim; ++i) {
385 ofs->write(
reinterpret_cast<const char*
>(&thresholds_[i]),
389 const uint32_t num_entries =
static_cast<uint32_t
>(entries_.size());
390 ofs->write(
reinterpret_cast<const char*
>(&num_entries),
sizeof(uint32_t));
392 for (uint32_t i = 0; i < num_entries; ++i) {
393 entries_[i].Write(ofs);
void ComputeIDFWeight(const int num_total_images)
void ScoreFeature(const DescType &descriptor, std::vector< ImageScore > *image_scores) const
void GetImageIds(std::unordered_set< int > *ids) const
InvertedFileEntry< kEmbeddingDim > EntryType
void AddEntry(const int image_id, typename DescType::Index feature_idx, const DescType &descriptor, const GeomType &geometry)
void Read(std::ifstream *ifs)
const std::vector< EntryType > & GetEntries() const
void ComputeImageSelfSimilarities(std::unordered_map< int, double > *self_similarities) const
void Write(std::ofstream *ofs) const
void ConvertToBinaryDescriptor(const DescType &descriptor, std::bitset< kEmbeddingDim > *binary_descriptor) const
size_t NumEntries() const
void ComputeHammingEmbedding(const Eigen::Matrix< float, Eigen::Dynamic, kEmbeddingDim > &descriptors)
bool HasHammingEmbedding() const
bool EntriesSorted() const
double Median(const std::vector< T > &elems)
Eigen::MatrixXd::Index Index
std::bitset< N > descriptor