24 #if defined(SIFTGPU_CUDA_ENABLED)
34 #include <cuda_runtime.h>
36 #include "CuTexImage.h"
38 #include "ProgramCU.h"
// Tile-size constants. Only MULT_BLOCK_DIMY is referenced in the visible part
// of this file (Allocate uses it to size _texCRT).
// NOTE(review): "TBLOCK" presumably means thread block - confirm against the
// kernels that consume these macros.
42 #define MULT_TBLOCK_DIMX 128
43 #define MULT_TBLOCK_DIMY 1
// Expands to MULT_TBLOCK_DIMX, i.e. 128.
44 #define MULT_BLOCK_DIMX (MULT_TBLOCK_DIMX)
// Expands to 8 * MULT_TBLOCK_DIMY, i.e. 8.
45 #define MULT_BLOCK_DIMY (8 * MULT_TBLOCK_DIMY)
// Constructs a matcher and resets the per-slot bookkeeping for both feature
// sets (slot 0 and slot 1).
//
// max_sift: requested feature capacity. Values <= 0 select 4096; positive
//           values are rounded up to a multiple of 32.
//
// NOTE(review): the end of the constructor body is not visible in this
// excerpt, so any further initialization is undocumented here.
47 SiftMatchCU::SiftMatchCU(
int max_sift) :
SiftMatchGPU() {
// Clear the feature counts, descriptor ids and location flags of both slots.
48 _num_sift[0] = _num_sift[1] = 0;
49 _id_sift[0] = _id_sift[1] = 0;
50 _have_loc[0] = _have_loc[1] = 0;
// Fall back to 4096 for non-positive requests; otherwise round up to a
// multiple of 32.
51 __max_sift = max_sift <= 0 ? 4096 : ((max_sift + 31) / 32 * 32);
// Initializes the matcher's textures, all sized from __max_sift, and then
// records __max_sift as the feature count of both slots.
//
// max_sift: NOTE(review) - not referenced in the visible lines; this excerpt
//           skips lines, so it may be consumed there (e.g. to update
//           __max_sift) - confirm.
// mbm:      NOTE(review) - not referenced in the visible lines; presumably
//           gates the _texCRT / _texMatch[1] allocation - confirm.
//
// Returns bool. The return statements are not visible here; presumably false
// is returned when any InitTexture call fails - confirm.
55 bool SiftMatchCU::Allocate(
int max_sift,
int mbm) {
// For each of the two slots, initialize the descriptor texture
// (8 * __max_sift wide) and the location texture (__max_sift wide).
// NOTE(review): the third InitTexture argument is presumably a channel
// count - confirm against CuTexImage.
58 for (
int index = 0; index < 2; ++index) {
59 if (!_texDes[index].InitTexture(8 * __max_sift, 1, 4) ||
60 !_texLoc[index].InitTexture(__max_sift, 1, 2)) {
// __max_sift x __max_sift texture plus the first match texture.
65 if (!_texDot.InitTexture(__max_sift, __max_sift) ||
66 !_texMatch[0].InitTexture(__max_sift, 1)) {
// cols = ceil(__max_sift / MULT_BLOCK_DIMY), used as the second dimension of
// _texCRT.
71 const int cols = (__max_sift + MULT_BLOCK_DIMY - 1) / MULT_BLOCK_DIMY;
72 if (!_texCRT.InitTexture(__max_sift, cols, 32) ||
73 !_texMatch[1].InitTexture(__max_sift, 1)) {
// Both slots report __max_sift features after allocation.
78 _num_sift[0] = __max_sift;
79 _num_sift[1] = __max_sift;
// Stores the feature capacity in __max_sift after rounding max_sift up to a
// multiple of 32 (for non-negative input).
//
// NOTE(review): unlike the constructor, no fallback for max_sift <= 0 is
// visible here; a non-positive argument yields a non-positive __max_sift.
84 void SiftMatchCU::SetMaxSift(
int max_sift) {
85 max_sift = ((max_sift + 31) / 32) * 32;
86 __max_sift = max_sift;
// Forwards to ProgramCU::CheckCudaDevice(device) and returns its result
// unchanged.
89 int SiftMatchCU::CheckCudaDevice(
int device) {
90 return ProgramCU::CheckCudaDevice(device);
// One-time initialization entry point: does nothing if _initialized is
// already set.
// NOTE(review): the work performed on the first call is not visible in this
// excerpt.
93 void SiftMatchCU::InitSiftMatch() {
94 if (_initialized)
return;
// Registers a descriptor set for one of the two slots and (re)initializes that
// slot's descriptor texture.
//
// index: slot selector; values outside [0, 1] are clamped.
// num:   number of descriptors; clamped to __max_sift.
//
// NOTE(review): the rest of the parameter list is not visible in this excerpt.
// The body uses `id`, which is presumably declared there together with the
// descriptor data pointer - confirm. The host-to-device upload of the
// descriptor data is also not visible.
99 void SiftMatchCU::SetDescriptors(
int index,
int num,
// No-op until the matcher has been initialized.
101 if (_initialized == 0)
return;
102 if (index > 1) index = 1;
103 if (index < 0) index = 0;
// The location flag of this slot is cleared on every call that reaches this
// point, including the early return below.
104 _have_loc[index] = 0;
// Skip the rest when the caller passes the same id as last time for this
// slot; an id of -1 never matches.
106 if (
id != -1 &&
id == _id_sift[index])
return;
107 _id_sift[index] = id;
108 if (num > __max_sift) num = __max_sift;
109 _num_sift[index] = num;
// Descriptor texture is 8 * num wide.
110 _texDes[index].InitTexture(8 * num, 1, 4);
// Float-descriptor overload: stages the descriptors as bytes in sift_buffer
// and forwards to the overload that takes an unsigned char buffer.
//
// index:       slot selector; values outside [0, 1] are clamped.
// num:         number of descriptors; clamped to __max_sift.
// descriptors: float descriptor data, 128 values per descriptor (per the loop
//              bound below).
//
// NOTE(review): the rest of the parameter list (presumably declaring `id`) and
// the loop body that converts each float to a byte are not visible in this
// excerpt.
114 void SiftMatchCU::SetDescriptors(
int index,
int num,
const float*
descriptors,
// No-op until the matcher has been initialized.
116 if (_initialized == 0)
return;
117 if (index > 1) index = 1;
118 if (index < 0) index = 0;
119 if (num > __max_sift) num = __max_sift;
// Reserve num * 128 / 4 buffer elements and view them as bytes.
// NOTE(review): presumably sift_buffer holds 4-byte elements, giving 128 bytes
// per descriptor - confirm. If num is 0 the buffer is empty and
// &sift_buffer[0] is still evaluated - confirm callers never pass 0.
121 sift_buffer.resize(num * 128 / 4);
122 unsigned char* pub = (
unsigned char*)&sift_buffer[0];
123 for (
int i = 0; i < 128 * num; ++i) {
// Hand the staged byte buffer to the byte overload.
126 SetDescriptors(index, num, pub,
id);
// Uploads feature locations for a slot and marks the slot as having
// locations.
//
// index:     slot selector.
//            NOTE(review): unlike SetDescriptors, no clamping of index is
//            visible here - an out-of-range value would index the member
//            arrays out of bounds.
// locations: host array of location floats; two floats are consumed per
//            feature in the repacking loop below.
//
// NOTE(review): the rest of the parameter list and the conditions that choose
// between the direct upload and the repacking path are not visible in this
// excerpt; presumably the two CopyFromHost calls are alternatives - confirm.
129 void SiftMatchCU::SetFeautreLocation(
int index,
const float* locations,
// Nothing to do when the slot has no descriptors registered.
131 if (_num_sift[index] <= 0)
return;
132 _texLoc[index].InitTexture(_num_sift[index], 1, 2);
// Direct upload from the caller's array.
134 _texLoc[index].CopyFromHost(locations);
// Repacking path: copy two floats per feature into sift_buffer, then upload
// the packed buffer.
136 sift_buffer.resize(_num_sift[index] * 2);
137 float* pbuf = (
float*)(&sift_buffer[0]);
138 for (
int i = 0; i < _num_sift[index]; ++i) {
139 pbuf[i * 2] = *locations++;
140 pbuf[i * 2 + 1] = *locations++;
143 _texLoc[index].CopyFromHost(pbuf);
// Required by GetGuidedSiftMatch, which returns 0 unless both slots have this
// flag set.
145 _have_loc[index] = 1;
// Guided matching entry point: runs ProgramCU::MultiplyDescriptorG on both
// slots' descriptors and locations, then returns the result of GetBestMatch.
//
// max_match:    upper bound on the number of matches written.
// match_buffer: output array of index pairs, filled by GetBestMatch.
// H, F:         passed towards MultiplyDescriptorG.
//               NOTE(review): presumably a homography and a fundamental
//               matrix - confirm; only H is visible in the truncated call.
// distmax, ratiomax: forwarded to GetBestMatch.
// hdistmax, fdistmax: thresholds associated with H and F (only hdistmax is
//               visible in the truncated call).
// mbm:          when nonzero, &_texCRT is passed to the multiply step instead
//               of NULL; also forwarded to GetBestMatch.
//
// Returns 0 when the matcher is not initialized, when either slot has no
// features, or when either slot has no locations; otherwise the value
// returned by GetBestMatch.
148 int SiftMatchCU::GetGuidedSiftMatch(
int max_match, uint32_t match_buffer[][2],
149 float* H,
float* F,
float distmax,
150 float ratiomax,
float hdistmax,
151 float fdistmax,
int mbm) {
152 if (_initialized == 0)
return 0;
153 if (_num_sift[0] <= 0 || _num_sift[1] <= 0)
return 0;
154 if (_have_loc[0] == 0 || _have_loc[1] == 0)
return 0;
// NOTE(review): the tail of this argument list is not visible in this excerpt.
155 ProgramCU::MultiplyDescriptorG(_texDes, _texDes + 1, _texLoc, _texLoc + 1,
156 &_texDot, (mbm ? &_texCRT :
NULL), H, hdistmax,
158 return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
// Unguided matching entry point: runs ProgramCU::MultiplyDescriptor on both
// slots' descriptors, then returns the result of GetBestMatch.
//
// max_match:    upper bound on the number of matches written.
// match_buffer: output array of index pairs, filled by GetBestMatch.
// distmax, ratiomax: forwarded to GetBestMatch.
// mbm:          when nonzero, &_texCRT is passed to the multiply step instead
//               of NULL; also forwarded to GetBestMatch.
//
// Returns 0 when the matcher is not initialized or either slot has no
// features; otherwise the value returned by GetBestMatch.
161 int SiftMatchCU::GetSiftMatch(
int max_match, uint32_t match_buffer[][2],
162 float distmax,
float ratiomax,
int mbm) {
163 if (_initialized == 0)
return 0;
164 if (_num_sift[0] <= 0 || _num_sift[1] <= 0)
return 0;
165 ProgramCU::MultiplyDescriptor(_texDes, _texDes + 1, &_texDot,
166 (mbm ? &_texCRT :
NULL));
167 return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
// Reads the per-row (and per-column) match results back to the host and writes
// the accepted index pairs into match_buffer.
//
// max_match:    upper bound on the number of pairs written.
// match_buffer: output; entry [k][0] receives the slot-0 index and [k][1] the
//               matched slot-1 index.
// distmax, ratiomax: forwarded to ProgramCU::GetRowMatch / GetColMatch.
// mbm:          when nonzero, a pair (i, j) is kept only if the column result
//               for j points back to i.
//
// NOTE(review): this excerpt skips lines - the declarations of `nmatch` and
// `j`, the assignment of `j` inside the loop, the increment of `nmatch`, any
// condition around the column-match section, the handling of a CUDA error and
// the return statement are not visible. Presumably the column-match section
// runs only when mbm is nonzero (callers pass _texCRT to the multiply step
// only in that case) - confirm.
170 int SiftMatchCU::GetBestMatch(
int max_match, uint32_t match_buffer[][2],
171 float distmax,
float ratiomax,
int mbm) {
// One host buffer holds both result arrays: the first _num_sift[0] elements
// for the row matches, the following _num_sift[1] for the column matches.
172 sift_buffer.resize(_num_sift[0] + _num_sift[1]);
173 int *buffer1 = (
int*)&sift_buffer[0],
174 *buffer2 = (
int*)&sift_buffer[_num_sift[0]];
// Row matches: one result per slot-0 feature, copied into buffer1.
175 _texMatch[0].InitTexture(_num_sift[0], 1);
176 ProgramCU::GetRowMatch(&_texDot, _texMatch, distmax, ratiomax);
177 _texMatch[0].CopyToHost(buffer1);
// Column matches: one result per slot-1 feature, copied into buffer2.
179 _texMatch[1].InitTexture(_num_sift[1], 1);
180 ProgramCU::GetColMatch(&_texCRT, _texMatch + 1, distmax, ratiomax);
181 _texMatch[1].CopyToHost(buffer2);
// Walk the slot-0 features, stopping early once max_match pairs were written.
184 for (
int i = 0; i < _num_sift[0] && nmatch < max_match; ++i) {
// Accept a non-negative j; with mbm set, additionally require buffer2[j] == i.
186 if (j >= 0 && (!mbm ||
int(buffer2[j]) == i)) {
187 match_buffer[nmatch][0] = i;
188 match_buffer[nmatch][1] = j;
// Fetch (and clear) the last CUDA runtime error recorded so far.
193 cudaError_t
error = cudaGetLastError();
194 if (
error != cudaSuccess) {
static void error(char *msg)