39 #if defined(SIFTGPU_CUDA_ENABLED)
40 #include "CuTexImage.h"
47 s_multiply = s_col_max = s_row_max = s_guided_mult =
NULL;
48 _num_sift[0] = _num_sift[1] = 0;
49 _id_sift[0] = _id_sift[1] = 0;
50 _have_loc[0] = _have_loc[1] = 0;
51 __max_sift = max_sift <=0 ? 4096 : ((max_sift + 31)/ 32 * 32) ;
55 _sift_per_row = _sift_per_stripe * _sift_num_stripe;
61 if(s_multiply)
delete s_multiply;
62 if(s_guided_mult)
delete s_guided_mult;
63 if(s_col_max)
delete s_col_max;
64 if(s_row_max)
delete s_row_max;
69 return glGetError() == GL_NO_ERROR;
75 max_sift = ((max_sift + 31)/32)*32;
81 _have_loc[0] = _have_loc[1] = 0;
82 _id_sift[0] = _id_sift[1] = -1;
83 _num_sift[0] = _num_sift[1] = 1;
90 void SiftMatchGL::AllocateSiftMatch()
97 if ( n > 1) {_sift_num_stripe *= n; _sift_per_row *= n; }
110 #define GL_R32F 0x822E
126 if(_initialized)
return;
130 LoadSiftMatchShadersGLSL();
137 if(_initialized == 0)
return;
138 if (index > 1) index = 1;
139 if (index < 0) index = 0;
140 _have_loc[index] = 0;
143 if(
id !=-1 &&
id == _id_sift[index]) return ;
144 _id_sift[index] = id;
148 sift_buffer.resize(num * 128 /4);
150 _num_sift[index] = num;
151 int w = _sift_per_row * _pixel_per_sift;
152 int h = (num + _sift_per_row - 1)/ _sift_per_row;
153 sift_buffer.resize(w * h * 4, 0);
156 if(_sift_num_stripe == 1)
161 for(
int i = 0; i < _sift_num_stripe; ++i)
163 int ws = _sift_per_stripe * _pixel_per_sift;
165 int pos = i * ws * h * 4;
175 if(_num_sift[index] <=0)
return;
176 int w = _sift_per_row ;
177 int h = (_num_sift[index] + _sift_per_row - 1)/ _sift_per_row;
178 sift_buffer.resize(_num_sift[index] * 2);
181 memcpy(&sift_buffer[0], locations, _num_sift[index] * 2 *
sizeof(
float));
184 for(
int i = 0; i < _num_sift[index]; ++i)
186 sift_buffer[i*2] = *locations++;
187 sift_buffer[i*2+1]= *locations ++;
191 sift_buffer.resize(w * h * 2, 0);
194 if(_sift_num_stripe == 1)
199 for(
int i = 0; i < _sift_num_stripe; ++i)
201 int ws = _sift_per_stripe;
203 int pos = i * ws * h * 2;
204 glTexSubImage2D(
GlobalUtil::_texTarget, 0, x, 0, ws, h, GL_LUMINANCE_ALPHA , GL_FLOAT, &sift_buffer[pos]);
208 _have_loc[index] = 1;
213 if(_initialized == 0)
return;
214 if (index > 1) index = 1;
215 if (index < 0) index = 0;
216 _have_loc[index] = 0;
219 if(
id !=-1 &&
id == _id_sift[index]) return ;
220 _id_sift[index] = id;
224 sift_buffer.resize(num * 128 /4);
225 unsigned char * pub = (
unsigned char*) &sift_buffer[0];
226 for(
int i = 0; i < 128 * num; ++i)
230 _num_sift[index] = num;
231 int w = _sift_per_row * _pixel_per_sift;
232 int h = (num + _sift_per_row - 1)/ _sift_per_row;
233 sift_buffer.resize(w * h * 4, 0);
236 if(_sift_num_stripe == 1)
241 for(
int i = 0; i < _sift_num_stripe; ++i)
243 int ws = _sift_per_stripe * _pixel_per_sift;
245 int pos = i * ws * h * 4;
253 void SiftMatchGL::LoadSiftMatchShadersGLSL()
258 out <<
"#pragma optionNV(ifcvt none)\n"
259 "#pragma optionNV(unroll all)\n";
261 out <<
"#define SIFT_PER_STRIPE " << _sift_per_stripe <<
".0\n"
262 "#define PIXEL_PER_SIFT " << _pixel_per_sift <<
"\n"
263 "uniform sampler2DRect tex1, tex2; uniform vec2 size;\n"
266 <<
" vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;\n"
267 " vec2 index = gl_FragCoord.yx; \n"
268 " vec2 stripe_size = size.xy * SIFT_PER_STRIPE;\n"
269 " vec2 temp_div1 = index / stripe_size;\n"
270 " vec2 stripe_index = floor(temp_div1);\n"
271 " index = floor(stripe_size * (temp_div1 - stripe_index));\n"
272 " vec2 temp_div2 = index * vec2(1.0 / float(SIFT_PER_STRIPE));\n"
273 " vec2 temp_floor2 = floor(temp_div2);\n"
274 " vec2 index_v = temp_floor2 + vec2(0.5);\n "
275 " vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);\n"
276 " vec2 tx = (index_h + stripe_index * vec2(SIFT_PER_STRIPE))* vec2(PIXEL_PER_SIFT) + 0.5;\n"
277 " vec2 tpos1, tpos2; \n"
278 " vec4 tpos = vec4(tx, index_v);\n"
280 " for(int i = 0; i < PIXEL_PER_SIFT; ++i){\n"
281 " buf = texture2DRect(tex2, tpos.yw);\n"
282 " data1 = texture2DRect(tex1, tpos.xz);\n"
283 " val += (data1 * buf);\n"
284 " tpos.xy = tpos.xy + vec2(1.0, 1.0);\n"
286 " const float factor = 0.248050689697265625; \n"
287 " gl_FragColor =vec4(dot(val, vec4(factor)), index, 0);\n"
291 s_multiply = program=
new ProgramGLSL(out.str().c_str());
293 _param_multiply_tex1 = glGetUniformLocation(*program,
"tex1");
294 _param_multiply_tex2 = glGetUniformLocation(*program,
"tex2");
295 _param_multiply_size = glGetUniformLocation(*program,
"size");
299 out <<
"#pragma optionNV(ifcvt none)\n"
300 "#pragma optionNV(unroll all)\n";
302 out <<
"#define SIFT_PER_STRIPE " << _sift_per_stripe <<
".0\n"
303 "#define PIXEL_PER_SIFT " << _pixel_per_sift <<
"\n"
304 "uniform sampler2DRect tex1, tex2;\n"
305 "uniform sampler2DRect texL1;\n"
306 "uniform sampler2DRect texL2; \n"
309 "uniform vec4 size; \n"
312 <<
" vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;\n"
313 " vec2 index = gl_FragCoord.yx; \n"
314 " vec2 stripe_size = size.xy * SIFT_PER_STRIPE;\n"
315 " vec2 temp_div1 = index / stripe_size;\n"
316 " vec2 stripe_index = floor(temp_div1);\n"
317 " index = floor(stripe_size * (temp_div1 - stripe_index));\n"
318 " vec2 temp_div2 = index * vec2(1.0/ float(SIFT_PER_STRIPE));\n"
319 " vec2 temp_floor2 = floor(temp_div2);\n"
320 " vec2 index_v = temp_floor2 + vec2(0.5);\n "
321 " vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);\n"
324 " vec4 tlpos = vec4((index_h + stripe_index * vec2(SIFT_PER_STRIPE)) + 0.5, index_v);\n"
325 " vec3 loc1 = vec3(texture2DRect(texL1, tlpos.xz).xw, 1.0);\n"
326 " vec3 loc2 = vec3(texture2DRect(texL2, tlpos.yw).xw, 1.0);\n"
329 " vec3 hxloc1 = H* loc1;\n"
330 " vec2 diff = loc2.xy- (hxloc1.xy/hxloc1.z);\n"
331 " float disth = diff.x * diff.x + diff.y * diff.y;\n"
332 " if(disth > size.z ) {gl_FragColor = vec4(0.0, index, 0.0); return;}\n"
335 " vec3 fx1 = (F * loc1), ftx2 = (loc2 * F);\n"
336 " float x2tfx1 = dot(loc2, fx1);\n"
337 " vec4 temp = vec4(fx1.xy, ftx2.xy); \n"
338 " float sampson_error = (x2tfx1 * x2tfx1) / dot(temp, temp);\n"
339 " if(sampson_error > size.w) {gl_FragColor = vec4(0.0, index, 0.0); return;}\n"
342 " vec2 tx = (index_h + stripe_index * SIFT_PER_STRIPE)* vec2(PIXEL_PER_SIFT) + 0.5;\n"
343 " vec2 tpos1, tpos2; \n"
344 " vec4 tpos = vec4(tx, index_v);\n"
345 " for(int i = 0; i < PIXEL_PER_SIFT; ++i){\n"
346 " buf = texture2DRect(tex2, tpos.yw);\n"
347 " data1 = texture2DRect(tex1, tpos.xz);\n"
348 " val += data1 * buf;\n"
349 " tpos.xy = tpos.xy + vec2(1.0, 1.0);\n"
351 " const float factor = 0.248050689697265625; \n"
352 " gl_FragColor =vec4(dot(val, vec4(factor)), index, 0.0);\n"
356 s_guided_mult = program=
new ProgramGLSL(out.str().c_str());
358 _param_guided_mult_tex1 = glGetUniformLocation(*program,
"tex1");
359 _param_guided_mult_tex2= glGetUniformLocation(*program,
"tex2");
360 _param_guided_mult_texl1 = glGetUniformLocation(*program,
"texL1");
361 _param_guided_mult_texl2 = glGetUniformLocation(*program,
"texL2");
362 _param_guided_mult_h = glGetUniformLocation(*program,
"H");
363 _param_guided_mult_f = glGetUniformLocation(*program,
"F");
364 _param_guided_mult_param = glGetUniformLocation(*program,
"size");
368 out <<
"#define BLOCK_WIDTH 16.0\n"
369 "uniform sampler2DRect tex; uniform vec3 param;\n"
372 " float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH; \n"
373 " vec2 bestv = vec2(-1.0); float imax = -1.0;\n"
374 " for(float i = 0.0; i < param.x; i ++){\n "
375 " float v = texture2DRect(tex, vec2(i + 0.5, index)).r; \n"
376 " imax = v > bestv.r ? i : imax; \n "
377 " bestv = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));\n "
379 " bestv = acos(min(bestv, 1.0));\n"
380 " if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;\n"
381 " gl_FragColor = vec4(imax, bestv, index);\n"
384 s_row_max = program=
new ProgramGLSL(out.str().c_str());
385 _param_rowmax_param = glGetUniformLocation(*program,
"param");
388 out <<
"#define BLOCK_WIDTH 16.0\n"
389 "uniform sampler2DRect tex; uniform vec3 param;\n"
392 " float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH; \n"
393 " vec2 bestv = vec2(-1.0); float imax = -1.0;\n"
394 " for(float i = 0.0; i < param.x; i ++){\n "
395 " float v = texture2DRect(tex, vec2(index, i + 0.5)).r; \n"
396 " imax = (v > bestv.r)? i : imax; \n "
397 " bestv = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));\n "
399 " bestv = acos(min(bestv, 1.0));\n"
400 " if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;\n"
401 " gl_FragColor = vec4(imax, bestv, index);\n"
404 s_col_max = program =
new ProgramGLSL(out.str().c_str());
405 _param_colmax_param = glGetUniformLocation(*program,
"param");
411 float distmax,
float ratiomax,
float hdistmax,
float fdistmax,
int mbm)
414 int dw = _num_sift[1];
415 int dh = _num_sift[0];
416 if(_initialized ==0)
return 0;
417 if(dw <= 0 || dh <=0)
return 0;
418 if(_have_loc[0] == 0 || _have_loc[1] == 0)
return 0;
421 glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
428 glActiveTexture(GL_TEXTURE0);
430 glActiveTexture(GL_TEXTURE1);
432 glActiveTexture(GL_TEXTURE2);
434 glActiveTexture(GL_TEXTURE3);
442 float dot_param[4] = {(float)_texDes[0].GetDrawHeight(), (float) _texDes[1].GetDrawHeight(), hdistmax, fdistmax};
443 glUniform1i(_param_guided_mult_tex1, 0);
444 glUniform1i(_param_guided_mult_tex2, 1);
445 glUniform1i(_param_guided_mult_texl1, 2);
446 glUniform1i(_param_guided_mult_texl2, 3);
447 glUniformMatrix3fv(_param_guided_mult_h, 1, GL_TRUE, H);
448 glUniformMatrix3fv(_param_guided_mult_f, 1, GL_TRUE, F);
449 glUniform4fv(_param_guided_mult_param, 1, dot_param);
455 return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
458 int SiftMatchGL::GetBestMatch(
int max_match, uint32_t match_buffer[][2],
float distmax,
float ratiomax,
int mbm)
461 glActiveTexture(GL_TEXTURE0);
465 sift_buffer.resize(_num_sift[0] + _num_sift[1] + 16);
466 float * buffer1 = &sift_buffer[0], * buffer2 = &sift_buffer[_num_sift[0]];
470 _texMatch[0].
SetImageSize(16, ( _num_sift[0] + 15) / 16);
475 glUniform3f(_param_rowmax_param, (
float)_num_sift[1], distmax, ratiomax);
478 glReadPixels(0, 0, 16, (_num_sift[0] + 15)/16, GL_RED, GL_FLOAT, buffer1);
484 _texMatch[1].
SetImageSize(16, (_num_sift[1] + 15) / 16);
488 glUniform3f(_param_rowmax_param, (
float)_num_sift[0], distmax, ratiomax);
490 glReadPixels(0, 0, 16, (_num_sift[1] + 15) / 16, GL_RED, GL_FLOAT, buffer2);
502 for(
int i = 0; i < _num_sift[0] && nmatch < max_match; ++i)
505 if( j>= 0 && (!mbm ||
int(buffer2[j]) == i))
507 match_buffer[nmatch][0] = i;
508 match_buffer[nmatch][1] = j;
513 const GLenum error_code(glGetError());
514 if (error_code != GL_NO_ERROR) {
523 int dw = _num_sift[1];
524 int dh = _num_sift[0];
525 if(_initialized ==0)
return 0;
526 if(dw <= 0 || dh <=0)
return 0;
529 glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
535 glActiveTexture(GL_TEXTURE0);
537 glActiveTexture(GL_TEXTURE1);
544 float heights[2] = {(float)_texDes[0].GetDrawHeight(), (float)_texDes[1].GetDrawHeight()};
546 glUniform1i(_param_multiply_tex1, 0);
547 glUniform1i(_param_multiply_tex2 , 1);
548 glUniform2fv(_param_multiply_size, 1, heights);
552 glActiveTexture(GL_TEXTURE1);
555 return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
565 #if SIFTGPU_CUDA_ENABLED
579 #ifdef SIFTGPU_CUDA_ENABLED
595 std::cerr <<
"---------------------------------------------------------------------------\n"
596 <<
"CUDA not supported in this binary! To enable it, please use SiftGPU_CUDA_Enable\n"
597 <<
"Project for VS2005+ or set siftgpu_enable_cuda to 1 in makefile\n"
598 <<
"----------------------------------------------------------------------------\n";
607 std::cout <<
"[SiftMatchGPU]: " << (__language ==
SIFTMATCH_CUDA?
"CUDA" :
"GLSL") <<
"\n\n";
609 __matcher->InitSiftMatch();
622 if(__matcher)
return;
624 #ifdef SIFTGPU_CUDA_ENABLED
632 if(__matcher)
return;
638 const bool success = __matcher->
Allocate(max_sift, mbm);
658 if(__matcher)
delete __matcher;
677 float distmax,
float ratiomax,
float hdistmax,
float fdistmax,
int mutual_best_match)
681 return __matcher->
GetSiftMatch(max_match, match_buffer, distmax, ratiomax, mutual_best_match);
684 float Z[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1}, ti = (1.0e+20F);
687 distmax, ratiomax, H? hdistmax: ti, F? fdistmax: ti, mutual_best_match);
693 return __matcher->
GetSiftMatch(max_match, match_buffer, distmax, ratiomax, mutual_best_match);
filament::Texture::InternalFormat format
SiftMatchGPU * CreateNewSiftMatchGPU(int max_sift)
virtual void InitTexture(int width, int height, int clamp_to_edge=1)
virtual void SetImageSize(int width, int height)
static void DrawQuad(float x1, float x2, float y1, float y2)
static void UnbindMultiTex(int n)
static int _SupportNVFloat
static int _SupportTextureRG
static void CleanupOpenGL()
static void SetDeviceParam(int argc, char **argv)
static void InitGLParam(int NotTargetGL=0)
static int CreateWindowEZ()
virtual int UseProgram()=0
bool Allocate(int max_sift, int mbm) override
SiftMatchGL(int max_sift, int use_glsl)
void SetFeautreLocation(int index, const float *locatoins, int gap)
int GetSiftMatch(int max_match, uint32_t match_buffer[][2], float distmax, float ratiomax, int mbm)
int GetGuidedSiftMatch(int max_match, uint32_t match_buffer[][2], float *H, float *F, float distmax, float ratiomax, float hdistmax, float fdistmax, int mbm)
void SetDescriptors(int index, int num, const unsigned char *descriptor, int id=-1)
void SetMaxSift(int max_sift) override
virtual int _VerifyContextGL()
@ SIFTMATCH_SAME_AS_SIFTGPU
virtual void SetLanguage(int gpu_language)
virtual void SetMaxSift(int max_sift)
SiftMatchGPU(int max_sift=4096)
virtual bool Allocate(int max_sift, int mbm)
virtual int GetGuidedSiftMatch(int max_match, uint32_t match_buffer[][2], float *H, float *F, float distmax=0.7, float ratiomax=0.8, float hdistmax=32, float fdistmax=16, int mutual_best_match=1)
virtual void SetDescriptors(int index, int num, const float *descriptors, int id=-1)
virtual void SetDeviceParam(int argc, char **argv)
virtual int GetSiftMatch(int max_match, uint32_t match_buffer[][2], float distmax=0.7, float ratiomax=0.8, int mutual_best_match=1)
virtual int _CreateContextGL()
virtual void SetFeautreLocation(int index, const float *locations, int gap=0)