ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
ProgramGLSL.cpp
Go to the documentation of this file.
1 // File: ProgramGLSL.cpp
3 // Author: Changchang Wu
4 // Description : GLSL related classes
5 // class ProgramGLSL A simple wrapper of GLSL programs
6 // class ShaderBagGLSL GLSL shaders for SIFT
7 // class FilterGLSL GLSL gaussian filters for SIFT
8 //
9 // Copyright (c) 2007 University of North Carolina at Chapel Hill
10 // All Rights Reserved
11 //
12 // Permission to use, copy, modify and distribute this software and its
13 // documentation for educational, research and non-profit purposes, without
14 // fee, and without a written agreement is hereby granted, provided that the
15 // above copyright notice and the following paragraph appear in all copies.
16 //
17 // The University of North Carolina at Chapel Hill make no representations
18 // about the suitability of this software for any purpose. It is provided
19 // 'as is' without express or implied warranty.
20 //
21 // Please send BUG REPORTS to ccwu@cs.unc.edu
22 //
24 
25 
26 #include "GL/glew.h"
27 #include <string.h>
28 #include <stdio.h>
29 #include <iomanip>
30 #include <iostream>
31 #include <sstream>
32 #include <vector>
33 #include <algorithm>
34 #include <math.h>
35 using namespace std;
36 
37 #include "GlobalUtil.h"
38 #include "ProgramGLSL.h"
39 #include "GLTexImage.h"
40 #include "ShaderMan.h"
41 #include "SiftGPU.h"
42 
43 ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource)
44 {
45 
46 
47  _type = shadertype;
48  _compiled = 0;
49 
50 
51  _shaderID = glCreateShader(shadertype);
52  if(_shaderID == 0) return;
53 
54  if(source)
55  {
56 
57  GLint code_length;
58  if(filesource ==0)
59  {
60  const char* code = source;
61  code_length = (GLint) strlen(code);
62  glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
63  }else
64  {
65  char * code;
66  if((code_length= ReadShaderFile(source, code)) ==0) return;
67  glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
68  delete code;
69  }
70 
71  glCompileShader(_shaderID);
72 
73  CheckCompileLog();
74 
75  if(!_compiled) std::cout << source;
76  }
77 
78 
79 
80 
81 }
82 
83 int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile, char*& code )
84 {
85  code = NULL;
86  FILE * file;
87  int len=0;
88 
89  if(sourcefile == NULL) return 0;
90 
91  file = fopen(sourcefile,"rt");
92  if(file == NULL) return 0;
93 
94 
95  fseek(file, 0, SEEK_END);
96  len = ftell(file);
97  rewind(file);
98  if(len >1)
99  {
100  code = new char[len+1];
101  fread(code, sizeof( char), len, file);
102  code[len] = 0;
103  }else
104  {
105  len = 0;
106  }
107 
108  fclose(file);
109 
110  return len;
111 
112 }
113 
114 void ProgramGLSL::ShaderObject::CheckCompileLog()
115 {
116 
117  GLint status;
118  glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status);
119  _compiled = (status ==GL_TRUE);
120 
121  if(_compiled == 0) PrintCompileLog(std::cout);
122 
123 
124 }
125 
126 ProgramGLSL::ShaderObject::~ShaderObject()
127 {
128  if(_shaderID) glDeleteShader(_shaderID);
129 
130 }
131 
132 int ProgramGLSL::ShaderObject::IsValidFragmentShader()
133 {
134  return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
135 }
136 
137 int ProgramGLSL::ShaderObject::IsValidVertexShader()
138 {
139  return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
140 }
141 
142 
143 void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
144 {
145  GLint len = 0;
146 
147  glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len);
148  if(len <=1) return;
149 
150  char * compileLog = new char[len+1];
151  if(compileLog == NULL) return;
152 
153  glGetShaderInfoLog(_shaderID, len, &len, compileLog);
154 
155 
156  os<<"Compile Log\n"<<compileLog<<"\n";
157 
158  delete[] compileLog;
159 }
160 
161 
163 {
164  _linked = 0;
165  _TextureParam0 = -1;
166  _programID = glCreateProgram();
167 }
169 {
170  if(_programID)glDeleteProgram(_programID);
171 }
172 void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
173 {
174  if(_programID && shader.IsValidShaderObject())
175  glAttachShader(_programID, shader.GetShaderID());
176 }
177 void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
178 {
179  if(_programID && shader.IsValidShaderObject())
180  glDetachShader(_programID, shader.GetShaderID());
181 }
183 {
184  _linked = 0;
185 
186  if(_programID==0) return 0;
187 
188  glLinkProgram(_programID);
189 
190  CheckLinkLog();
191 
192 // GlobalUtil::StartTimer("100 link test");
193 // for(int i = 0; i<100; i++) glLinkProgram(_programID);
194 // GlobalUtil::StopTimer();
195 
196  return _linked;
197 }
198 
200 {
201  GLint status;
202  glGetProgramiv(_programID, GL_LINK_STATUS, &status);
203 
204  _linked = (status == GL_TRUE);
205 
206 }
207 
208 
210 {
211  if(_programID && _linked)
212  {
214 // glValidateProgram(_programID);
215 // glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
216 // return status == GL_TRUE;
217  return 1;
218  }
219  else
220  return 0;
221 }
222 
223 void ProgramGLSL::PrintLinkLog(std::ostream &os)
224 {
225  GLint len = 0;
226 
227  glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len);
228  if(len <=1) return;
229 
230  char* linkLog = new char[len+1];
231  if(linkLog == NULL) return;
232 
233  glGetProgramInfoLog(_programID, len, &len, linkLog);
234 
235  linkLog[len] = 0;
236 
237  if(strstr(linkLog, "failed"))
238  {
239  os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n";
240  _linked = 0;
241  }
242 
243  delete[] linkLog;
244 }
245 
247 {
248  if(ValidateProgram())
249  {
250  glUseProgram(_programID);
251  if (_TextureParam0 >= 0) glUniform1i(_TextureParam0, 0);
252  return true;
253  }
254  else
255  {
256  return false;
257  }
258 }
259 
260 
261 ProgramGLSL::ProgramGLSL(const char *frag_source)
262 {
263  _linked = 0;
264  _programID = glCreateProgram();
265  _TextureParam0 = -1;
266  ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
267 
268  if(shader.IsValidFragmentShader())
269  {
270  AttachShaderObject(shader);
271  LinkProgram();
272 
273  if(!_linked)
274  {
275  //shader.PrintCompileLog(std::cout);
276  PrintLinkLog(std::cout);
277  } else
278  {
279  _TextureParam0 = glGetUniformLocation(_programID, "tex");
280  }
281  }else
282  {
283  _linked = 0;
284  }
285 
286 }
287 
288 /*
289 ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
290 {
291  _used = 0;
292  _linked = 0;
293  _programID = glCreateProgram();
294  ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
295  ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
296  AttachShaderObject(shader);
297  AttachShaderObject(vertex_shader);
298  LinkProgram();
299  if(!_linked)
300  {
301  shader.PrintCompileLog(std::cout);
302  vertex_shader.PrintCompileLog(std::cout);
303  PrintLinkLog(std::cout);
304  std::cout<<vert_source;
305  std::cout<<frag_source;
306  }
307 
308 }
309 */
310 
311 
312 
314 {
315  glLinkProgram(_programID);
316 }
317 
319 {
320  return _linked;
321 }
322 
324 {
325  //pixel inside 3*sigma box
326  int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;//
327  int width = 2*sz + 1;
328 
329  //filter size truncation
331  {
332  std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl;
334  width = 2 * sz + 1;
335  }
336 
337  int i;
338  float * kernel = new float[width];
339  float rv = 1.0f/(sigma*sigma);
340  float v, ksum =0;
341 
342  // pre-compute filter
343  for( i = -sz ; i <= sz ; ++i)
344  {
345  kernel[i+sz] = v = exp(-0.5f * i * i *rv) ;
346  ksum += v;
347  }
348 
349  //normalize the kernel
350  rv = 1.0f / ksum;
351  for(i = 0; i< width ;i++) kernel[i]*=rv;
352  //
353 
354  MakeFilterProgram(kernel, width);
355 
356  _size = sz;
357 
358  delete[] kernel;
359  if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl;
360 }
361 
362 
363 void FilterGLSL::MakeFilterProgram(float kernel[], int width)
364 {
366  {
367  s_shader_h = CreateFilterHPK(kernel, width);
368  s_shader_v = CreateFilterVPK(kernel, width);
369  }else
370  {
371  s_shader_h = CreateFilterH(kernel, width);
372  s_shader_v = CreateFilterV(kernel, width);
373  }
374 }
375 
376 ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width)
377 {
378  ostringstream out;
379  out<<setprecision(8);
380 
381  out<< "uniform sampler2DRect tex;";
382  out<< "\nvoid main(void){ float intensity = 0.0 ; vec2 pos;\n";
383 
384  int half_width = width / 2;
385  for(int i = 0; i< width; i++)
386  {
387  if(i == half_width)
388  {
389 
390  out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
391  out<<"intensity+= or * "<<kernel[i]<<";\n";
392  }else
393  {
394  out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n";
395  out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n";
396  }
397  }
398 
399  //copy original data to red channel
400  out<<"gl_FragColor.r = or;\n";
401  out<<"gl_FragColor.b = intensity;}\n"<<'\0';
402 
403  return new ProgramGLSL(out.str().c_str());
404 }
405 
406 
407 ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height)
408 {
409  ostringstream out;
410  out<<setprecision(8);
411 
412  out<< "uniform sampler2DRect tex;";
413  out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
414  int half_height = height / 2;
415  for(int i = 0; i< height; i++)
416  {
417 
418  if(i == half_height)
419  {
420  out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
421  out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
422 
423  }else
424  {
425  out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n";
426  out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n";
427  }
428 
429  }
430 
431  out<<"gl_FragColor.b = orb.y;\n";
432  out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
433  out<<"gl_FragColor.r = intensity;}\n"<<'\0';
434 
435 // std::cout<<buffer<<endl;
436  return new ProgramGLSL(out.str().c_str());
437 }
438 
439 
440 
441 ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width)
442 {
443  //both h and v are packed...
444  int i, j , xw, xwn;
445 
446  int halfwidth = width >>1;
447  float * pf = kernel + halfwidth;
448  int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up
449  int npixel = (nhpixel<<1)+1;//
450  float weight[3];
451  ostringstream out;;
452  out<<setprecision(8);
453 
454  out<< "uniform sampler2DRect tex;";
455  out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
457  out<<"vec4 pc; vec2 coord; \n";
458  for( i = 0 ; i < npixel ; i++)
459  {
460  out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n";
461  out<<"pc=texture2DRect(tex, coord);\n";
462  if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0.0) pc = pc.rrbb;\n";
463  //for each sub-pixel j in center, the weight of sub-pixel k
464  xw = (i - nhpixel)*2;
465  for( j = 0; j < 3; j++)
466  {
467  xwn = xw + j -1;
468  weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
469  }
470  if(weight[1] == 0.0)
471  {
472  out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
473  }
474  else
475  {
476  out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
477  out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
478  }
479 
480  }
481  out<<"gl_FragColor = result;}\n"<<'\0';
482 
483  return new ProgramGLSL(out.str().c_str());
484 
485 
486 }
487 
488 
489 ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height)
490 {
491 
492  //both h and v are packed...
493  int i, j, yw, ywn;
494 
495  int halfh = height >>1;
496  float * pf = kernel + halfh;
497  int nhpixel = (halfh+1)>>1; //how many neighbour pixels need to be looked up
498  int npixel = (nhpixel<<1)+1;//
499  float weight[3];
500  ostringstream out;;
501  out<<setprecision(8);
502 
503  out<< "uniform sampler2DRect tex;";
504  out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
506  out<<"vec4 pc; vec2 coord;\n";
507  for( i = 0 ; i < npixel ; i++)
508  {
509  out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n";
510  out<<"pc=texture2DRect(tex, coord);\n";
511  if(GlobalUtil::_PreciseBorder) out<<"if(coord.y < 0.0) pc = pc.rgrg;\n";
512 
513  //for each sub-pixel j in center, the weight of sub-pixel k
514  yw = (i - nhpixel)*2;
515  for( j = 0; j < 3; j++)
516  {
517  ywn = yw + j -1;
518  weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
519  }
520  if(weight[1] == 0.0)
521  {
522  out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
523  }else
524  {
525  out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
526  out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
527  }
528  }
529  out<<"gl_FragColor = result;}\n"<<'\0';
530 
531  return new ProgramGLSL(out.str().c_str());
532 }
533 
534 
535 
537 {
538  s_debug = 0;
539  s_orientation = 0;
540  s_display_gaussian = 0;
541  s_display_dog = 0;
542  s_display_grad = 0;
543  s_display_keys = 0;
544  s_sampling = 0;
545  s_grad_pass = 0;
546  s_dog_pass = 0;
547  s_keypoint = 0;
548  s_genlist_init_tight = 0;
549  s_genlist_init_ex = 0;
550  s_genlist_histo = 0;
551  s_genlist_start = 0;
552  s_genlist_step = 0;
553  s_genlist_end = 0;
554  s_vertex_list = 0;
555  s_descriptor_fp = 0;
556  s_margin_copy = 0;
558  f_gaussian_skip0 = NULL;
559  f_gaussian_skip1 = NULL;
560  f_gaussian_step = NULL;
561  _gaussian_step_num = 0;
562 
563 }
564 
566 {
567  if(s_debug)delete s_debug;
568  if(s_orientation)delete s_orientation;
569  if(s_display_gaussian)delete s_display_gaussian;
570  if(s_display_dog)delete s_display_dog;
571  if(s_display_grad)delete s_display_grad;
572  if(s_display_keys)delete s_display_keys;
573  if(s_sampling)delete s_sampling;
574  if(s_grad_pass)delete s_grad_pass;
575  if(s_dog_pass) delete s_dog_pass;
576  if(s_keypoint)delete s_keypoint;
577  if(s_genlist_init_tight)delete s_genlist_init_tight;
578  if(s_genlist_init_ex)delete s_genlist_init_ex;
579  if(s_genlist_histo)delete s_genlist_histo;
580  if(s_genlist_start)delete s_genlist_start;
581  if(s_genlist_step)delete s_genlist_step;
582  if(s_genlist_end)delete s_genlist_end;
583  if(s_vertex_list)delete s_vertex_list;
584  if(s_descriptor_fp)delete s_descriptor_fp;
585  if(s_margin_copy) delete s_margin_copy;
586 
588  if(f_gaussian_skip1) delete f_gaussian_skip1;
589 
590  for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
591  {
592  if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i];
593  }
594  if(f_gaussian_step && _gaussian_step_num > 0)
595  {
596  for(int i = 0; i< _gaussian_step_num; i++)
597  {
598  delete f_gaussian_step[i];
599  }
600  delete[] f_gaussian_step;
601  }
602 }
603 
604 
606 {
607  float sigma = param.GetInitialSmoothSigma(octave_min);
608  if(sigma == 0)
609  {
610  f_gaussian_skip0 = NULL;
611  }else
612  {
613  for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
614  {
615  if(f_gaussian_skip0_v[i]->_id == octave_min)
616  {
617  f_gaussian_skip0 = f_gaussian_skip0_v[i];
618  return ;
619  }
620  }
621  FilterGLSL * filter = new FilterGLSL(sigma);
622  filter->_id = octave_min;
623  f_gaussian_skip0_v.push_back(filter);
624  f_gaussian_skip0 = filter;
625  }
626 }
627 
629 {
630  if(param._sigma_skip0>0.0f)
631  {
632  FilterGLSL * filter;
633  f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
635  f_gaussian_skip0_v.push_back(filter);
636  }
637  if(param._sigma_skip1>0.0f)
638  {
639  f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
640  }
641 
642  f_gaussian_step = new FilterProgram*[param._sigma_num];
643  for(int i = 0; i< param._sigma_num; i++)
644  {
645  f_gaussian_step[i] = new FilterGLSL(param._sigma[i]);
646  }
647  _gaussian_step_num = param._sigma_num;
648 }
649 
650 
652 {
653  LoadKeypointShader(param._dog_threshold, param._edge_threshold);
654  LoadGenListShader(param._dog_level_num, 0);
655  CreateGaussianFilters(param);
656 }
657 
658 
660 {
661 
662 
663  s_gray = new ProgramGLSL(
664  "uniform sampler2DRect tex; void main(void){\n"
665  "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex, gl_TexCoord[0].st ).rgb);\n"
666  "gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
667 
668 
669  s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg = gl_TexCoord[0].st;}");
670 
671 
672  s_sampling = new ProgramGLSL(
673  "uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
674 
675  //
676  s_grad_pass = new ProgramGLSL(
677  "uniform sampler2DRect tex; void main ()\n"
678  "{\n"
679  " vec4 v1, v2, gg;\n"
680  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
681  " gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
682  " gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
683  " gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
684  " gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
685  " vec2 dxdy = (gg.yw - gg.xz); \n"
686  " float grad = 0.5*length(dxdy);\n"
687  " float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
688  " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
689  "}\n\0");
690 
691  ProgramGLSL * program;
692  s_margin_copy = program = new ProgramGLSL(
693  "uniform sampler2DRect tex; uniform vec2 truncate;\n"
694  "void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
695 
696  _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
697 
698 
700  LoadOrientationShader();
701 
702  if(s_orientation == NULL)
703  {
704  //Load a simplified version if the right version is not supported
705  s_orientation = program = new ProgramGLSL(
706  "uniform sampler2DRect tex; uniform sampler2DRect oTex;\n"
707  " uniform float size; void main(){\n"
708  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
709  " vec4 oo = texture2DRect(oTex, cc.rg);\n"
710  " gl_FragColor.rg = cc.rg;\n"
711  " gl_FragColor.b = oo.a;\n"
712  " gl_FragColor.a = size;}");
713 
714  _param_orientation_gtex = glGetUniformLocation(*program, "oTex");
715  _param_orientation_size = glGetUniformLocation(*program, "size");
718  std::cerr<<"Orientation simplified on this hardware"<<endl;
719  }
720 
721  if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
722  if(s_descriptor_fp == NULL)
723  {
725  std::cerr<<"Descriptor ignored on this hardware"<<endl;
726  }
727 
728  s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
729 }
730 
731 
733 {
734  s_copy_key = new ProgramGLSL(
735  "uniform sampler2DRect tex; void main(){\n"
736  "gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0); }");
737 
738 
739  ProgramGLSL * program;
740  s_vertex_list = program = new ProgramGLSL(
741  "uniform vec4 sizes; uniform sampler2DRect tex;\n"
742  "void main(void){\n"
743  "float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
744  "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
745  "float px = mod(index, twidth);\n"
746  "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
747  "vec4 cc = texture2DRect(tex, tpos );\n"
748  "float size = 3.0 * cc.a; //sizes.x;// \n"
749  "gl_FragColor.zw = vec2(0.0, 1.0);\n"
750  "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy; }\n"
751  "else {float type = fract(px);\n"
752  "vec2 dxy = vec2(0); \n"
753  "dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
754  "dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
755  "float s = sin(cc.b); float c = cos(cc.b); \n"
756  "gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
757  "gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
758 
759  _param_genvbo_size = glGetUniformLocation(*program, "sizes");
760 
761  s_display_gaussian = new ProgramGLSL(
762  "uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
763  "gl_FragColor = vec4(r, r, r, 1);}" );
764 
765  s_display_dog = new ProgramGLSL(
766  "uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
767  "gl_FragColor = vec4(g, g, g, 0.0);}" );
768 
769  s_display_grad = new ProgramGLSL(
770  "uniform sampler2DRect tex; void main(void){\n"
771  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
772 
773  s_display_keys= new ProgramGLSL(
774  "uniform sampler2DRect tex; void main(void){\n"
775  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
776  " if(cc.r ==0.0) discard; gl_FragColor = (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
777 }
778 
779 void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold)
780 {
781  float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
782  float threshold1 = threshold;
783  float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
784  ostringstream out;;
785  streampos pos;
786 
787  //tex(X)(Y)
788  //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
789  //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
791  {
792  out << "#define THRESHOLD0 (" << threshold0 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
793  "#define THRESHOLD1 (" << threshold1 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
794  "#define THRESHOLD2 " << threshold2 << "\n";
795  }else
796  {
797  out << "#define THRESHOLD0 " << threshold0 << "\n"
798  "#define THRESHOLD1 " << threshold1 << "\n"
799  "#define THRESHOLD2 " << threshold2 << "\n";
800  }
801 
802  out<<
803  "uniform sampler2DRect tex, texU, texD; void main ()\n"
804  "{\n"
805  " vec4 v1, v2, gg, temp;\n"
806  " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
807  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
808  " temp = texture2DRect(tex, gl_TexCoord[1].xy);\n"
809  " v1.x = temp.g; gg.x = temp.r;\n"
810  " temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
811  " v1.y = temp.g; gg.y = temp.r;\n"
812  " temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
813  " v1.z = temp.g; gg.z = temp.r;\n"
814  " temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
815  " v1.w = temp.g; gg.w = temp.r;\n"
816  " v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
817  " v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
818  " v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
819  " v2.w = texture2DRect(tex, TexRU.xy).g;\n"
820  " vec2 dxdy = (gg.yw - gg.xz); \n"
821  " float grad = 0.5*length(dxdy);\n"
822  " float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
823  " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
824 
825  //test against 8 neighbours
826  //use variable to identify type of extremum
827  //1.0 for local maximum and 0.5 for minimum
828  <<
829  " float dog = 0.0; \n"
830  " gl_FragData[1] = vec4(0, 0, 0, 0); \n"
831  " dog = cc.g > float(THRESHOLD0) && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
832  " dog = cc.g < float(-THRESHOLD0) && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
833  " if(dog == 0.0) return;\n";
834 
835  pos = out.tellp();
836 //do edge suppression first..
837  //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
838  //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
839 
840  out<<
841  " float fxx, fyy, fxy; \n"
842  " vec4 D2 = v1.xyzw - cc.gggg;\n"
843  " vec2 D4 = v2.xw - v2.yz;\n"
844  " fxx = D2.x + D2.y;\n"
845  " fyy = D2.z + D2.w;\n"
846  " fxy = 0.25*(D4.x + D4.y);\n"
847  " float fxx_plus_fyy = fxx + fyy;\n"
848  " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
849  " float score_down = (fxx*fyy - fxy*fxy);\n"
850  " if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)return;\n";
851 
852  //...
853  out<<" \n"
854  " vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
855  " float fx = D5.x, fy = D5.y ; \n"
856  " float fs, fss , fxs, fys ; \n"
857  " vec2 v3; vec4 v4, v5, v6;\n"
858  //read 9 pixels of upper level
859  <<
860  " v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
861  " v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
862  " v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
863  " v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
864  " v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
865  " v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
866  " v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
867  " v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
868  " v6.w = texture2DRect(texU, TexRU.xy).g;\n"
869  //compare with 9 pixels of upper level
870  //read and compare with 9 pixels of lower level
871  //the maximum case
872  <<
873  " if(dog == 1.0)\n"
874  " {\n"
875  " if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
876  " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
877  " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
878  " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
879  " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
880  " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
881  " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
882  " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
883  " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
884  " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
885  " if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
886  " }\n"
887  //the minimum case
888  <<
889  " else{\n"
890  " if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
891  " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
892  " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
893  " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
894  " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
895  " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
896  " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
897  " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
898  " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
899  " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
900  " if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
901  " }\n";
902 
904 
905  // sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
906  out <<
907  " fs = 0.5*( v3.x - v3.y ); \n"
908  " fss = v3.x + v3.y - cc.g - cc.g;\n"
909  " fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
910  " fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
911 
912  //
913 // let dog difference be quadratic function of dx, dy, ds;
914  // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +
915  // + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
916  // + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
917  // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
918 
919  //the local extremum satisfies
920  // df/dx = 0, df/dy = 0, df/dz = 0
921 
922  //that is
923  // |-fx| | fxx fxy fxs | |dx|
924  // |-fy| = | fxy fyy fys | * |dy|
925  // |-fs| | fxs fys fss | |ds|
926  // need to solve dx, dy, ds
927 
928  // Use Gauss elimination to solve the linear system
929  <<
930  " vec3 dxys = vec3(0.0); \n"
931  " vec4 A0, A1, A2 ; \n"
932  " A0 = vec4(fxx, fxy, fxs, -fx); \n"
933  " A1 = vec4(fxy, fyy, fys, -fy); \n"
934  " A2 = vec4(fxs, fys, fss, -fs); \n"
935  " vec3 x3 = abs(vec3(fxx, fxy, fxs)); \n"
936  " float maxa = max(max(x3.x, x3.y), x3.z); \n"
937  " if(maxa >= 1e-10 ) { \n"
938  " if(x3.y ==maxa ) \n"
939  " { \n"
940  " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
941  " }else if( x3.z == maxa ) \n"
942  " { \n"
943  " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
944  " } \n"
945  " A0 /= A0.x; \n"
946  " A1 -= A1.x * A0; \n"
947  " A2 -= A2.x * A0; \n"
948  " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
949  " if( x2.y > x2.x ) \n"
950  " { \n"
951  " vec3 TEMP = A2.yzw; \n"
952  " A2.yzw = A1.yzw; \n"
953  " A1.yzw = TEMP; \n"
954  " x2.x = x2.y; \n"
955  " } \n"
956  " if(x2.x >= 1e-10) { \n"
957  " A1.yzw /= A1.y; \n"
958  " A2.yzw -= A2.y * A1.yzw; \n"
959  " if(abs(A2.z) >= 1e-10) { \n"
960  // compute dx, dy, ds:
961  <<
962  " \n"
963  " dxys.z = A2.w /A2.z; \n"
964  " dxys.y = A1.w - dxys.z*A1.z; \n"
965  " dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y; \n"
966 
967  //one more threshold which I forgot in versions prior to 286
968  <<
969  " bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= float(THRESHOLD1)) ;\n"
970  " if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0.0;\n"
971  " }\n"
972  " }\n"
973  " }\n"
974  //keep the point when the offset is less than 1
975  <<
976  " gl_FragData[1] = vec4( dog, dxys); \n";
977  else
978 
979  out<<
980  " gl_FragData[1] = vec4( dog, 0.0, 0.0, 0.0) ; \n";
981 
982  out<<
983  "}\n" <<'\0';
984 
985 
986 
987  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
988  if(program->IsNative())
989  {
990  s_keypoint = program ;
991  //parameter
992  }else
993  {
994  delete program;
995  out.seekp(pos);
996  out <<
997  " gl_FragData[1] = vec4(dog, 0.0, 0.0, 0.0) ; \n"
998  "}\n" <<'\0';
999  s_keypoint = program = new ProgramGLSL(out.str().c_str());
1001  std::cerr<<"Detection simplified on this hardware"<<endl;
1002  }
1003 
1004  _param_dog_texu = glGetUniformLocation(*program, "texU");
1005  _param_dog_texd = glGetUniformLocation(*program, "texD");
1006 }
1007 
1008 
1009 void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
1010 {
1011  glUniform1i(_param_dog_texu, 1);
1012  glUniform1i(_param_dog_texd, 2);
1013 }
1014 
1015 void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
1016 {
1017  glUniform1i(_param_genlist_step_tex0, 1);
1018 }
1019 void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth, float size)
1020 {
1021  float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
1022  glUniform4fv(_param_genvbo_size, 1, sizes);
1023 
1024 }
1025 
1026 
1027 
1029 {
1030  glUseProgram(0);
1031 }
1032 
1033 
1034 
1035 void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
1036 {
1037  ProgramGLSL * program;
1038 
1039  s_genlist_init_tight = new ProgramGLSL(
1040  "uniform sampler2DRect tex; void main (void){\n"
1041  "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1042  "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1043  "gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
1044  "}");
1045 
1046 
1047  s_genlist_init_ex = program = new ProgramGLSL(
1048  "uniform sampler2DRect tex;uniform vec2 bbox;\n"
1049  "void main (void ){\n"
1050  "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1051  "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1052  "bvec4 helper2 = bvec4( \n"
1053  "all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0.0,\n"
1054  "all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0.0,\n"
1055  "all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0.0,\n"
1056  "all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0.0);\n"
1057  "gl_FragColor = vec4(helper2);\n"
1058  "}");
1059  _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1060 
1061 
1062  //reduction ...
1063  s_genlist_histo = new ProgramGLSL(
1064  "uniform sampler2DRect tex; void main (void){\n"
1065  "vec4 helper; vec4 helper2; \n"
1066  "helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
1067  "helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
1068  "gl_FragColor.rg = helper2.xz + helper2.yw;\n"
1069  "helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
1070  "helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
1071  "gl_FragColor.ba= helper2.xz+helper2.yw;\n"
1072  "}");
1073 
1074 
1075  //read of the first part, which generates tex coordinates
1076  s_genlist_start= program = LoadGenListStepShader(1, 1);
1077  _param_ftex_width= glGetUniformLocation(*program, "width");
1078  _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
1079  //stepping
1080  s_genlist_step = program = LoadGenListStepShader(0, 1);
1081  _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
1082 
1083 }
1084 
// Uploads the coordinate limit used by the margin-copy program: the uniform at
// _param_margin_copy_truncate receives the two floats (xmax - 0.5, ymax - 0.5).
// NOTE(review): glUniform* writes to the currently bound program, so the
// margin-copy program presumably has to be active here — confirm at call sites.
1085 void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
1086 {
1087  float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
1088  glUniform2fv(_param_margin_copy_truncate, 1, truncate);
1089 }
1090 
// NOTE(review): the signature (listing line 1091) is missing from this extract;
// the body reads two values named w and h — confirm their types in the header.
// Sets the two-component uniform at _param_genlist_init_bbox to (w - 1, h - 1).
1092 {
1093  float bbox[2] = {w - 1.0f, h - 1.0f};
1094  glUniform2fv(_param_genlist_init_bbox, 1, bbox);
1095 }
// NOTE(review): the signature (listing line 1096) is missing from this extract;
// the body reads a single value named width.
// Writes width to the float uniform at _param_ftex_width and points the sampler
// uniform at _param_genlist_start_tex0 to texture unit 0.
1097 {
1098  glUniform1f(_param_ftex_width, width);
1099  glUniform1i(_param_genlist_start_tex0, 0);
1100 }
1101 
1102 
// NOTE(review): the signature (listing line 1103) is missing from this extract;
// the body reads two values named start and step, and the call sites in this
// file pass them as (start, step).
//
// Generates the GLSL fragment source for one list-generation pass and wraps it
// in a newly allocated ProgramGLSL.
//  - step  : number of sampler uniforms (tex0 .. tex{step-1}) declared, and the
//            number of descent iterations emitted.
//  - start : non-zero -> the running index is derived from the fragment
//            coordinate and the "width" uniform, and pos starts at (0.5, 0.5);
//            zero     -> pos and index are read back from the "tex" sampler.
// The emitted shader writes vec4(pos, index, 1.0).
// The returned object is created with new; nothing in this block releases it.
1104 {
1105  int i;
1106  // char chanels[5] = "rgba";
1107  ostringstream out;
1108 
1109  for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n";
1110  if(start)
1111  {
1112  out<<"uniform float width;\n";
1113  out<<"void main(void){\n";
1114  out<<"float index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
1115  out<<"vec2 pos = vec2(0.5, 0.5);\n";
1116  }else
1117  {
1118  out<<"uniform sampler2DRect tex;\n";
1119  out<<"void main(void){\n";
1120  out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
1121  out<<"vec2 pos = tc.rg; float index = tc.b;\n";
1122  }
1123  out<<"vec2 sum; vec4 cc;\n";
1124 
1125 
1126  if(step>0)
1127  {
1128  out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
1129  for(i = 0; i < step; i++)
1130  {
1131 
 // Each iteration samples tex<i> at pos, compares index against the running
 // sums of cc.r, cc.g and cc.b to choose one of four offsets, subtracts the
 // amount skipped from index, and then sets pos = 2*pos + offset.
1132  out<<"cc = texture2DRect(tex"<<i<<", pos);\n";
1133  out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b; \n";
1134  out<<"if (index <cc.r){ opos = cpos.xx;}\n";
1135  out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
1136  out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
1137  out<<"else {opos = cpos.yy; index -= sum.y;}\n";
1138  out<<"pos = (pos + pos + opos);\n";
1139  }
1140  }
1141  out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
 // An explicit NUL character is streamed after the closing brace; the source is
 // then handed over as a C string via c_str().
1142  out<<"}\n"<<'\0';
1143  return new ProgramGLSL(out.str().c_str());
1144 }
1145 
1146 
// NOTE(review): the signature (listing line 1147) and the conditions on listing
// lines 1151, 1175 and 1218 are missing from this extract; the braces that
// follow them are kept exactly as found.
//
// Assembles the orientation-histogram fragment shader as text:
//  1. optional NVIDIA "#pragma optionNV" hints,
//  2. #defines taken from GlobalUtil (_OrientationGaussianFactor,
//     _OrientationWindowFactor, _MulitiOrientationThreshold) and the uniforms,
//  3. a loop over a window around the keypoint that accumulates weighted
//     gradient magnitudes into bins[0..8], each vec4 covering four 10-degree bins,
//  4. the remaining code appended by WriteOrientationCodeToStream(out).
// The program is kept in s_orientation only when IsNative() reports success;
// otherwise it is deleted and s_orientation is left as it was.
1148 {
1149  ostringstream out;
1150 
 // NOTE(review): guard condition (listing line 1151) not visible here.
1152  {
1153  out << "#pragma optionNV(ifcvt none)\n"
1154  "#pragma optionNV(unroll all)\n";
1155  }
1156 
1157  out<<"\n"
1158  "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
1159  "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1160  "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1161  "uniform sampler2DRect tex; \n"
1162  "uniform sampler2DRect gradTex; \n"
1163  "uniform vec4 size; \n"
1164  << ((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)? " uniform sampler2DRect texS; \n" : " ") <<
1165  "void main() \n"
1166  "{ \n"
1167  " vec4 bins[10]; \n"
1168  " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
1169  " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
1170  " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
1171  " vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1172  " vec2 pos = loc.xy; \n"
1173  " bool orientation_mode = (size.z != 0.0); \n"
1174  " float sigma = orientation_mode? abs(size.z) : loc.w; \n";
 // NOTE(review): guard condition (listing line 1175) not visible here; the block
 // emits code that reads texS, which is only declared above when
 // _SubpixelLocalization or _KeepExtremumSign is set.
1176  {
1177  out<<
1178  " if(orientation_mode){\n"
1179  " vec4 offset = texture2DRect(texS, pos);\n"
1180  " pos.xy = pos.xy + offset.yz; \n"
1181  " sigma = sigma * pow(size.w, offset.w);\n"
1182  " #if "<< GlobalUtil::_KeepExtremumSign << "\n"
1183  " if(offset.x < 0.6) sigma = -sigma; \n"
1184  " #endif\n"
1185  " }\n";
1186  }
1187  out<<
1188  " //bool fixed_orientation = (size.z < 0.0); \n"
1189  " if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
1190  " float gsigma = sigma * GAUSSIAN_WF; \n"
1191  " vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))) ; \n"
1192  " vec2 dim = size.xy; \n"
1193  " float dist_threshold = win.x*win.x+0.5; \n"
1194  " float factor = -0.5/(gsigma*gsigma); \n"
1195  " vec4 sz; vec2 spos; \n"
1196  " //if(any(pos.xy <= 1)) discard; \n"
1197  " sz.xy = max( pos - win, vec2(1,1)); \n"
1198  " sz.zw = min( pos + win, dim-vec2(2, 2)); \n"
1199  " sz = floor(sz)+0.5;";
1200  //loop to get the histogram
1201 
1202  out<<"\n"
1203  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1204  " { \n"
1205  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1206  " { \n"
1207  " vec2 offset = spos - pos; \n"
1208  " float sq_dist = dot(offset,offset); \n"
1209  " if( sq_dist < dist_threshold){ \n"
1210  " vec4 cc = texture2DRect(gradTex, spos); \n"
1211  " float grad = cc.b; float theta = cc.a; \n"
1212  " float idx = floor(degrees(theta)*0.1); \n"
1213  " if(idx < 0.0 ) idx += 36.0; \n"
1214  " float weight = grad*exp(sq_dist * factor); \n"
1215  " float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ; \n"
1216  " vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";
1217 
 // NOTE(review): the condition choosing between the two accumulation variants
 // (listing line 1218) is not visible here. First variant: index bins[] with a
 // computed integer; second variant: select the bin through nested comparisons.
1219  {
1220  //dynamic indexing may not be faster
1221  out<<"\n"
1222  " int iidx = int((idx*0.25)); \n"
1223  " bins[iidx]+=inc; \n"
1224  " } \n"
1225  " } \n"
1226  " }";
1227 
1228  }else
1229  {
1230  //nvfp40 still does not support dynamic array indexing
1231  //unrolled binary search...
1232  out<<"\n"
1233  " if(idx < 16.0) \n"
1234  " { \n"
1235  " if(idx < 8.0) \n"
1236  " { \n"
1237  " if(idx < 4.0) { bins[0]+=inc;} \n"
1238  " else { bins[1]+=inc;} \n"
1239  " }else \n"
1240  " { \n"
1241  " if(idx < 12.0){ bins[2]+=inc;} \n"
1242  " else { bins[3]+=inc;} \n"
1243  " } \n"
1244  " }else if(idx < 32.0) \n"
1245  " { \n"
1246  " if(idx < 24.0) \n"
1247  " { \n"
1248  " if(idx <20.0) { bins[4]+=inc;} \n"
1249  " else { bins[5]+=inc;} \n"
1250  " }else \n"
1251  " { \n"
1252  " if(idx < 28.0){ bins[6]+=inc;} \n"
1253  " else { bins[7]+=inc;} \n"
1254  " } \n"
1255  " }else \n"
1256  " { \n"
1257  " bins[8]+=inc; \n"
1258  " } \n"
1259  " } \n"
1260  " } \n"
1261  " }";
1262 
1263  }
1264 
 // Append histogram smoothing, peak detection and the output write.
1265  WriteOrientationCodeToStream(out);
1266 
 // Build the program; keep it and cache its uniform locations only on success.
1267  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1268  if(program->IsNative())
1269  {
1270  s_orientation = program ;
1271  _param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
1272  _param_orientation_size = glGetUniformLocation(*program, "size");
1273  _param_orientation_stex = glGetUniformLocation(*program, "texS");
1274  }else
1275  {
1276  delete program;
1277  }
1278 }
1279 
1280 
// NOTE(review): the signature (listing line 1281) and the conditions on listing
// lines 1309, 1345, 1355, 1430 and 1461 are missing from this extract. The body
// only uses a stream named out; its exact type is not visible here.
//
// Appends the tail of the orientation shader to out. The emitted GLSL:
//  1. smooths the bins[] histogram (two passes of FILTER_CODE over bins[0..8],
//     with bins[9] set to bins[0] and prev starting at bins[8]),
//  2. computes the histogram maximum (maxh),
//  3. defines a FINDPEAK macro that tests each of the four components of a bin
//     vector for a local peak and refines its position with the di term,
//  4. runs FINDPEAK over the nine bin vectors (as a loop or unrolled),
//  5. writes the result to gl_FragData and closes main().
// testpeak_code / savepeak_code are text fragments spliced into FINDPEAK; which
// fragments are used depends on conditions not visible in this extract.
1282 {
1283  //smooth histogram and find the largest
1284 /*
1285  smoothing kernel: (1 3 6 7 6 3 1 )/27
1286  the same as 3 pass of (1 1 1)/3 averaging
1287  maybe better to use 4 pass on the vectors...
1288 */
1289 
1290 
1291  //the inner loop on different array numbers is always unrolled in fp40
1292 
1293  //bug fixed here:)
1294  out<<"\n"
1295  " //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0; \n"
1296  " mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0; \n"
1297  " mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
1298  " #define FILTER_CODE(i) { \\\n"
1299  " vec4 newb = (bins[i]* m2); \\\n"
1300  " newb.xyz += ( prev.yzw * m1); \\\n"
1301  " prev = bins[i]; \\\n"
1302  " newb.wzy += ( bins[i+1].zyx *m1); \\\n"
1303  " bins[i] = newb;}\n"
1304  " for (int j=0; j<2; j++) \n"
1305  " { \n"
1306  " vec4 prev = bins[8]; \n"
1307  " bins[9] = bins[0]; \n";
1308 
 // NOTE(review): the condition selecting the looped vs. unrolled filter
 // (listing line 1309) is not visible here.
1310  {
1311  out<<
1312  " for (int i=0; i<9; i++) \n"
1313  " { \n"
1314  " FILTER_CODE(i); \n"
1315  " } \n"
1316  " }";
1317 
1318  }else
1319  {
1320  //manually unroll the loop for ATI.
1321  out <<
1322  " FILTER_CODE(0);\n"
1323  " FILTER_CODE(1);\n"
1324  " FILTER_CODE(2);\n"
1325  " FILTER_CODE(3);\n"
1326  " FILTER_CODE(4);\n"
1327  " FILTER_CODE(5);\n"
1328  " FILTER_CODE(6);\n"
1329  " FILTER_CODE(7);\n"
1330  " FILTER_CODE(8);\n"
1331  " }\n";
1332  }
1333  //find the maximum voting
1334  out<<"\n"
1335  " vec4 maxh; vec2 maxh2; \n"
1336  " vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
1337  " bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
1338  " maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";
1339 
1340  std::string testpeak_code;
1341  std::string savepeak_code;
1342 
1343  //save two/three/four orientations with the largest votings?
1344 
 // NOTE(review): the condition for the multi-orientation branch (listing line
 // 1345) is not visible here. This branch keeps several orientations in the
 // Orientations/weights vectors; the else branch keeps a single Orientation.
1346  {
1347  out<<"\n"
1348  " vec4 Orientations = vec4(0.0, 0.0, 0.0, 0.0); \n"
1349  " vec4 weights = vec4(0.0,0.0,0.0,0.0); ";
1350 
1351  testpeak_code = "\\\n"
1352  " {test = greaterThan(bins[i], hh);";
1353 
1354  //save the orientations in weight-decreasing order
 // NOTE(review): the first condition of this chain (listing line 1355) is not
 // visible; the next branch tests GlobalUtil::_MaxOrientation == 3, so this one
 // presumably handles a maximum of two orientations — confirm.
1356  {
1357  savepeak_code = "\\\n"
1358  " if(weight <=weights.g){}\\\n"
1359  " else if(weight >weights.r)\\\n"
1360  " {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
1361  " else {weights.g = weight; Orientations.g = th;}";
1362  }else if(GlobalUtil::_MaxOrientation ==3)
1363  {
1364  savepeak_code = "\\\n"
1365  " if(weight <=weights.b){}\\\n"
1366  " else if(weight >weights.r)\\\n"
1367  " {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
1368  " else if(weight >weights.g)\\\n"
1369  " {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
1370  " else {weights.b = weight; Orientations.b = th;}";
1371  }else
1372  {
1373  savepeak_code = "\\\n"
1374  " if(weight <=weights.a){}\\\n"
1375  " else if(weight >weights.r)\\\n"
1376  " {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
1377  " else if(weight >weights.g)\\\n"
1378  " {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
1379  " else if(weight >weights.b)\\\n"
1380  " {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
1381  " else {weights.a = weight; Orientations.a = th;}";
1382  }
1383 
1384  }else
1385  {
1386  out<<"\n"
1387  " float Orientation; ";
1388  testpeak_code ="\\\n"
1389  " if(npeaks<=0.0){\\\n"
1390  " test = equal(bins[i], maxh) ;";
1391  savepeak_code="\\\n"
1392  " npeaks++; \\\n"
1393  " Orientation = th;";
1394 
1395  }
1396  //find the peaks
 // FINDPEAK(i, k): i is the index into bins[], k the bin offset added to th.
 // Both testpeak_code variants open one extra brace, which the "}}" near the
 // end of the macro closes again.
1397  out <<"\n"
1398  " #define FINDPEAK(i, k)" <<testpeak_code<<"\\\n"
1399  " if( any ( test) ) \\\n"
1400  " { \\\n"
1401  " if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \\\n"
1402  " { \\\n"
1403  " float di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
1404  " float th = (k+di+0.5); float weight = bins[i].x;"
1405  <<savepeak_code<<"\\\n"
1406  " }\\\n"
1407  " else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) ) \\\n"
1408  " { \\\n"
1409  " float di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
1410  " float th = (k+di+1.5); float weight = bins[i].y; "
1411  <<savepeak_code<<" \\\n"
1412  " }\\\n"
1413  " if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) ) \\\n"
1414  " { \\\n"
1415  " float di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
1416  " float th = (k+di+2.5); float weight = bins[i].z; "
1417  <<savepeak_code<<" \\\n"
1418  " }\\\n"
1419  " else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \\\n"
1420  " { \\\n"
1421  " float di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
1422  " float th = (k+di+3.5); float weight = bins[i].w; "
1423  <<savepeak_code<<" \\\n"
1424  " }\\\n"
1425  " }}\\\n"
1426  " prevb = bins[i].w;";
1427  //the following loop will be unrolled anyway in fp40,
1428  //taking more than 1000 instructions..
1429  //....
 // NOTE(review): the condition selecting the looped vs. unrolled peak search
 // (listing line 1430) is not visible here.
1431  {
1432  out<<"\n"
1433  " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test; \n"
1434  " bins[9] = bins[0]; \n"
1435  " float npeaks = 0.0, k = 0.0; \n"
1436  " float prevb = bins[8].w; \n"
1437  " for (int i = 0; i < 9; i++) \n"
1438  " {\n"
1439  " FINDPEAK(i, k);\n"
1440  " k = k + 4.0; \n"
1441  " }";
1442  }else
1443  {
1444  //loop unroll for ATI.
1445  out <<"\n"
1446  " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
1447  " bins[9] = bins[0]; \n"
1448  " float npeaks = 0.0; \n"
1449  " float prevb = bins[8].w; \n"
1450  " FINDPEAK(0, 0.0);\n"
1451  " FINDPEAK(1, 4.0);\n"
1452  " FINDPEAK(2, 8.0);\n"
1453  " FINDPEAK(3, 12.0);\n"
1454  " FINDPEAK(4, 16.0);\n"
1455  " FINDPEAK(5, 20.0);\n"
1456  " FINDPEAK(6, 24.0);\n"
1457  " FINDPEAK(7, 28.0);\n"
1458  " FINDPEAK(8, 32.0);\n";
1459  }
1460  //WRITE output
 // NOTE(review): the condition (listing line 1461) is not visible here. The
 // first branch reads the Orientations/weights vectors, the else branch the
 // single Orientation float, matching the two declaration branches above.
1462  {
1463  out<<"\n"
1464  " if(orientation_mode){\n"
1465  " npeaks = dot(vec4(1,1,"
1466  <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
1467  <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
1468  " gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
1469  " gl_FragData[1] = radians((Orientations )*10.0);\n"
1470  " }else{\n"
1471  " gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
1472  " }\n";
1473  }else
1474  {
1475  out<<"\n"
1476  " gl_FragData[0] = vec4(pos, radians((Orientation)*10.0), sigma);\n";
1477  }
1478  //end
 // Close main() and stream an explicit NUL character after it.
1479  out<<"\n"
1480  "}\n"<<'\0';
1481 
1482 
1483 }
1484 
// Sets the inputs of the orientation program in its simplified form: the
// sampler uniform at _param_orientation_gtex is pointed to texture unit 1 and
// sigma is written as a single float to _param_orientation_size.
// oTex and sigma_step are accepted but not used by this body.
1485 void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1486 {
1487  glUniform1i(_param_orientation_gtex, 1);
1488  glUniform1f(_param_orientation_size, sigma);
1489 }
1490 
1491 
1492 
1493 
// Sets the uniforms of the orientation program: the gradient sampler is pointed
// to texture unit 1, the optional sub-pixel sampler to unit 2, and the vec4 at
// _param_orientation_size receives (width, height, sigma, step).
// gtex and stex are not referenced by the lines visible here.
// NOTE(review): listing lines 1496 and 1499 are missing from this extract; 1499
// is presumably the condition guarding the braced block below — confirm.
1494 void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1495 {
1497  glUniform1i(_param_orientation_gtex, 1);
1498 
1500  {
1501  //specify texture for subpixel subscale localization
1502  glUniform1i(_param_orientation_stex, 2);
1503  }
1504 
1505  float size[4];
1506  size[0] = (float)width;
1507  size[1] = (float)height;
1508  size[2] = sigma;
1509  size[3] = step;
1510  glUniform4fv(_param_orientation_size, 1, size);
1511 }
1512 
1513 
// NOTE(review): the signature (listing line 1514) is missing from this extract.
//
// Assembles the descriptor fragment shader as text and builds it. The emitted
// GLSL, per fragment:
//  1. derives a feature index and a sub-block index (idx) from the fragment
//     coordinate and the dsize uniform, and fetches the feature from tex,
//  2. writes zeros and returns when the feature position is within one pixel of
//     the image border given by size.xy,
//  3. builds rotation terms from the feature angle and the sampling extent
//     spt = abs(sigma * size.z),
//  4. loops over a bounding window, accumulating weighted gradient magnitudes
//     into two vec4 accumulators DA and DB (eight orientation slots),
//  5. writes DA to gl_FragData[0] and DB to gl_FragData[1].
// The program is kept in s_descriptor_fp only when IsNative() reports success;
// otherwise it is deleted.
1515 {
1516  //one shader output 128/8 = 16 , each fragout encodes 4
1517  //const double twopi = 2.0*3.14159265358979323846;
1518  //const double rpi = 8.0/twopi;
1519  ostringstream out;
1520  out<<setprecision(8);
1521 
1522  out<<"\n"
1523  "#define M_PI 3.14159265358979323846\n"
1524  "#define TWO_PI (2.0*M_PI)\n"
1525  "#define RPI 1.2732395447351626861510701069801\n"
1526  "#define WF size.z\n"
1527  "uniform sampler2DRect tex; \n"
1528  "uniform sampler2DRect gradTex; \n"
1529  "uniform vec4 dsize; \n"
1530  "uniform vec3 size; \n"
1531  "void main() \n"
1532  "{\n"
1533  " vec2 dim = size.xy; //image size \n"
1534  " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
1535  " float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5)); \n"
1536  " index = floor(index*0.125) + 0.49; \n"
1537  " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
1538  " vec2 pos = texture2DRect(tex, coord).xy; \n"
1539  " if(any(lessThanEqual(pos.xy, vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard; \n"
1540  " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
1541  " float anglef = texture2DRect(tex, coord).z;\n"
1542  " if(anglef > M_PI) anglef -= TWO_PI;\n"
1543  " float sigma = texture2DRect(tex, coord).w; \n"
1544  " float spt = abs(sigma * WF); //default to be 3*sigma \n";
1545 
1546  //rotation
1547  out<<
1548  " vec4 cscs, rots; \n"
1549  " cscs.y = sin(anglef); cscs.x = cos(anglef); \n"
1550  " cscs.zw = - cscs.xy; \n"
1551  " rots = cscs /spt; \n"
1552  " cscs *= spt; \n";
1553 
1554  //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
1555  //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
1556  //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
1557  //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
1558 
1559  out<<
1560  "vec4 temp; vec2 pt, offsetpt; \n"
1561  " /*the fraction part of idx is .5*/ \n"
1562  " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
1563  " offsetpt.y = floor(idx*0.25) - 1.5; \n"
1564  " temp = cscs.xwyx*offsetpt.xyxy; \n"
1565  " pt = pos + temp.xz + temp.yw; \n";
1566 
1567  //get a horizontal bounding box of the rotated rectangle
1568  out<<
1569  " vec2 bwin = abs(cscs.xy); \n"
1570  " float bsz = bwin.x + bwin.y; \n"
1571  " vec4 sz; \n"
1572  " sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
1573  " sz.zw = min(pt + vec2(bsz), dim - vec2(2, 2)); \n"
1574  " sz = floor(sz)+0.5;"; //move sample point to pixel center
1575  //get voting for two box
1576 
1577  out<<"\n"
1578  " vec4 DA, DB; vec2 spos; \n"
1579  " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
1580  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1581  " { \n"
1582  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1583  " { \n"
1584  " vec2 diff = spos - pt; \n"
1585  " temp = rots.xywx * diff.xyxy;\n"
1586  " vec2 nxy = (temp.xz + temp.yw); \n"
1587  " vec2 nxyn = abs(nxy); \n"
1588  " if(all( lessThan(nxyn, vec2(1.0)) ))\n"
1589  " {\n"
1590  " vec4 cc = texture2DRect(gradTex, spos); \n"
1591  " float mod = cc.b; float angle = cc.a; \n"
1592  " float theta0 = RPI * (anglef - angle); \n"
1593  " float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
1594  " diff = nxy + offsetpt.xy; \n"
1595  " float ww = exp(-0.125*dot(diff, diff));\n"
1596  " vec2 weights = vec2(1) - nxyn;\n"
1597  " float weight = weights.x * weights.y *mod*ww; \n"
1598  " float theta1 = floor(theta); \n"
1599  " float weight2 = (theta - theta1) * weight;\n"
1600  " float weight1 = weight - weight2;\n"
1601  " DA += vec4(equal(vec4(theta1), vec4(0, 1, 2, 3)))*weight1;\n"
1602  " DA += vec4(equal(vec4(theta1), vec4(7, 0, 1, 2)))*weight2; \n"
1603  " DB += vec4(equal(vec4(theta1), vec4(4, 5, 6, 7)))*weight1;\n"
1604  " DB += vec4(equal(vec4(theta1), vec4(3, 4, 5, 6)))*weight2; \n"
1605  " }\n"
1606  " }\n"
1607  " }\n";
1608 
1609  out<<
1610  " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
1611  "}\n"<<'\0';
1612 
 // Build the program; keep it and cache its uniform locations only on success.
1613  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1614 
1615  if(program->IsNative())
1616  {
1617  s_descriptor_fp = program ;
1618  _param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
1619  _param_descriptor_size = glGetUniformLocation(*program, "size");
1620  _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
1621  }else
1622  {
1623  delete program;
1624  }
1625 
1626 
1627 }
1628 
// NOTE(review): the signature (listing line 1629) and listing line 1631 are
// missing from this extract. The visible body only calls LoadDescriptorShaderF2().
1630 {
1632  LoadDescriptorShaderF2();
1633 }
1634 
1635 
// Sets the uniforms of the descriptor program: the gradient sampler is pointed
// to texture unit 1, dsize receives (dwidth, 1/dwidth, fwidth, 1/fwidth) and
// size receives a three-float array starting with (width, height).
// gtex, otex and sigma are not referenced by the lines visible here.
// NOTE(review): listing lines 1638 and 1646 are missing from this extract. No
// visible line assigns size[2], so 1646 presumably does — confirm against the
// original file before relying on this listing.
1636 void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
1637 {
1639  glUniform1i(_param_descriptor_gtex, 1);
1640 
1641  float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1642  glUniform4fv(_param_descriptor_dsize, 1, dsize);
1643  float size[3];
1644  size[0] = width;
1645  size[1] = height;
1647  glUniform3fv(_param_descriptor_size, 1, size);
1648 
1649 }
1650 
1652 
// NOTE(review): the signature (listing line 1653) and listing lines 1708,
// 1725-1726, 1730 and 1735 are missing from this extract. The next named
// definition in the file belongs to ShaderBagPKSL, so this is presumably a
// ShaderBagPKSL member — confirm.
//
// Creates the fixed set of fragment programs and caches their uniform locations:
//   s_gray        - luminance from RGB (weights 0.299, 0.587, 0.114)
//   s_sampling    - gathers the red channel of four texture coordinates
//   s_margin_copy - copy with coordinates clamped by the "truncate" uniform
//   s_zero_pass   - writes vec4(0.0)
//   s_grad_pass   - difference to the "texp" texture, gradient magnitude and
//                   orientation, written to gl_FragData[0..2]
// Then loads the orientation shader, installing a simplified replacement when
// s_orientation is still NULL afterwards, and loads the descriptor shader.
1654 {
1655  ProgramGLSL * program;
1656 
1657 
1658  s_gray = new ProgramGLSL(
1659  "uniform sampler2DRect tex; void main(){\n"
1660  "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
1661  "gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}" );
1662 
1663 
1664  s_sampling = new ProgramGLSL(
1665  "uniform sampler2DRect tex; void main(){\n"
1666  "gl_FragColor= vec4( texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
1667  " texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}" );
1668 
1669 
1670  s_margin_copy = program = new ProgramGLSL(
1671  "uniform sampler2DRect tex; uniform vec4 truncate; void main(){\n"
1672  "vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
1673  "bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
1674  "if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); } \n"
1675  "else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1676  "else { vec4 weights = vec4(vec4(0.0, 1.0, 2.0, 3.0) == truncate.wwww);\n"
1677  "float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
1678 
1679  _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
1680 
1681 
1682 
1683  s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
1684 
1685 
1686 
1687  s_grad_pass = program = new ProgramGLSL(
1688  "uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
1689  "{\n"
1690  " vec4 v1, v2, gg;\n"
1691  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1692  " vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
1693  " gl_FragData[0] = cc - cp; \n"
1694  " vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
1695  " vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
1696  " vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
1697  " vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
1698  " vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
1699  " gl_FragData[1] = grad;\n"
1700  " vec4 invalid = vec4(equal(grad, vec4(0.0))); \n"
1701  " vec4 ov = atan(dy, dx + invalid); \n"
1702  " gl_FragData[2] = ov; \n"
1703  "}\n\0"); //when
1704 
1705  _param_grad_pass_texp = glGetUniformLocation(*program, "texp");
1706 
1707 
 // NOTE(review): listing line 1708 is missing here.
1709  LoadOrientationShader();
1710 
 // LoadOrientationShader() only assigns s_orientation when its program builds,
 // so a NULL value here selects the fallback below.
 // NOTE(review): this relies on s_orientation being NULL beforehand; its
 // initialisation is not visible in this extract.
1711  if(s_orientation == NULL)
1712  {
1713  //Load a simplified version if the right version is not supported
1714  s_orientation = program = new ProgramGLSL(
1715  "uniform sampler2DRect tex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
1716  " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1717  " vec2 co = cc.xy * 0.5; \n"
1718  " vec4 oo = texture2DRect(oTex, co);\n"
1719  " bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
1720  " float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1721  " gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");
1722 
1723  _param_orientation_gtex= glGetUniformLocation(*program, "oTex");
1724  _param_orientation_size= glGetUniformLocation(*program, "size");
 // NOTE(review): listing lines 1725-1726 are missing here.
1727  std::cerr<<"Orientation simplified on this hardware"<<endl;
1728  }
1729 
 // NOTE(review): the condition guarding the descriptor load (listing line 1730)
 // and listing line 1735 are missing here.
1731  {
1732  LoadDescriptorShader();
1733  if(s_descriptor_fp == NULL)
1734  {
1736  std::cerr<<"Descriptor ignored on this hardware"<<endl;
1737  }
1738  }
1739 }
1740 
1741 
// NOTE(review): the signature (listing line 1742) is missing from this extract.
//
// Creates the programs used for visualisation and caches one uniform location:
//   s_copy_key         - copies the rg channels, writing (r, g, 0, 1)
//   s_vertex_list      - computes one quad-corner position per fragment from a
//                        feature's position (xy), angle (b) and size (3 * a)
//   s_display_gaussian - shows one channel of a texel, chosen by the fractional
//                        part of the texture coordinate
//   s_display_dog      - same channel selection, mapped to 0.5 + 20 * v
//   s_display_grad     - same channel selection, scaled by 5
//   s_display_keys     - discards non-key fragments; otherwise red when oc.r > 0
//                        and green when it is not
1743 {
1744  ProgramGLSL * program;
1745 
1746  s_copy_key = new ProgramGLSL(
1747  "uniform sampler2DRect tex;void main(){\n"
1748  "gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
1749 
1750  //shader used to write a vertex buffer object
1751  //which is used to draw the quads of each feature
1752  s_vertex_list = program = new ProgramGLSL(
1753  "uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
1754  "float fwidth = sizes.y; \n"
1755  "float twidth = sizes.z; \n"
1756  "float rwidth = sizes.w; \n"
1757  "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
1758  "float px = mod(index, twidth);\n"
1759  "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
1760  "vec4 cc = texture2DRect(tex, tpos );\n"
1761  "float size = 3.0 * cc.a; \n"
1762  "gl_FragColor.zw = vec2(0.0, 1.0);\n"
1763  "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
1764  "{\n"
1765  " float type = fract(px);\n"
1766  " vec2 dxy; float s, c;\n"
1767  " dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
1768  " dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
1769  " s = sin(cc.b); c = cos(cc.b); \n"
1770  " gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1771  " gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1772  "}\n\0");
1773  /*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
1774 
1775  _param_genvbo_size = glGetUniformLocation(*program, "sizes");
1776 
1777  s_display_gaussian = new ProgramGLSL(
1778  "uniform sampler2DRect tex; void main(){\n"
1779  "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1780  "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
1781 
1782  s_display_dog = new ProgramGLSL(
1783  "uniform sampler2DRect tex; void main(){\n"
1784  "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1785  "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1786  "gl_FragColor = vec4(g, g, g, 1.0);}" );
1787 
1788 
1789  s_display_grad = new ProgramGLSL(
1790  "uniform sampler2DRect tex; void main(){\n"
1791  "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1792  "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
1793 
1794  s_display_keys= new ProgramGLSL(
1795  "uniform sampler2DRect tex; void main(){\n"
1796  "vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1797  "vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1798  "bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
1799  "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1800  "if(v == 0.0) discard; \n"
1801  "else if(oc.r > 0.0) gl_FragColor = vec4(1.0, 0.0, 0,1.0); \n"
1802  "else gl_FragColor = vec4(0.0,1.0,0.0,1.0); }" );
1803 }
1804 
// NOTE(review): the signature (listing line 1805), the conditions on listing
// lines 1808 and 1861, and listing line 1926 are missing from this extract.
// This looks like the packed-texture counterpart of the orientation shader
// loader earlier in the file — confirm the owning class in the header.
//
// Assembles the orientation-histogram fragment shader for inputs where each
// texel holds four values: gradient magnitudes are read from gtex and
// orientations from otex. Each loop iteration handles four samples at once
// (distsq, weight, idxv and vidx are vec4) and adds every sample that lies
// inside the window to bins[0..8].
// The source text built in the visible lines stops after the sampling loops.
// The comment near the end says the rest is reused from the unpacked version,
// presumably on the missing listing line 1926.
// The program is kept in s_orientation only when IsNative() reports success;
// otherwise it is deleted.
1806 {
1807  ostringstream out;
 // NOTE(review): guard condition (listing line 1808) not visible here.
1809  {
1810  out << "#pragma optionNV(ifcvt none)\n"
1811  "#pragma optionNV(unroll all)\n";
1812  }
1813  out<<"\n"
1814  "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
1815  "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1816  "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1817  "uniform sampler2DRect tex; uniform sampler2DRect gtex;\n"
1818  "uniform sampler2DRect otex; uniform vec4 size;\n"
1819  "void main() \n"
1820  "{ \n"
1821  " vec4 bins[10]; \n"
1822  " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
1823  " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
1824  " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
1825  " vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1826  " vec2 pos = sift.xy; \n"
1827  " bool orientation_mode = (size.z != 0.0); \n"
1828  " float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
1829  " //bool fixed_orientation = (size.z < 0.0); \n"
1830  " if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
1831  " float gsigma = sigma * GAUSSIAN_WF; \n"
1832  " vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))); \n"
1833  " vec2 dim = size.xy; \n"
1834  " vec4 dist_threshold = vec4(win.x*win.x+0.5); \n"
1835  " float factor = -0.5/(gsigma*gsigma); \n"
1836  " vec4 sz; vec2 spos; \n"
1837  " //if(any(pos.xy <= float(1))) discard; \n"
1838  " sz.xy = max( pos - win, vec2(2.0,2.0)); \n"
1839  " sz.zw = min( pos + win, dim-vec2(3.0)); \n"
1840  " sz = floor(sz*0.5) + 0.5; ";
1841  //loop to get the histogram
1842 
1843  out<<"\n"
1844  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1845  " { \n"
1846  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1847  " { \n"
1848  " vec2 offset = 2.0 * spos - pos - vec2(0.5); \n"
1849  " vec4 off = vec4(offset, offset + vec2(1)); \n"
1850  " vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"
1851  " bvec4 inside = lessThan(distsq, dist_threshold); \n"
1852  " if(any(inside)) \n"
1853  " { \n"
1854  " vec4 gg = texture2DRect(gtex, spos); \n"
1855  " vec4 oo = texture2DRect(otex, spos); \n"
1856  " vec4 weight = gg * exp(distsq * factor); \n"
1857  " vec4 idxv = floor(degrees(oo)*0.1); \n"
1858  " idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0); \n"
1859  " vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0); \n";
1860  //
 // NOTE(review): the condition choosing between dynamic bin indexing and the
 // nested-comparison variant (listing line 1861) is not visible here.
1862  {
1863  // it might be slow on some GPUs
1864  out<<"\n"
1865  " for(int i = 0 ; i < 4; i++)\n"
1866  " {\n"
1867  " if(inside[i])\n"
1868  " {\n"
1869  " float idx = idxv[i]; \n"
1870  " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0.0,1.0,2.0,3.0))); \n"
1871  " int iidx = int(floor(idx*0.25)); \n"
1872  " bins[iidx]+=inc; \n"
1873  " } \n"
1874  " } \n"
1875  " } \n"
1876  " } \n"
1877  " }";
1878 
1879  }else
1880  {
1881  //nvfp40 still does not support dynamic array indexing
1882  //unrolled binary search
1883  //it seems to be faster than the dynamic indexing version on some GPUs
1884  out<<"\n"
1885  " for(int i = 0 ; i < 4; i++)\n"
1886  " {\n"
1887  " if(inside[i])\n"
1888  " {\n"
1889  " float idx = idxv[i]; \n"
1890  " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"
1891  " if(idx < 16.0) \n"
1892  " { \n"
1893  " if(idx < 8.0) \n"
1894  " { \n"
1895  " if(idx < 4.0) { bins[0]+=inc;} \n"
1896  " else { bins[1]+=inc;} \n"
1897  " }else \n"
1898  " { \n"
1899  " if(idx < 12.0){ bins[2]+=inc;} \n"
1900  " else { bins[3]+=inc;} \n"
1901  " } \n"
1902  " }else if(idx < 32.0) \n"
1903  " { \n"
1904  " if(idx < 24.0) \n"
1905  " { \n"
1906  " if(idx <20.0) { bins[4]+=inc;} \n"
1907  " else { bins[5]+=inc;} \n"
1908  " }else \n"
1909  " { \n"
1910  " if(idx < 28.0){ bins[6]+=inc;} \n"
1911  " else { bins[7]+=inc;} \n"
1912  " } \n"
1913  " }else \n"
1914  " { \n"
1915  " bins[8]+=inc; \n"
1916  " } \n"
1917  " } \n"
1918  " } \n"
1919  " } \n"
1920  " } \n"
1921  " }";
1922 
1923  }
1924 
1925  //reuse the code from the unpacked version..
 // NOTE(review): listing line 1926 is missing here.
1927 
1928 
1929 
 // Build the program; keep it and cache its uniform locations only on success.
1930  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1931  if(program->IsNative())
1932  {
1933  s_orientation = program ;
1934  _param_orientation_gtex = glGetUniformLocation(*program, "gtex");
1935  _param_orientation_otex = glGetUniformLocation(*program, "otex");
1936  _param_orientation_size = glGetUniformLocation(*program, "size");
1937  }else
1938  {
1939  delete program;
1940  }
1941 }
1942 
// NOTE(review): the signature (listing line 1943) is missing from this extract;
// the body reads a single value named width.
// Writes width to the float uniform at _param_ftex_width and points the sampler
// uniform at _param_genlist_start_tex0 to texture unit 0.
1944 {
1945  glUniform1f(_param_ftex_width, width);
1946  glUniform1i(_param_genlist_start_tex0, 0);
1947 }
1948 
1949 void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev)
1950 {
1951  ProgramGLSL * program;
1952 
1953  s_genlist_init_tight = new ProgramGLSL(
1954  "uniform sampler2DRect tex; void main ()\n"
1955  "{\n"
1956  " vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
1957  " texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
1958  " texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
1959  " texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
1960  " gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
1961  "}");
1962 
1963  s_genlist_init_ex = program = new ProgramGLSL(
1964  "uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
1965  "{\n"
1966  " vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1967  " vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1968  " vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1969  " vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1970  " vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
1971  " vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
1972  " vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
1973  " vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
1974  " helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
1975  " helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
1976  " helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
1977  " helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
1978  " gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0)))); \n"
1979  " gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0)))); \n"
1980  " gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0)))); \n"
1981  " gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0)))); \n"
1982  "}");
1983  _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1984 
// Final list-generation pass. Both shader variants read a position from tex.rg,
// fetch the key texel tk from ktex at that position, compare abs(tk.r) against
// 1..4 to build a one-hot mask, turn the mask into a +/-0.5 offset, and write
// (offset + pos * 2.0 + tk.yz) as the output position. They differ only in the
// third output component: 1.0 in the first variant, sign(tk.r) in the second.
// FIX: the second variant was missing the ';' after the `vec4 keys = ...`
// declaration, which made that GLSL source malformed.
// NOTE(review): listing line 1986 (the condition that selects between the two
// strings via the ':' below) is missing from this extract - restore it from the
// repository; it is not reconstructed here.
1985  s_genlist_end = program = new ProgramGLSL(
1987 
1988  "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
1989  "{\n"
1990  " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
1991  " vec2 pos = tc.rg; float index = tc.b;\n"
1992  " vec4 tk = texture2DRect( ktex, pos); \n"
1993  " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1994  " vec2 opos; \n"
1995  " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
1996  " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
1997  " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
1998  "}" :
1999 
2000  "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
2001  "{\n"
2002  " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
2003  " vec2 pos = tc.rg; float index = tc.b;\n"
2004  " vec4 tk = texture2DRect( ktex, pos); \n"
2005  " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
2006  " vec2 opos; \n"
2007  " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
2008  " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
2009  " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
2010  "}"
2011  );
2012 
2013  _param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
2014 
2015  //reduction ...
2016  s_genlist_histo = new ProgramGLSL(
2017  "uniform sampler2DRect tex; void main ()\n"
2018  "{\n"
2019  " vec4 helper; vec4 helper2; \n"
2020  " helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
2021  " helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
2022  " gl_FragColor.rg = helper2.xz + helper2.yw;\n"
2023  " helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
2024  " helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
2025  " gl_FragColor.ba= helper2.xz+helper2.yw;\n"
2026  "}");
2027 
2028 
2029  //read of the first part, which generates tex coordinates
2030 
2031  s_genlist_start= program = ShaderBagGLSL::LoadGenListStepShader(1, 1);
2032  _param_ftex_width= glGetUniformLocation(*program, "width");
2033  _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
2034  //stepping
2035  s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1);
2036  _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
2037 
2038 }
// NOTE(review): the signature line for this body (listing line 2039) is missing
// from this extract. The body calls glUseProgram(0), i.e. it stops using whatever
// program object is currently installed.
2040 {
2041  glUseProgram(0);
2042 }
// Assembles the keypoint-detection fragment shader as GLSL text, compiles it
// into s_keypoint and caches the uniform locations of texU / texD (and texI
// when GlobalUtil::_DarknessAdaption is set).
//
// dog_threshold  - written into the shader as THRESHOLD0 (multiplied by 0.8
//                  when GlobalUtil::_SubpixelLocalization is non-zero) and as
//                  THRESHOLD1.
// edge_threshold - used to compute THRESHOLD2 = (e+1)*(e+1)/e for the edge test.
//
// The generated shader handles the four components of each texel (REPEAT4 over
// index 0..3): extremum test in the current level, edge suppression, extremum
// tests against texU and texD, then an optional 3x3 linear solve for the offset.
//
// NOTE(review): listing lines 2051, 2057, 2077, 2162 and 2232-2233 are missing
// from this extract (marked below); as shown, the function is not complete.
2043 void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold)
2044 {
2045  float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2046  float threshold1 = dog_threshold;
2047  float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2048  ostringstream out;;
2049  out<<setprecision(8);
2050 
// NOTE(review): listing line 2051 (the condition guarding this block) is missing.
// The pragmas emitted are optionNV hints, so presumably a vendor check - confirm.
2052  {
2053  out << "#pragma optionNV(ifcvt none)\n"
2054  "#pragma optionNV(unroll all)\n";
2055 
2056  }
// NOTE(review): listing line 2057 (the if-condition paired with the else below)
// is missing. First branch: REPEAT4 as a real for-loop; else: manually unrolled.
2058  {
2059  out << "#define REPEAT4(FUNCTION)\\\n"
2060  "for(int i = 0; i < 4; ++i)\\\n"
2061  "{\\\n"
2062  " FUNCTION(i);\\\n"
2063  "}\n";
2064  }else
2065  {
2066  //loop unroll
2067  out << "#define REPEAT4(FUNCTION)\\\n"
2068  "FUNCTION(0);\\\n"
2069  "FUNCTION(1);\\\n"
2070  "FUNCTION(2);\\\n"
2071  "FUNCTION(3);\n";
2072  }
2073  //tex(X)(Y)
2074  //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
2075  //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
2076 
// NOTE(review): listing line 2077 (if-condition) is missing. First branch scales
// the thresholds by a texI sample (ii) and declares texI; else branch uses
// constant thresholds and empty DEFINE_EXTRA / MOVE_EXTRA macros.
2078  {
2079  out << "#define THRESHOLD0(i) (" << threshold0 << "* ii[i])\n"
2080  "#define THRESHOLD1 (" << threshold1 << "* ii[0])\n"
2081  "#define THRESHOLD2 " << threshold2 << "\n"
2082  "#define DEFINE_EXTRA() vec4 ii = texture2DRect(texI, gl_TexCoord[0].xy); "
2083  "ii = min(2.0 * ii + 0.1, 1.0) \n"
2084  "#define MOVE_EXTRA(idx) ii[0] = ii[idx]\n";
2085  out << "uniform sampler2DRect texI;\n";
2086  }else
2087  {
2088  out << "#define THRESHOLD0(i) " << threshold0 << "\n"
2089  "#define THRESHOLD1 " << threshold1 << "\n"
2090  "#define THRESHOLD2 " << threshold2 << "\n"
2091  "#define DEFINE_EXTRA()\n"
2092  "#define MOVE_EXTRA(idx) \n" ;
2093  }
2094 
 // Shader prologue: sample the 3x3 texel neighbourhood of the current level and
 // build, per component, the neighbour vectors v1 (axis-aligned) and v2 (diagonal).
2095  out<<
2096  "uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
2097  "uniform sampler2DRect texD; void main ()\n"
2098  "{\n"
2099  " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
2100  " vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
2101  " vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
2102  " vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
2103  " vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
2104  " vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
2105  " vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
2106  " vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
2107  " vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
2108  " vec4 cru = texture2DRect(tex, TexRU.xy);\n"
2109  " vec4 cc = ccc;\n"
2110  " vec4 v1[4], v2[4];\n"
2111  " v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2112  " v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2113  " v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2114  " v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2115  " v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2116  " v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2117  " v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2118  " v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2119  " DEFINE_EXTRA();\n";
2120 
2121  //test against 8 neighbours
2122  //use variable to identify type of extremum
2123  //1.0 for local maximum and -1.0 for minimum
2124  out <<
2125  " vec4 key = vec4(0.0); \n"
2126  " #define KEYTEST_STEP0(i) \\\n"
2127  " {\\\n"
2128  " bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
2129  " key[i] = cc[i] > float(THRESHOLD0(i)) && all(test1)?1.0: 0.0;\\\n"
2130  " key[i] = cc[i] < float(-THRESHOLD0(i)) && all(test2)? -1.0: key[i];\\\n"
2131  " }\n"
2132  " REPEAT4(KEYTEST_STEP0);\n"
2133  " if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
2134  " if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
2135  " gl_FragColor = vec4(0.0);\n"
2136  " if(any(notEqual(key, vec4(0.0)))) {\n";
2137 
2138  //do edge suppression first..
2139  //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2140  //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2141 
2142  out<<
2143  " float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2144  " #define EDGE_SUPPRESION(i) \\\n"
2145  " if(key[i] != 0.0)\\\n"
2146  " {\\\n"
2147  " vec4 D2 = v1[i].xyzw - cc[i];\\\n"
2148  " vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
2149  " vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
2150  " fx[i] = D5.x; fy[i] = D5.y ;\\\n"
2151  " fxx[i] = D2.x + D2.y;\\\n"
2152  " fyy[i] = D2.z + D2.w;\\\n"
2153  " fxy[i] = 0.25*(D4.x + D4.y);\\\n"
2154  " float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
2155  " float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
2156  " float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
2157  " if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)key[i] = 0.0;\\\n"
2158  " }\n"
2159  " REPEAT4(EDGE_SUPPRESION);\n"
2160  " if(any(notEqual(key, vec4(0.0)))) {\n";
2161 
2163  //read 9 pixels of upper/lower level
2164  out<<
2165  " vec4 v4[4], v5[4], v6[4];\n"
2166  " ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
2167  " clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
2168  " crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
2169  " ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
2170  " ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
2171  " cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
2172  " clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
2173  " crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
2174  " cru = texture2DRect(texU, TexRU.xy);\n"
2175  " vec4 cu = ccc;\n"
2176  " v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2177  " v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2178  " v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2179  " v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2180  " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2181  " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2182  " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2183  " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2184  <<
2185  " #define KEYTEST_STEP1(i)\\\n"
2186  " if(key[i] == 1.0)\\\n"
2187  " {\\\n"
2188  " bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
2189  " if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
2190  " }else if(key[i] == -1.0)\\\n"
2191  " {\\\n"
2192  " bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
2193  " if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
2194  " }\n"
2195  " REPEAT4(KEYTEST_STEP1);\n"
2196  " if(any(notEqual(key, vec4(0.0)))) { \n"
2197  <<
2198  " ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
2199  " clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
2200  " crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
2201  " ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
2202  " ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
2203  " cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
2204  " clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
2205  " crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
2206  " cru = texture2DRect(texD, TexRU.xy);\n"
2207  " vec4 cd = ccc;\n"
2208  " v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2209  " v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2210  " v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2211  " v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2212  " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2213  " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2214  " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2215  " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2216  <<
2217  " #define KEYTEST_STEP2(i)\\\n"
2218  " if(key[i] == 1.0)\\\n"
2219  " {\\\n"
2220  " bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
2221  " if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
2222  " }else if(key[i] == -1.0)\\\n"
2223  " {\\\n"
2224  " bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
2225  " if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
2226  " }\n"
2227  " REPEAT4(KEYTEST_STEP2);\n"
2228  " float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
2229  " //assume there is only one keypoint in the four. \n"
2230  " if(keysum==1.0) {\n";
2231 
2234 
// NOTE(review): listing lines 2232-2233 are missing here. The "else out <<"
// further down implies an if-condition introduced the statement below -
// presumably the sub-pixel localization switch; confirm against the repository.
// This branch moves the surviving key's data to slot 0, solves a 3x3 system by
// elimination with row swaps for `offset`, and outputs (keyv, offset).
2235  out <<
2236  " vec3 offset = vec3(0.0, 0.0, 0.0); \n"
2237  " #define TESTMOVE_KEYPOINT(idx) \\\n"
2238  " if(key[idx] != 0.0) \\\n"
2239  " {\\\n"
2240  " cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \\\n"
2241  " v4[0] = v4[idx]; v5[0] = v5[idx]; \\\n"
2242  " fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \\\n"
2243  " fx[0] = fx[idx]; fy[0] = fy[idx]; MOVE_EXTRA(idx); \\\n"
2244  " }\n"
2245  " TESTMOVE_KEYPOINT(1);\n"
2246  " TESTMOVE_KEYPOINT(2);\n"
2247  " TESTMOVE_KEYPOINT(3);\n"
2248  <<
2249 
2250  " float fs = 0.5*( cu[0] - cd[0] ); \n"
2251  " float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2252  " float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2253  " float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2254  " vec4 A0, A1, A2 ; \n"
2255  " A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]); \n"
2256  " A1 = vec4(fxy[0], fyy[0], fys, -fy[0]); \n"
2257  " A2 = vec4(fxs, fys, fss, -fs); \n"
2258  " vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs)); \n"
2259  " float maxa = max(max(x3.x, x3.y), x3.z); \n"
2260  " if(maxa >= 1e-10 ) \n"
2261  " { \n"
2262  " if(x3.y ==maxa ) \n"
2263  " { \n"
2264  " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
2265  " }else if( x3.z == maxa ) \n"
2266  " { \n"
2267  " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
2268  " } \n"
2269  " A0 /= A0.x; \n"
2270  " A1 -= A1.x * A0; \n"
2271  " A2 -= A2.x * A0; \n"
2272  " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
2273  " if( x2.y > x2.x ) \n"
2274  " { \n"
2275  " vec3 TEMP = A2.yzw; \n"
2276  " A2.yzw = A1.yzw; \n"
2277  " A1.yzw = TEMP; \n"
2278  " x2.x = x2.y; \n"
2279  " } \n"
2280  " if(x2.x >= 1e-10) { \n"
2281  " A1.yzw /= A1.y; \n"
2282  " A2.yzw -= A2.y * A1.yzw; \n"
2283  " if(abs(A2.z) >= 1e-10) {\n"
2284  " offset.z = A2.w /A2.z; \n"
2285  " offset.y = A1.w - offset.z*A1.z; \n"
2286  " offset.x = A0.w - offset.z*A0.z - offset.y*A0.y; \n"
2287  " bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>float(THRESHOLD1)) ;\n"
2288  " if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
2289  " }\n"
2290  " }\n"
2291  " }\n"
2292  <<"\n"
2293  " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2294  " gl_FragColor = vec4(keyv, offset);\n"
2295  " }}}}\n"
2296  "}\n" <<'\0';
2297 
 // Alternative ending: no offset is computed, only keyv is written.
2298  else out << "\n"
2299  " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2300  " gl_FragColor = vec4(keyv, 0.0, 0.0, 0.0);\n"
2301  " }}}}\n"
2302  "}\n" <<'\0';
2303 
2304  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2305  s_keypoint = program ;
2306 
2307  //parameter
2308  _param_dog_texu = glGetUniformLocation(*program, "texU");
2309  _param_dog_texd = glGetUniformLocation(*program, "texD");
2310  if(GlobalUtil::_DarknessAdaption) _param_dog_texi = glGetUniformLocation(*program, "texI");
2311 }
2312 void ShaderBagPKSL::SetDogTexParam(int texU, int texD)
2313 {
2314  glUniform1i(_param_dog_texu, 1);
2315  glUniform1i(_param_dog_texd, 2);
2316  if(GlobalUtil::_DarknessAdaption)glUniform1i(_param_dog_texi, 3);
2317 }
2318 void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0)
2319 {
2320  glUniform1i(_param_genlist_step_tex0, 1);
2321 }
2322 
2323 void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size)
2324 {
2325  float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2326  glUniform4fv(_param_genvbo_size, 1, sizes);
2327 }
// NOTE(review): the signature line for this body (listing line 2328) is missing
// from this extract. The body sets the uniform at _param_grad_pass_texp to 1.
2329 {
2330  glUniform1i(_param_grad_pass_texp, 1);
2331 }
2332 
// NOTE(review): the signature line (listing line 2333) and listing line 2335
// are missing from this extract. The visible statements call
// LoadDescriptorShaderF2() and then store the result of
// LoadDescriptorProgramRECT() in s_rect_description.
2334 {
2336  LoadDescriptorShaderF2();
2337  s_rect_description = LoadDescriptorProgramRECT();
2338 }
2339 
// NOTE(review): the signature line for this body (listing line 2340) is missing
// from this extract.
// The body assembles a descriptor fragment shader as GLSL text. In that shader
// the feature texel supplies pos (xy) and a window size wsz (zw); samples of
// gtex/otex inside the window are weighted bilinearly and accumulated into
// eight orientation bins held in DA and DB, which are written to
// gl_FragData[0] and gl_FragData[1].
// Returns the compiled program when IsNative() is true; otherwise deletes it
// and returns NULL.
2341 {
2342  //one shader output 128/8 = 16 , each fragout encodes 4
2343  //const double twopi = 2.0*3.14159265358979323846;
2344  //const double rpi = 8.0/twopi;
2345  ostringstream out;
2346  out<<setprecision(8);
// NOTE(review): listing line 2347 (the if-condition paired with the else below)
// is missing. First branch: REPEAT4 as a for-loop; else: manually unrolled.
2348  {
2349  out << "#define REPEAT4(FUNCTION)\\\n"
2350  "for(int i = 0; i < 4; ++i)\\\n"
2351  "{\\\n"
2352  " FUNCTION(i);\\\n"
2353  "}\n";
2354  }else
2355  {
2356  //loop unroll for ATI
2357  out << "#define REPEAT4(FUNCTION)\\\n"
2358  "FUNCTION(0);\\\n"
2359  "FUNCTION(1);\\\n"
2360  "FUNCTION(2);\\\n"
2361  "FUNCTION(3);\n";
2362  }
2363 
2364  out<<"\n"
2365  "#define M_PI 3.14159265358979323846\n"
2366  "#define TWO_PI (2.0*M_PI)\n"
2367  "#define RPI 1.2732395447351626861510701069801\n"
2368  "#define WF size.z\n"
2369  "uniform sampler2DRect tex; \n"
2370  "uniform sampler2DRect gtex; \n"
2371  "uniform sampler2DRect otex; \n"
2372  "uniform vec4 dsize; \n"
2373  "uniform vec3 size; \n"
2374  "void main() \n"
2375  "{\n"
2376  " vec2 dim = size.xy; //image size \n"
2377  " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2378  " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
2379  " index = floor(index*0.125)+ 0.49; \n"
2380  " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2381  " vec2 pos = texture2DRect(tex, coord).xy; \n"
2382  " vec2 wsz = texture2DRect(tex, coord).zw;\n"
2383  " float aspect_ratio = wsz.y / wsz.x;\n"
2384  " float aspect_sq = aspect_ratio * aspect_ratio; \n"
2385  " vec2 spt = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
2386 
2387  //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2388  //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
2389  //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2390  //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
2391  out<<
2392  " vec4 temp; vec2 pt; \n"
2393  " pt.x = pos.x + fract(idx*0.25) * wsz.x; \n"
2394  " pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y; \n";
2395 
2396  //get a horizontal bounding box of the rotated rectangle
2397  out<<
2398  " vec4 sz; \n"
2399  " sz.xy = max(pt - spt, vec2(2,2));\n"
2400  " sz.zw = min(pt + spt, dim - vec2(3)); \n"
2401  " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2402  //get voting for two box
2403 
2404  out<<"\n"
2405  " vec4 DA, DB; vec2 spos; \n"
2406  " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
2407  " vec4 nox = vec4(0.0, 1.0, 0.0, 1.0); \n"
2408  " vec4 noy = vec4(0.0, 0.0, 1.0, 1.0); \n"
2409  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2410  " { \n"
2411  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2412  " { \n"
2413  " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
2414  " vec4 nx = (tpt.x + nox) * ispt.x; \n"
2415  " vec4 ny = (tpt.y + noy) * ispt.y; \n"
2416  " vec4 nxn = abs(nx), nyn = abs(ny); \n"
2417  " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
2418  " if(any(inside))\n"
2419  " {\n"
2420  " vec4 gg = texture2DRect(gtex, spos);\n"
2421  " vec4 oo = texture2DRect(otex, spos);\n"
2422  //" vec4 cc = cos(oo), ss = sin(oo); \n"
2423  //" oo = atan(ss* aspect_ratio, cc); \n"
2424  //" gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
2425  " vec4 theta0 = (- oo)*RPI;\n"
2426  " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
2427  " vec4 theta1 = floor(theta); \n"
2428  " vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg; \n"
2429  " vec4 weight2 = (theta - theta1) * weight; \n"
2430  " vec4 weight1 = weight - weight2; \n"
2431  " #define ADD_DESCRIPTOR(i) \\\n"
2432  " if(inside[i])\\\n"
2433  " {\\\n"
2434  " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2435  " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2436  " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2437  " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2438  " }\n"
2439  " REPEAT4(ADD_DESCRIPTOR);\n"
2440  " }\n"
2441  " }\n"
2442  " }\n";
2443  out<<
2444  " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2445  "}\n"<<'\0';
2446 
 // Keep the program only if IsNative() accepts it; otherwise free it.
2447  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2448  if(program->IsNative())
2449  {
2450  return program;
2451  }
2452  else
2453  {
2454  delete program;
2455  return NULL;
2456  }
2457 }
2458 
// NOTE(review): the signature line for this body (listing line 2459) is missing
// from this extract.
// The body assembles a descriptor fragment shader as GLSL text. In that shader
// the feature texel supplies pos (xy), an angle (z) and sigma (w); features
// whose pos lies outside [1, dim-1] output zeros. The sampling window is
// rotated by the angle and scaled by abs(sigma * WF), samples of gtex/otex are
// weighted bilinearly and by exp(-0.125 * d^2), and accumulated into eight
// orientation bins held in DA and DB (gl_FragData[0] / gl_FragData[1]).
// Returns the compiled program when IsNative() is true; otherwise deletes it
// and returns NULL.
2460 {
2461  //one shader output 128/8 = 16 , each fragout encodes 4
2462  //const double twopi = 2.0*3.14159265358979323846;
2463  //const double rpi = 8.0/twopi;
2464  ostringstream out;
2465  out<<setprecision(8);
2466 
// NOTE(review): listing line 2467 (the if-condition paired with the else below)
// is missing. First branch: REPEAT4 as a for-loop; else: manually unrolled.
2468  {
2469  out << "#define REPEAT4(FUNCTION)\\\n"
2470  "for(int i = 0; i < 4; ++i)\\\n"
2471  "{\\\n"
2472  " FUNCTION(i);\\\n"
2473  "}\n";
2474  }else
2475  {
2476  //loop unroll for ATI
2477  out << "#define REPEAT4(FUNCTION)\\\n"
2478  "FUNCTION(0);\\\n"
2479  "FUNCTION(1);\\\n"
2480  "FUNCTION(2);\\\n"
2481  "FUNCTION(3);\n";
2482  }
2483 
2484  out<<"\n"
2485  "#define M_PI 3.14159265358979323846\n"
2486  "#define TWO_PI (2.0*M_PI)\n"
2487  "#define RPI 1.2732395447351626861510701069801\n"
2488  "#define WF size.z\n"
2489  "uniform sampler2DRect tex; \n"
2490  "uniform sampler2DRect gtex; \n"
2491  "uniform sampler2DRect otex; \n"
2492  "uniform vec4 dsize; \n"
2493  "uniform vec3 size; \n"
2494  "void main() \n"
2495  "{\n"
2496  " vec2 dim = size.xy; //image size \n"
2497  " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2498  " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
2499  " index = floor(index*0.125)+ 0.49; \n"
2500  " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2501  " vec2 pos = texture2DRect(tex, coord).xy; \n"
2502  " if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
2503  " //discard; \n"
2504  " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
2505  " float anglef = texture2DRect(tex, coord).z;\n"
2506  " if(anglef > M_PI) anglef -= TWO_PI;\n"
2507  " float sigma = texture2DRect(tex, coord).w; \n"
2508  " float spt = abs(sigma * WF); //default to be 3*sigma \n";
2509  //rotation
2510  out<<
2511  " vec4 cscs, rots; \n"
2512  " cscs.x = cos(anglef); cscs.y = sin(anglef); \n"
2513  " cscs.zw = - cscs.xy; \n"
2514  " rots = cscs /spt; \n"
2515  " cscs *= spt; \n";
2516 
2517  //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2518  //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
2519  //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2520  //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
2521  out<<
2522  " vec4 temp; vec2 pt, offsetpt; \n"
2523  " /*the fraction part of idx is .5*/ \n"
2524  " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
2525  " offsetpt.y = floor(idx*0.25) - 1.5; \n"
2526  " temp = cscs.xwyx*offsetpt.xyxy; \n"
2527  " pt = pos + temp.xz + temp.yw; \n";
2528 
2529  //get a horizontal bounding box of the rotated rectangle
2530  out<<
2531  " vec2 bwin = abs(cscs.xy); \n"
2532  " float bsz = bwin.x + bwin.y; \n"
2533  " vec4 sz; \n"
2534  " sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
2535  " sz.zw = min(pt + vec2(bsz), dim - vec2(3)); \n"
2536  " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2537  //get voting for two box
2538 
2539  out<<"\n"
2540  " vec4 DA, DB; vec2 spos; \n"
2541  " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
2542  " vec4 nox = vec4(0.0, rots.xy, rots.x + rots.y); \n"
2543  " vec4 noy = vec4(0.0, rots.wx, rots.w + rots.x); \n"
2544  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2545  " { \n"
2546  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2547  " { \n"
2548  " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
2549  " vec4 temp = rots.xywx * tpt.xyxy; \n"
2550  " vec2 temp2 = temp.xz + temp.yw; \n"
2551  " vec4 nx = temp2.x + nox; \n"
2552  " vec4 ny = temp2.y + noy; \n"
2553  " vec4 nxn = abs(nx), nyn = abs(ny); \n"
2554  " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
2555  " if(any(inside))\n"
2556  " {\n"
2557  " vec4 gg = texture2DRect(gtex, spos);\n"
2558  " vec4 oo = texture2DRect(otex, spos);\n"
2559  " vec4 theta0 = (anglef - oo)*RPI;\n"
2560  " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
2561  " vec4 theta1 = floor(theta); \n"
2562  " vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"
2563  " vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy )); \n"
2564  " vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg * ww; \n"
2565  " vec4 weight2 = (theta - theta1) * weight; \n"
2566  " vec4 weight1 = weight - weight2; \n"
2567  " #define ADD_DESCRIPTOR(i) \\\n"
2568  " if(inside[i])\\\n"
2569  " {\\\n"
2570  " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2571  " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2572  " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2573  " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2574  " }\n"
2575  " REPEAT4(ADD_DESCRIPTOR);\n"
2576  " }\n"
2577  " }\n"
2578  " }\n";
2579  out<<
2580  " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2581  "}\n"<<'\0';
2582 
 // Keep the program only if IsNative() accepts it; otherwise free it.
2583  ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2584  if(program->IsNative())
2585  {
2586  return program;
2587  }
2588  else
2589  {
2590  delete program;
2591  return NULL;
2592  }
2593 }
2594 
// NOTE(review): the signature line for this body (listing line 2595) is missing
// from this extract.
// Builds the descriptor program via LoadDescriptorProgramPKSL(); when that
// returns non-null, stores it in s_descriptor_fp and caches the locations of
// its gtex, otex, size and dsize uniforms. A null result leaves all of these
// members untouched.
2596 {
2597 
2598  ProgramGLSL * program = LoadDescriptorProgramPKSL();
2599  if( program )
2600  {
2601  s_descriptor_fp = program;
2602  _param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
2603  _param_descriptor_otex = glGetUniformLocation(*program, "otex");
2604  _param_descriptor_size = glGetUniformLocation(*program, "size");
2605  _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
2606  }
2607 }
2608 
2609 
2610 
2611 void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2612 {
2613  glUniform1i(_param_orientation_gtex, 1);
2614  glUniform2f(_param_orientation_size, sigma, sigma_step);
2615 }
2616 
2617 
// Sets the orientation shader's samplers to fixed texture units (gtex -> 1,
// otex -> 2) and uploads its size uniform as (width, height, sigma, step).
// The gtex and otex arguments themselves are not read by the visible code.
// NOTE(review): listing line 2620 is missing from this extract.
2618 void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2619 {
2621  glUniform1i(_param_orientation_gtex, 1);
2622  glUniform1i(_param_orientation_otex, 2);
2623 
2624  float size[4];
2625  size[0] = (float)width;
2626  size[1] = (float)height;
2627  size[2] = sigma;
2628  size[3] = step;
2629  glUniform4fv(_param_orientation_size, 1, size);
2630 }
2631 
// Uploads the descriptor shader uniforms: samplers gtex -> unit 1 and
// otex -> unit 2, dsize = (dwidth, 1/dwidth, fwidth, 1/fwidth), and a
// three-component size whose first two entries are width and height.
// When sigma == 0 and s_rect_description exists, that program is made current
// and its uniform locations are queried on every call; otherwise the cached
// _param_descriptor_* locations are used. The gtex / otex arguments are not
// read by the visible code.
// NOTE(review): listing lines 2642, 2651, 2655 and 2665 are missing from this
// extract. In the visible text size[2] is never assigned before the
// glUniform3fv calls; the assignment is presumably on the missing lines
// 2651 / 2665 - confirm against the repository.
2632 void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
2633 {
2634  if(sigma == 0 && s_rect_description)
2635  {
2636  //rectangle description mode
2637  s_rect_description->UseProgram();
2638  GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
2639  GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
2640  GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
2641  GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
2643  glUniform1i(param_descriptor_gtex, 1);
2644  glUniform1i(param_descriptor_otex, 2);
2645 
2646  float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2647  glUniform4fv(param_descriptor_dsize, 1, dsize);
2648  float size[3];
2649  size[0] = width;
2650  size[1] = height;
2652  glUniform3fv(param_descriptor_size, 1, size);
2653  }else
2654  {
2656  glUniform1i(_param_descriptor_gtex, 1);
2657  glUniform1i(_param_descriptor_otex, 2);
2658 
2659 
2660  float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2661  glUniform4fv(_param_descriptor_dsize, 1, dsize);
2662  float size[3];
2663  size[0] = width;
2664  size[1] = height;
2666  glUniform3fv(_param_descriptor_size, 1, size);
2667  }
2668 
2669 }
2670 
2671 
// NOTE(review): the signature line for this body (listing line 2672) is missing
// from this extract. The body sets the uniform at _param_genlist_end_ktex to 1.
2673 {
2674  glUniform1i(_param_genlist_end_ktex, 1);
2675 }
// NOTE(review): the signature line for this body (listing line 2676) is missing
// from this extract; w and h are presumably its parameters - confirm.
// Uploads the bbox uniform as
// ((w-1)/2 + 0.25, (w-1)/2 - 0.25, (h-1)/2 + 0.25, (h-1)/2 - 0.25).
2677 {
2678  float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f, (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2679  glUniform4fv(_param_genlist_init_bbox, 1, bbox);
2680 }
2681 
2682 void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax)
2683 {
2684  float truncate[4];
2685  truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1) >> 1) - 0.5f;
2686  truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1) >> 1) - 0.5f;
2687  truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2688  truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f);
2689  glUniform4fv(_param_margin_copy_truncate, 1, truncate);
2690 }
int width
int size
int height
#define NULL
void MakeFilterProgram(float kernel[], int width)
FilterGLSL(float sigma)
static float _FilterWidthFactor
Definition: GlobalUtil.h:56
static int _SubpixelLocalization
Definition: GlobalUtil.h:72
static int _octave_min_default
Definition: GlobalUtil.h:78
static int _PreciseBorder
Definition: GlobalUtil.h:75
static float _DescriptorWindowFactor
Definition: GlobalUtil.h:58
static int _timingL
Definition: GlobalUtil.h:47
static int _OrientationPack2
Definition: GlobalUtil.h:60
static int _verbose
Definition: GlobalUtil.h:44
static float _MulitiOrientationThreshold
Definition: GlobalUtil.h:97
static float _OrientationWindowFactor
Definition: GlobalUtil.h:57
static int _MaxFilterWidth
Definition: GlobalUtil.h:55
static int _UseDynamicIndexing
Definition: GlobalUtil.h:53
static int _KeepExtremumSign
Definition: GlobalUtil.h:89
static int _DarknessAdaption
Definition: GlobalUtil.h:92
static float _OrientationGaussianFactor
Definition: GlobalUtil.h:96
static int _usePackedTex
Definition: GlobalUtil.h:48
static int _IsNvidia
Definition: GlobalUtil.h:49
static int _DescriptorPPT
Definition: GlobalUtil.h:69
static int _KeepShaderLoop
Definition: GlobalUtil.h:50
static int _MaxOrientation
Definition: GlobalUtil.h:59
static int _FullSupported
Definition: GlobalUtil.h:65
void PrintLinkLog(std::ostream &os)
int ValidateProgram()
int UseProgram()
void CheckLinkLog()
int LinkProgram()
virtual void LoadGenListShader(int ndoglev, int nlev)
virtual void SetMarginCopyParam(int xmax, int ymax)
virtual void LoadKeypointShader(float threshold, float edgeTrheshold)
void LoadDescriptorShaderF2()
virtual void SetGenListStepParam(int tex, int tex0)
virtual void SetDogTexParam(int texU, int texD)
virtual void SetGenListStartParam(float width, int tex0)
void LoadOrientationShader()
virtual void SetGenListInitParam(int w, int h)
virtual void LoadDisplayShaders()
virtual void UnloadProgram()
virtual void LoadFixedShaders()
virtual void SetGenVBOParam(float width, float fwidth, float size)
void SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
static void WriteOrientationCodeToStream(ostream &out)
virtual void LoadDescriptorShader()
virtual void SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
static ProgramGLSL * LoadGenListStepShader(int start, int step)
virtual void SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex=0, float step=1.0f)
virtual void UnloadProgram()
virtual void SetGenListEndParam(int ktex)
virtual void SetGenListStartParam(float width, int tex0)
virtual void SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
static ProgramGLSL * LoadDescriptorProgramRECT()
static ProgramGLSL * LoadDescriptorProgramPKSL()
virtual void SetGenListStepParam(int tex, int tex0)
virtual void LoadOrientationShader()
virtual void LoadDescriptorShader()
virtual void LoadDescriptorShaderF2()
virtual void SetDogTexParam(int texU, int texD)
virtual void LoadDisplayShaders()
virtual void SetMarginCopyParam(int xmax, int ymax)
virtual void LoadFixedShaders()
virtual void SetGenVBOParam(float width, float fwidth, float size)
virtual void LoadKeypointShader(float threshold, float edgeTrheshold)
virtual void SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
virtual void SetGenListInitParam(int w, int h)
virtual void LoadGenListShader(int ndoglev, int nlev)
virtual void SetGradPassParam(int texP)
virtual void SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
void CreateGaussianFilters(SiftParam &param)
virtual ~ShaderBag()
void LoadDynamicShaders(SiftParam &param)
void SelectInitialSmoothingFilter(int octave_min, SiftParam &param)
float GetInitialSmoothSigma(int octave_min)
Definition: SiftGPU.cpp:415
float * _sigma
Definition: SiftGPU.h:75
int _dog_level_num
Definition: SiftGPU.h:85
float _sigma_skip1
Definition: SiftGPU.h:77
int _sigma_num
Definition: SiftGPU.h:82
float _dog_threshold
Definition: SiftGPU.h:93
float _edge_threshold
Definition: SiftGPU.h:95
float _sigma_skip0
Definition: SiftGPU.h:76
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718
MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Definition: MiniVec.h:89
Definition: Eigen.h:85
#define SEEK_END
Definition: qioapi.cpp:34