ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
ProgramCG.cpp
Go to the documentation of this file.
1 // File: ProgramCG.cpp
3 // Author: Changchang Wu
4 // Description : implementation of cg related class.
5 // class ProgramCG A simple wrapper of Cg programs
6 // class ShaderBagCG cg shaders for SIFT
7 // class FilterCGGL cg gaussian filters for SIFT
8 //
9 // Copyright (c) 2007 University of North Carolina at Chapel Hill
10 // All Rights Reserved
11 //
12 // Permission to use, copy, modify and distribute this software and its
13 // documentation for educational, research and non-profit purposes, without
14 // fee, and without a written agreement is hereby granted, provided that the
15 // above copyright notice and the following paragraph appear in all copies.
16 //
17 // The University of North Carolina at Chapel Hill make no representations
18 // about the suitability of this software for any purpose. It is provided
19 // 'as is' without express or implied warranty.
20 //
21 // Please send BUG REPORTS to ccwu@cs.unc.edu
22 //
24 
25 #if defined(CG_SIFTGPU_ENABLED)
26 
27 #include "GL/glew.h"
28 
29 #include <iostream>
30 #include <iomanip>
31 #include <vector>
32 #include <strstream>
33 #include <algorithm>
34 #include <stdlib.h>
35 #include <math.h>
36 #include <string.h>
37 using namespace std;
38 
39 #include "GlobalUtil.h"
40 #include "ProgramCG.h"
41 #include "GLTexImage.h"
42 #include "ShaderMan.h"
43 #include "FrameBufferObject.h"
44 
45 
46 
47 #if defined(_WIN32)
48  #pragma comment (lib, "../../lib/cg.lib")
49  #pragma comment (lib, "../../lib/cggl.lib")
50 #endif
51 
// Cg context shared by every ProgramCG; stays 0 until InitContext() creates it.
52 CGcontext ProgramCG::_Context =0;
// Fragment profile chosen by InitContext() via cgGLGetLatestProfile(CG_GL_FRAGMENT).
53 CGprofile ProgramCG::_FProfile;
54 
56 // Construction/Destruction
58 
59 ProgramCG::ProgramCG()
60 {
61  _programID = NULL;
62 }
63 
64 ProgramCG::~ProgramCG()
65 {
66  if(_programID) cgDestroyProgram(_programID);
67 }
68 
69 ProgramCG::ProgramCG(const char *code, const char** cg_compile_args, CGprofile profile)
70 {
71  _valid = 0;
72  _profile = profile;
73  GLint epos;
74  const char* ati_args[] = {"-po", "ATI_draw_buffers",0};
75  const char* fp40_args[] = {"-ifcvt", "none","-unroll", "all", GlobalUtil::_UseFastMath? "-fastmath" : 0, 0};
76  if(cg_compile_args == NULL) cg_compile_args = GlobalUtil::_IsNvidia? (GlobalUtil::_SupportFP40? fp40_args:NULL) : ati_args;
77  _programID = ::cgCreateProgram(_Context, CG_SOURCE, code, profile, NULL, cg_compile_args);
78  if(_programID)
79  {
80  cgGLLoadProgram(_programID );
81  //_texParamID = cgGetNamedParameter(_programID, "tex");
82 
83  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
84  if(epos >=0)
85  {
86  std::cout<<cgGetProgramString(_programID, CG_COMPILED_PROGRAM)<<endl;
87  std::cerr<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
88  }else
89  {
90  _valid = 1;
91  }
92  }else
93  {
94  std::cerr<<code<<endl;
95  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
96  if(epos >=0)
97  {
98  std::cout<<cgGetProgramString(_programID, CG_COMPILED_PROGRAM)<<endl;
99  std::cerr<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
100  }else
101  {
102  std::cout<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
103  }
104  }
105 
106 }
107 
108 void ProgramCG::ErrorCallback()
109 {
110  CGerror err = cgGetError();
111  if(err)
112  {
113  std::cerr<< cgGetErrorString(err)<<endl;
114  }
115 }
116 
117 
118 void ProgramCG::InitContext()
119 {
120  if(_Context == 0)
121  {
122  _Context = cgCreateContext();
123 
125  _FProfile = cgGLGetLatestProfile(CG_GL_FRAGMENT);
126  cgGLSetOptimalOptions(_FProfile);
127 
128  if(GlobalUtil::_verbose) std::cout<<"Shader Profile: "<<cgGetProfileString(_FProfile)<<endl;
129 
130  cgSetErrorCallback(ErrorCallback);
131  }
132 }
133 
134 void ProgramCG::DestroyContext()
135 {
136  cgDestroyContext(_Context);
137 }
138 
// Constructs the Cg shader bag. Its only action is to call
// ProgramCG::InitContext(), which creates the shared Cg context if it
// does not exist yet.
139 ShaderBagCG::ShaderBagCG()
140 {
141  ProgramCG::InitContext();
142 }
143 
144 
145 int ProgramCG::UseProgram()
146 {
147  if(_programID)
148  {
149  cgGLEnableProfile(_profile);
150  cgGLBindProgram(_programID);
151 
152  return 1;
153  }else
154  {
155  return 0;
156  }
157 }
158 
159 void ShaderBagCG::UnloadProgram()
160 {
161 
162  cgGLUnbindProgram(ProgramCG::_FProfile);
163  cgGLDisableProfile(ProgramCG::_FProfile);
164 }
165 
166 
// Compiles the fragment programs that do not depend on run-time thresholds
// and stores them in the s_* members:
//   s_gray        - RGB to intensity conversion
//   s_sampling    - copies the r and g channels
//   s_zero_pass   - writes 0
//   s_margin_copy - texture copy with the lookup coordinate clamped from above
//   s_grad_pass   - gradient magnitude and orientation
//   s_orientation - either via LoadOrientationShader() or a simplified shader
// It also caches the Cg parameter handles needed by the setter functions.
167 void ShaderBagCG::LoadFixedShaders()
168 {
169 // s_debug = new ProgramCG( "void main(float4 TexCoord0:TEXCOORD0, out float4 FragColor:COLOR0,"
170 // "uniform samplerRECT tex){ gl_FragColor.rg = gl_TexCoord[0].st;}");
171 
 // intensity = dot((0.299, 0.587, 0.114), rgb), replicated to r, g and b; alpha = 1
172  s_gray = new ProgramCG(
173  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
174  "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
175  "FragColor= float4(intensity, intensity, intensity, 1.0);}" );
176 
177 
 // keeps the r and g channels of the source texel and zeroes b and a
178  s_sampling = new ProgramCG(
179  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
180  "float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(cc.rg, 0.0, 0.0); }" );
181 
182 
 // writes 0 to the output colour
183  s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
184 
185 
186  ProgramCG * program;
 // copy with the lookup coordinate limited to at most `truncate`
187  s_margin_copy = program = new ProgramCG(
188  "void main(float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
189  "uniform samplerRECT tex, uniform float2 truncate){\n"
190  "FragColor = texRECT(tex, min(texCoord0.xy, truncate)); }");
191 
 // handle used by SetMarginCopyParam()
192  _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
193 
194 
 // reads the r channel at the left/right/down/up coordinates and outputs
 // (cc.r, cc.g, gradient magnitude, gradient angle)
195  s_grad_pass = new ProgramCG(
196  "void main (\n"
197  "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
198  "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
199  "out float4 FragData0 : COLOR0, uniform samplerRECT tex)\n"
200  "{\n"
201  " float4 v1, v2, gg;\n"
202  " float4 cc = texRECT(tex, TexCC.xy);\n"
203  " gg.x = texRECT(tex, TexLC.xy).r;\n"
204  " gg.y = texRECT(tex, TexRC.xy).r;\n"
205  " gg.z = texRECT(tex, TexCD.xy).r;\n"
206  " gg.w = texRECT(tex, TexCU.xy).r;\n"
207  " float2 dxdy = (gg.yw - gg.xz); \n"
208  " float grad = 0.5*length(dxdy);\n"
209  " float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
210  "}\n\0");
211 
212 
213  if(GlobalUtil::_SupportFP40)
214  {
215  //use the packing mode for cpu list reshape and two orientations
218 
219  LoadOrientationShader();
220 
221 
 // the descriptor shader is only loaded when _DescriptorPPT is non-zero
222  if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
223 
224  }else
225  {
 // fallback without fp40: look up the feature texel, then read the alpha
 // channel of oTex at that feature's (r, g) position
226  s_orientation = program = new ProgramCG(
227  "void main(out float4 FragColor : COLOR0, \n"
228  " uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
229  " uniform float size, \n"
230  " in float2 tpos : TEXCOORD0){\n"
231  " float4 cc = texRECT(fTex, tpos);\n"
232  " float4 oo = texRECT(oTex, cc.rg);\n"
233  " FragColor = float4(cc.rg, oo.a, size);}");
234  _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
235  _param_orientation_size= cgGetNamedParameter(*program, "size");
236 
237 
240  GlobalUtil::_MaxOrientation = 0; //0 for simplified version
242  std::cerr<<"Orientation simplified on this hardware"<<endl;
243  std::cerr<<"Descriptor ignored on this hardware"<<endl;
244  }
245 
246 
247 }
248 
// Compiles the fragment programs used for visualisation and stores them in
// s_copy_key, s_vertex_list, s_display_gaussian, s_display_dog,
// s_display_grad and s_display_keys. Also caches the "sizes" parameter of
// the vertex-list shader for SetGenVBOParam().
249 void ShaderBagCG::LoadDisplayShaders()
250 {
 // copies r and g from the source texel and sets (b, a) = (0, 1)
251  s_copy_key = new ProgramCG(
252  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
253  "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1); }");
254 
255  //shader used to write a vertex buffer object
256  //which is used to draw the quads of each feature
257  ProgramCG * program;
258  s_vertex_list = program = new ProgramCG(
259  "void main(in float4 TexCoord0: TEXCOORD0,\n"
260  "uniform float4 sizes, \n"
261  "uniform samplerRECT tex, \n"
262  "out float4 FragColor: COLOR0){\n"
263  "float fwidth = sizes.y; \n"
264  "float twidth = sizes.z; \n"
265  "float rwidth = sizes.w; \n"
266  "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
267  "float px = fmod(index, twidth);\n"
268  "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
269  "float4 cc = texRECT(tex, tpos );\n"
270  "float size = cc.a * 3.0f;//sizes.x;// \n"
271  "FragColor.zw = float2(0.0, 1.0);\n"
272  "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
273  "{\n"
274  " float type = frac(px);\n"
275  " float2 dxy; float s, c;\n"
276  " dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
277  " dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
278  " sincos(cc.b, s, c);\n"
279  " FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
280  " FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
281  "}\n\0");
282  /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
283 
 // handle used by SetGenVBOParam()
284  _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
285 
286 
 // shows the r channel as grey
287  s_display_gaussian = new ProgramCG(
288  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
289  "float r = texRECT(tex, TexCoord0.xy).r;\n"
290  "FragColor = float4(r, r, r, 1.0);}");
291 
292 
 // shows the g channel as grey after mapping it to 0.5 + 20 * g
293  s_display_dog = new ProgramCG(
294  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
295  "float g = (0.5+20.0*texRECT(tex, TexCoord0.xy).g);\n"
296  "FragColor = float4(g, g, g, 1.0);}" );
297 
298 
 // shows the b channel scaled by 5 as grey
299  s_display_grad = new ProgramCG(
300  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
301  "float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(5.0 * cc.bbb, 1.0); }");
302 
303 
 // r == 1.0 is drawn red, r == 0.5 is drawn green, everything else is discarded
304  s_display_keys= new ProgramCG(
305  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
306  "float4 cc = texRECT(tex, TexCoord0.xy);\n"
307  "if(cc.r ==1.0) FragColor = float4(1.0, 0, 0,1.0); \n"
308  "else {if (cc.r ==0.5) FragColor = float4(0.0,1.0,0.0,1.0); else discard;}}");
309 
310 }
311 
312 void ShaderBagCG::SetMarginCopyParam(int xmax, int ymax)
313 {
314  float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
315  cgGLSetParameter2fv(_param_margin_copy_truncate, truncate);
316 }
317 
318 
// Builds and compiles the keypoint detection shader variant that re-centres
// the candidate and repeats the sub-pixel solve (up to MAX_REFINE passes).
//
//  threshold       DoG threshold; emitted as THRESHOLD1, and 0.8 * threshold
//                  is emitted as THRESHOLD0 for the initial extremum test.
//  edge_threshold  r in the edge test; THRESHOLD2 = (r + 1)^2 / r.
//
// The generated source is written into a fixed 10240-byte buffer. The
// compiled program is stored in s_keypoint and the handles of its "texU"
// and "texD" samplers are cached for SetDogTexParam(). Always returns 1;
// the validity of the compiled program is not checked here.
319 int ShaderBagCG::LoadKeypointShaderMR(float threshold, float edge_threshold)
320 {
321  char buffer[10240];
322  float threshold0 = threshold * 0.8f;
323  float threshold1 = threshold;
324  float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
 // at least two refinement passes, more if _SubpixelLocalization is larger
325  int max_refine = max(2, GlobalUtil::_SubpixelLocalization);
326  ostrstream out(buffer, 10240);
327 
 // constants are baked into the shader source as preprocessor defines
328  out << "#define THRESHOLD0 " << threshold0 << "\n"
329  "#define THRESHOLD1 " << threshold1 << "\n"
330  "#define THRESHOLD2 " << threshold2 << "\n"
331  "#define MAX_REFINE " << max_refine << "\n";
 // FragData0: (cc.r, cc.g, gradient magnitude, gradient angle) from the r channel;
 // v1/v2 collect the g channel of the 8 neighbours on the current level
332  out<<
333  "void main (\n"
334  "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
335  "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
336  "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
337  "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
338  "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
339  "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
340  "{\n"
341  " float4 v1, v2, gg;\n"
342  " float2 TexRU = float2(TexRC.x, TexCU.y); \n"
343  " float4 cc = texRECT(tex, TexCC.xy);\n"
344  " v1.x = texRECT(tex, TexLC.xy).g;\n"
345  " gg.x = texRECT(tex, TexLC.xy).r;\n"
346  " v1.y = texRECT(tex, TexRC.xy).g;\n"
347  " gg.y = texRECT(tex, TexRC.xy).r;\n"
348  " v1.z = texRECT(tex, TexCD.xy).g;\n"
349  " gg.z = texRECT(tex, TexCD.xy).r;\n"
350  " v1.w = texRECT(tex, TexCU.xy).g;\n"
351  " gg.w = texRECT(tex, TexCU.xy).r;\n"
352  " v2.x = texRECT(tex, TexLD.xy).g;\n"
353  " v2.y = texRECT(tex, TexLU.xy).g;\n"
354  " v2.z = texRECT(tex, TexRD.xy).g;\n"
355  " v2.w = texRECT(tex, TexRU.xy).g;\n"
356  " float2 dxdy = 0.5*(gg.yw - gg.xz); \n"
357  " float grad = length(dxdy);\n"
358  " float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
359  " FragData0 = float4(cc.rg, grad, theta);\n"
360  <<
361  " float dog = 0.0; \n"
362  " FragData1 = float4(0, 0, 0, 0); \n"
363  " float2 v3; float4 v4, v5, v6;\n"
 // the maximum case: compare against texU and texD as well; dog = 1.0
364  <<
365  " if( cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2)))\n"
366  " {\n"
367  " v3.x = texRECT(texU, TexCC.xy).g;\n"
368  " v4.x = texRECT(texU, TexLC.xy).g;\n"
369  " v4.y = texRECT(texU, TexRC.xy).g;\n"
370  " v4.z = texRECT(texU, TexCD.xy).g;\n"
371  " v4.w = texRECT(texU, TexCU.xy).g;\n"
372  " v6.x = texRECT(texU, TexLD.xy).g;\n"
373  " v6.y = texRECT(texU, TexLU.xy).g;\n"
374  " v6.z = texRECT(texU, TexRD.xy).g;\n"
375  " v6.w = texRECT(texU, TexRU.xy).g;\n"
376  " if(cc.g < v3.x || any(cc.gggg<v4.xyzw || cc.gggg<v6.xyzw))return; \n"
377  " v3.y = texRECT(texD, TexCC.xy).g;\n"
378  " v5.x = texRECT(texD, TexLC.xy).g;\n"
379  " v5.y = texRECT(texD, TexRC.xy).g;\n"
380  " v5.z = texRECT(texD, TexCD.xy).g;\n"
381  " v5.w = texRECT(texD, TexCU.xy).g;\n"
382  " v6.x = texRECT(texD, TexLD.xy).g;\n"
383  " v6.y = texRECT(texD, TexLU.xy).g;\n"
384  " v6.z = texRECT(texD, TexRD.xy).g;\n"
385  " v6.w = texRECT(texD, TexRU.xy).g;\n"
386  " if(cc.g < v3.y || any(cc.gggg<v5.xyzw || cc.gggg<v6.xyzw))return; \n"
387  " dog = 1.0; \n"
388  " }\n"
389  //the minimum case: same comparisons with the inequalities reversed; dog = 0.5
390  <<
391  " else if(cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2)))\n"
392  " {\n"
393  " v3.x = texRECT(texU, TexCC.xy).g;\n"
394  " v4.x = texRECT(texU, TexLC.xy).g;\n"
395  " v4.y = texRECT(texU, TexRC.xy).g;\n"
396  " v4.z = texRECT(texU, TexCD.xy).g;\n"
397  " v4.w = texRECT(texU, TexCU.xy).g;\n"
398  " v6.x = texRECT(texU, TexLD.xy).g;\n"
399  " v6.y = texRECT(texU, TexLU.xy).g;\n"
400  " v6.z = texRECT(texU, TexRD.xy).g;\n"
401  " v6.w = texRECT(texU, TexRU.xy).g;\n"
402  " if(cc.g > v3.x || any(cc.gggg>v4.xyzw || cc.gggg>v6.xyzw))return; \n"
403  " v3.y = texRECT(texD, TexCC.xy).g;\n"
404  " v5.x = texRECT(texD, TexLC.xy).g;\n"
405  " v5.y = texRECT(texD, TexRC.xy).g;\n"
406  " v5.z = texRECT(texD, TexCD.xy).g;\n"
407  " v5.w = texRECT(texD, TexCU.xy).g;\n"
408  " v6.x = texRECT(texD, TexLD.xy).g;\n"
409  " v6.y = texRECT(texD, TexLU.xy).g;\n"
410  " v6.z = texRECT(texD, TexRD.xy).g;\n"
411  " v6.w = texRECT(texD, TexRU.xy).g;\n"
412  " if(cc.g > v3.y || any(cc.gggg>v5.xyzw || cc.gggg>v6.xyzw))return; \n"
413  " dog = 0.5 ; \n"
414  " }\n"
415  " else\n"
416  " return;\n"
 // refinement loop: form the derivatives, solve the 3x3 system by
 // elimination with row swaps, then shift the sample position by one
 // texel when the solved offset exceeds 0.6 and try again
417  <<
418  " int i = 0; \n"
419  " float2 offset = float2(0, 0);\n"
420  " float2 offsets = float2(0, 0);\n"
421  " float3 dxys; bool key_moved; \n"
422  " float fx, fy, fs; \n"
423  " float fxx, fyy, fxy; \n"
424  " float fxs, fys, fss; \n"
425  " do\n"
426  " {\n"
427  " dxys = float3(0, 0, 0);\n"
428  " offset = float2(0, 0);\n"
429  " float4 D2 = v1.xyzw - cc.gggg;\n"
430  " fxx = D2.x + D2.y;\n"
431  " fyy = D2.z + D2.w;\n"
432  " float2 D4 = v2.xw - v2.yz;\n"
433  " fxy = 0.25*(D4.x + D4.y);\n"
434  " float2 D5 = 0.5*(v1.yw-v1.xz); \n"
435  " fx = D5.x;\n"
436  " fy = D5.y ; \n"
437  " fs = 0.5*( v3.x - v3.y ); \n"
438  " fss = v3.x + v3.y - cc.g - cc.g;\n"
439  " fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
440  " fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
441  " float4 A0, A1, A2 ; \n"
442  " A0 = float4(fxx, fxy, fxs, -fx); \n"
443  " A1 = float4(fxy, fyy, fys, -fy); \n"
444  " A2 = float4(fxs, fys, fss, -fs); \n"
445  " float3 x3 = abs(float3(fxx, fxy, fxs)); \n"
446  " float maxa = max(max(x3.x, x3.y), x3.z); \n"
447  " if(maxa > 1e-10 ) \n"
448  " {\n"
449  " if(x3.y ==maxa ) \n"
450  " { \n"
451  " float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
452  " }else if( x3.z == maxa ) \n"
453  " { \n"
454  " float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
455  " } \n"
456  " A0 /= A0.x; \n"
457  " A1 -= A1.x * A0; \n"
458  " A2 -= A2.x * A0; \n"
459  " float2 x2 = abs(float2(A1.y, A2.y)); \n"
460  " if( x2.y > x2.x ) \n"
461  " { \n"
462  " float3 TEMP = A2.yzw; \n"
463  " A2.yzw = A1.yzw; \n"
464  " A1.yzw = TEMP; \n"
465  " x2.x = x2.y; \n"
466  " } \n"
467  " if(x2.x > 1e-10) \n"
468  " {\n"
469  " A1.yzw /= A1.y; \n"
470  " A2.yzw -= A2.y * A1.yzw; \n"
471  " if(abs(A2.z) > 1e-10) \n"
472  " {\n"
473  // compute dx, dy, ds:
474  <<
475  " dxys.z = A2.w /A2.z; \n"
476  " dxys.y = A1.w - dxys.z*A1.z; \n"
477  " dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y; \n"
478  " }\n"
479  " }\n"
480  " }\n"
481  " offset.x = dxys.x > 0.6 ? 1 : 0 + dxys.x < -0.6 ? -1 : 0;\n"
482  " offset.y = dxys.y > 0.6 ? 1 : 0 + dxys.y < - 0.6? -1 : 0;\n"
483  " i++; key_moved = i < MAX_REFINE && any(abs(offset)>0) ; \n"
484  " if(key_moved)\n"
485  " {\n"
486  " offsets += offset; \n"
487  " cc = texRECT(tex, TexCC.xy + offsets);\n"
488  " v1.x = texRECT(tex , TexLC.xy + offsets).g;\n"
489  " v1.y = texRECT(tex , TexRC.xy + offsets).g;\n"
490  " v1.z = texRECT(tex , TexCD.xy + offsets).g;\n"
491  " v1.w = texRECT(tex , TexCU.xy + offsets).g;\n"
492  " v2.x = texRECT(tex , TexLD.xy + offsets).g;\n"
493  " v2.y = texRECT(tex , TexLU.xy + offsets).g;\n"
494  " v2.z = texRECT(tex , TexRD.xy + offsets).g;\n"
495  " v2.w = texRECT(tex , TexRU.xy + offsets).g;\n"
496  " v3.x = texRECT(texU, TexCC.xy + offsets).g;\n"
497  " v4.x = texRECT(texU, TexLC.xy + offsets).g;\n"
498  " v4.y = texRECT(texU, TexRC.xy + offsets).g;\n"
499  " v4.z = texRECT(texU, TexCD.xy + offsets).g;\n"
500  " v4.w = texRECT(texU, TexCU.xy + offsets).g;\n"
501  " v3.y = texRECT(texD, TexCC.xy + offsets).g;\n"
502  " v5.x = texRECT(texD, TexLC.xy + offsets).g;\n"
503  " v5.y = texRECT(texD, TexRC.xy + offsets).g;\n"
504  " v5.z = texRECT(texD, TexCD.xy + offsets).g;\n"
505  " v5.w = texRECT(texD, TexCU.xy + offsets).g;\n"
506  " }\n"
507  " }while(key_moved);\n"
 // test1: contrast at the refined location; test2: edge response ratio
508  <<
509  " bool test1 = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys ))> THRESHOLD1) ;\n"
510  " float test2_v1= fxx*fyy - fxy *fxy; \n"
511  " float test2_v2 = (fxx+fyy); \n"
512  " test2_v2 = test2_v2*test2_v2;\n"
513  " bool test2 = test2_v1>0 && test2_v2 < THRESHOLD2 * test2_v1; \n "
514  //keep the point when the offset is less than 1
515  <<
516  " FragData1 = test1 && test2 && all( abs(dxys) < 1)? float4( dog, dxys.xy+offsets, dxys.z) : float4(0, 0, 0, 0); \n"
517  "}\n"
518  <<'\0';
519 
520  ProgramCG * program;
521  s_keypoint = program = new ProgramCG(buffer);
522  //parameter handles used by SetDogTexParam()
523  _param_dog_texu = cgGetNamedParameter(*program, "texU");
524  _param_dog_texd = cgGetNamedParameter(*program, "texD");
525 
526  return 1;
527 
528 }
529 
530 //keypoint detection shader
531 //1. compare with 26 neighbours
532 //2. sub-pixel sub-scale localization
533 //3. output: [dog, offset(x,y,s)]
534 
535 void ShaderBagCG:: LoadKeypointShader(float threshold, float edge_threshold)
536 {
537  char buffer[10240];
538  float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
539  float threshold1 = threshold;
540  float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
541  ostrstream out(buffer, 10240);
542  out<<setprecision(8);
543  streampos pos;
544  //tex(X)(Y)
545  //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
546  //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
547 
548  out << "#define THRESHOLD0 " << threshold0 << "\n"
549  "#define THRESHOLD1 " << threshold1 << "\n"
550  "#define THRESHOLD2 " << threshold2 << "\n";
551  out<<
552  "void main (\n"
553  "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
554  "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
555  "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
556  "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
557  "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
558  "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
559  "{\n"
560  " float4 v1, v2, gg;\n"
561  " float2 TexRU = float2(TexRC.x, TexCU.y); \n"
562  " float4 cc = texRECT(tex, TexCC.xy);\n"
563  " v1.x = texRECT(tex, TexLC.xy).g;\n"
564  " gg.x = texRECT(tex, TexLC.xy).r;\n"
565  " v1.y = texRECT(tex, TexRC.xy).g;\n"
566  " gg.y = texRECT(tex, TexRC.xy).r;\n"
567  " v1.z = texRECT(tex, TexCD.xy).g;\n"
568  " gg.z = texRECT(tex, TexCD.xy).r;\n"
569  " v1.w = texRECT(tex, TexCU.xy).g;\n"
570  " gg.w = texRECT(tex, TexCU.xy).r;\n"
571  " v2.x = texRECT(tex, TexLD.xy).g;\n"
572  " v2.y = texRECT(tex, TexLU.xy).g;\n"
573  " v2.z = texRECT(tex, TexRD.xy).g;\n"
574  " v2.w = texRECT(tex, TexRU.xy).g;\n"
575  " float2 dxdy = (gg.yw - gg.xz); \n"
576  " float grad = 0.5*length(dxdy);\n"
577  " float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
578  " FragData0 = float4(cc.rg, grad, theta);\n"
579 
580  //test against 8 neighbours
581  //use variable to identify type of extremum
582  //1.0 for local maximum and 0.5 for minimum
583  <<
584  " float dog = 0.0; \n"
585  " FragData1 = float4(0, 0, 0, 0); \n"
586  " dog = cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2))?1.0: 0.0;\n"
587  " dog = cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2))?0.5: dog;\n";
588 
589  pos = out.tellp();
590  //do edge supression first..
591  //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
592  //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
593 
594  out<<
595  " if(dog == 0.0) return;\n"
596  " float fxx, fyy, fxy; \n"
597  " float4 D2 = v1.xyzw - cc.gggg;\n"
598  " float2 D4 = v2.xw - v2.yz;\n"
599  " fxx = D2.x + D2.y;\n"
600  " fyy = D2.z + D2.w;\n"
601  " fxy = 0.25*(D4.x + D4.y);\n"
602  " float fxx_plus_fyy = fxx + fyy;\n"
603  " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
604  " float score_down = (fxx*fyy - fxy*fxy);\n"
605  " if( score_down <= 0 || score_up > THRESHOLD2 * score_down)return;\n"
606  //...
607  <<
608  " float2 D5 = 0.5*(v1.yw-v1.xz); \n"
609  " float fx = D5.x, fy = D5.y ; \n"
610  " float fs, fss , fxs, fys ; \n"
611  " float2 v3; float4 v4, v5, v6;\n"
612  //read 9 pixels of upper level
613  <<
614  " v3.x = texRECT(texU, TexCC.xy).g;\n"
615  " v4.x = texRECT(texU, TexLC.xy).g;\n"
616  " v4.y = texRECT(texU, TexRC.xy).g;\n"
617  " v4.z = texRECT(texU, TexCD.xy).g;\n"
618  " v4.w = texRECT(texU, TexCU.xy).g;\n"
619  " v6.x = texRECT(texU, TexLD.xy).g;\n"
620  " v6.y = texRECT(texU, TexLU.xy).g;\n"
621  " v6.z = texRECT(texU, TexRD.xy).g;\n"
622  " v6.w = texRECT(texU, TexRU.xy).g;\n"
623  //compare with 9 pixels of upper level
624  //read and compare with 9 pixels of lower level
625  //the maximum case
626  <<
627  " if(dog == 1.0)\n"
628  " {\n"
629  " bool4 test = cc.gggg < max(v4, v6); \n"
630  " if(cc.g < v3.x || any(test.xy||test.zw))return; \n"
631  " v3.y = texRECT(texD, TexCC.xy).g;\n"
632  " v5.x = texRECT(texD, TexLC.xy).g;\n"
633  " v5.y = texRECT(texD, TexRC.xy).g;\n"
634  " v5.z = texRECT(texD, TexCD.xy).g;\n"
635  " v5.w = texRECT(texD, TexCU.xy).g;\n"
636  " v6.x = texRECT(texD, TexLD.xy).g;\n"
637  " v6.y = texRECT(texD, TexLU.xy).g;\n"
638  " v6.z = texRECT(texD, TexRD.xy).g;\n"
639  " v6.w = texRECT(texD, TexRU.xy).g;\n"
640  " test = cc.gggg<max(v5, v6); \n"
641  " if(cc.g < v3.y || any(test.xy||test.zw))return; \n"
642  " }\n"
643  //the minimum case
644  <<
645  " else{\n"
646  " bool4 test = cc.gggg>min(v4, v6); \n"
647  " if(cc.g > v3.x || any(test.xy||test.zw))return; \n"
648  " v3.y = texRECT(texD, TexCC.xy).g;\n"
649  " v5.x = texRECT(texD, TexLC.xy).g;\n"
650  " v5.y = texRECT(texD, TexRC.xy).g;\n"
651  " v5.z = texRECT(texD, TexCD.xy).g;\n"
652  " v5.w = texRECT(texD, TexCU.xy).g;\n"
653  " v6.x = texRECT(texD, TexLD.xy).g;\n"
654  " v6.y = texRECT(texD, TexLU.xy).g;\n"
655  " v6.z = texRECT(texD, TexRD.xy).g;\n"
656  " v6.w = texRECT(texD, TexRU.xy).g;\n"
657  " test = cc.gggg>min(v5, v6); \n"
658  " if(cc.g > v3.y || any(test.xy||test.zw))return; \n"
659  " }\n";
660 
662 
663  // sub-pixel localization FragData1 = float4(dog, 0, 0, 0); return;
664  out <<
665  " fs = 0.5*( v3.x - v3.y ); //bug fix 9/12/2007 \n"
666  " fss = v3.x + v3.y - cc.g - cc.g;\n"
667  " fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
668  " fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
669 
671  // let dog difference be quatratic function of dx, dy, ds;
672  // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +
673  // + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
674  // + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
675  // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
676 
677  //the local extremum satisfies
678  // df/dx = 0, df/dy = 0, df/dz = 0
679 
680  //that is
681  // |-fx| | fxx fxy fxs | |dx|
682  // |-fy| = | fxy fyy fys | * |dy|
683  // |-fs| | fxs fys fss | |ds|
684  // need to solve dx, dy, ds
685 
686  // Use Gauss elimination to solve the linear system
687  <<
688  " float3 dxys = float3(0.0); \n"
689  " float4 A0, A1, A2 ; \n"
690  " A0 = float4(fxx, fxy, fxs, -fx); \n"
691  " A1 = float4(fxy, fyy, fys, -fy); \n"
692  " A2 = float4(fxs, fys, fss, -fs); \n"
693  " float3 x3 = abs(float3(fxx, fxy, fxs)); \n"
694  " float maxa = max(max(x3.x, x3.y), x3.z); \n"
695  " if(maxa >= 1e-10 ) { \n"
696  " if(x3.y ==maxa ) \n"
697  " { \n"
698  " float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
699  " }else if( x3.z == maxa ) \n"
700  " { \n"
701  " float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
702  " } \n"
703  " A0 /= A0.x; \n"
704  " A1 -= A1.x * A0; \n"
705  " A2 -= A2.x * A0; \n"
706  " float2 x2 = abs(float2(A1.y, A2.y)); \n"
707  " if( x2.y > x2.x ) \n"
708  " { \n"
709  " float3 TEMP = A2.yzw; \n"
710  " A2.yzw = A1.yzw; \n"
711  " A1.yzw = TEMP; \n"
712  " x2.x = x2.y; \n"
713  " } \n"
714  " if(x2.x >= 1e-10) { \n"
715  " A1.yzw /= A1.y; \n"
716  " A2.yzw -= A2.y * A1.yzw; \n"
717  " if(abs(A2.z) >= 1e-10) { \n"
718  // compute dx, dy, ds:
719  <<
720  " dxys.z = A2.w /A2.z; \n"
721  " dxys.y = A1.w - dxys.z*A1.z; \n"
722  " dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y; \n"
723 
724  //one more threshold which I forgot in versions prior to 286
725  <<
726  " bool bugfix_test = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys )) < THRESHOLD1) ;\n"
727  " if(bugfix_test || any(abs(dxys) >= 1.0)) dog = 0; \n"
728  " }}}\n"
729  //keep the point when the offset is less than 1
730  <<
731  " FragData1 = float4( dog, dxys); \n"
732  "}\n" <<'\0';
733 
734  else out<<
735  " FragData1 = float4( dog, 0, 0, 0) ; \n"
736  "}\n" <<'\0';
737 
738  ProgramCG * program;
739  s_keypoint = program = new ProgramCG(buffer);
740  if(!program->IsValidProgram())
741  {
742  delete program;
743  out.seekp(pos);
744  out <<
745  " FragData1 = float4( fabs(cc.g) > 2.0 * THRESHOLD0? dog : 0, 0, 0, 0) ; \n"
746  "}\n" <<'\0';
747  s_keypoint = program = new ProgramCG(buffer);
749  std::cerr<<"Detection simplified on this hardware"<<endl;
750  }
751  //parameter
752  _param_dog_texu = cgGetNamedParameter(*program, "texU");
753  _param_dog_texd = cgGetNamedParameter(*program, "texD");
754 
755 
756 
757 
758 }
759 
760 
761 void ShaderBagCG::SetDogTexParam(int texU, int texD)
762 {
763  cgGLSetTextureParameter(_param_dog_texu, texU);
764  cgGLEnableTextureParameter(_param_dog_texu);
765  cgGLSetTextureParameter(_param_dog_texd, texD);
766  cgGLEnableTextureParameter(_param_dog_texd);
767 }
768 
769 void ShaderBagCG::SetGenListStepParam(int tex, int tex0)
770 {
771  cgGLSetTextureParameter(_param_genlist_step_tex, tex);
772  cgGLEnableTextureParameter(_param_genlist_step_tex);
773  cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
774  cgGLEnableTextureParameter(_param_genlist_step_tex0);
775 }
776 
777 void ShaderBagCG::SetGenVBOParam(float width, float fwidth, float size)
778 {
779  float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
780  cgGLSetParameter4fv(_param_genvbo_size, sizes);
781 }
782 
783 
784 ProgramGPU* FilterGLCG::CreateFilterH(float kernel[], float offset[], int width)
785 {
786 
787 
788  char buffer[10240];
789  ostrstream out(buffer, 10240);
790 
791  out<<setprecision(8);
792 
793  if(GlobalUtil::_BetaFilter)
794  {
795  out<< "void main(uniform samplerRECT tex,";
796  out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
797  out<<"\n\tout float4 FragColor : COLOR0 )";
798  out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
799  out<<"float or = texRECT(tex, TexCoord0.xy).r, intensity;\n";
800 
801  for(int i = 0; i< width; i+=4)
802  {
803  out <<"data = float4(";
804  for(int j = i; j < i + 4; j++)
805  {
806  if(j != i) out <<", \n";
807  if(j >= width)
808  {
809  out<<"0";
810  }else if(offset[j]==0.0)
811  {
812  out<<"or";
813  }else
814  {
815  out<<"texRECT(tex, TexCoord0.xy + float2(float("<<offset[j] <<") , 0)).r";
816  }
817  }
818  out << ");\n";
819  out << "intensity4 += data * float4(";
820  for(int k = i; k < i + 4; k++)
821  {
822  if(k != i) out <<", ";
823  if(k >= width) out<<"0";
824  else out<<kernel[k];
825  }
826  out << ");\n";
827 
828  }
829  out << "intensity4.xy += intensity4.zw;\n";
830  out << "intensity = intensity4.x + intensity4.y;\n";
831  }else
832  {
833  out<< "void main(uniform samplerRECT tex,";
834  out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
835  out<<"\n\tout float4 FragColor : COLOR0 )";
836  out<<"\n{\n\tfloat intensity = 0.0 ; float2 pos;\n";
837 
838  for(int i = 0; i< width; i++)
839  {
840  if(offset[i]==0.0)
841  {
842  out<<"float or = texRECT(tex, TexCoord0.xy).r;\n";
843  out<<"intensity+= or * "<<kernel[i]<<";\n";
844 
845  }else
846  {
847  out<<"pos = TexCoord0.xy + float2(float("<<offset[i] <<") , 0);\n";
848  out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).r;\n";
849  }
850  }
851  }
852  //copy original data to red channel
853  out<<"FragColor.r = or;\n";
854  out<<"FragColor.b = intensity;}\n"<<'\0';
855 
856  return new ProgramCG( buffer);
857 }
858 
859 
860 ProgramGPU* FilterGLCG::CreateFilterV(float kernel[], float offset[], int height)
861 {
862  char buffer[10240];
863  ostrstream out(buffer, 10240);
864  out<<setprecision(8);
865 
866  if(GlobalUtil::_BetaFilter)
867  {
868  out<< "void main(uniform samplerRECT tex,";
869  out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
870  out<<"\n\tout float4 FragColor : COLOR0 )";
871  out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
872  out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb; float intensity;\n";
873 
874  for(int i = 0; i< height; i+=4)
875  {
876  out <<"data = float4(";
877  for(int j = i; j < i + 4; j++)
878  {
879  if(j != i) out <<", \n";
880  if(j >= height)
881  {
882  out<<"0";
883  }else if(offset[j]==0.0)
884  {
885  out<<"orb.y";
886  }else
887  {
888  out<<"texRECT(tex, TexCoord0.xy + float2(0, float("<<offset[j] <<"))).b";
889  }
890  }
891  out << ");\n";
892  out << "intensity4 += data * float4(";
893  for(int k = i; k < i + 4; k++)
894  {
895  if(k != i) out <<", ";
896  if(k >= height) out<<"0";
897  else out<<kernel[k];
898  }
899  out << ");\n";
900 
901  }
902  out << "intensity4.xy += intensity4.zw;\n";
903  out << "intensity = intensity4.x + intensity4.y;\n";
904  }else
905  {
906  out<< "void main(uniform samplerRECT tex,";
907  out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
908  out<<"\n\tout float4 FragColor : COLOR0 )";
909  out<<"\n{\n\tfloat intensity = 0.0 ; float2 pos;\n";
910 
911  for(int i = 0; i< height; i++)
912  {
913  if(offset[i]==0.0)
914  {
915  out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb;\n";
916  out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
917 
918  }else
919  {
920  out<<"pos = TexCoord0.xy + float2(0, float("<<offset[i] <<"));\n";
921  out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).b;\n";
922  }
923  }
924  }
925  out<<"FragColor.b = orb.y;\n";
926  out<<"FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
927  out<<"FragColor.r = intensity;}\n"<<'\0';
928 
929  return new ProgramCG( buffer);
930 }
931 
932 
933 ProgramGPU* FilterGLCG::CreateFilterHPK(float kernel[], float offset[], int width)
934 {
935  //both h and v are packed...
936  int i, j , xw, xwn;
937  int halfwidth = width >>1;
938  float * pf = kernel + halfwidth;
939  int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up
940  int npixel = (nhpixel<<1)+1;//
941  char buffer[10240];
942  float weight[3];
943  ostrstream out(buffer, 10240);
944  out<<setprecision(8);
945 
946  out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
947  out<< "float4 result = float4(0, 0, 0, 0); \nfloat4 pc; float2 coord; \n";
949  for( i = 0 ; i < npixel ; i++)
950  {
951 
952  out<<"coord = TexCoord0.xy + float2(float("<<i-nhpixel<<"),0);\n";
953  out<<"pc=texRECT(tex, coord);\n";
954  if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0) pc = pc.rrbb;\n";
955 
956  //for each sub-pixel j in center, the weight of sub-pixel k
957  xw = (i - nhpixel)*2;
958  for( j = 0; j < 3; j++)
959  {
960  xwn = xw + j -1;
961  weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
962  }
963  //if(weight[1]!=0.0) out<<"FragColor += "<<weight[1]<<"*pc;\n";
964  //out<<"FragColor += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
965 
966  if(weight[1] == 0.0)
967  {
968  out<<"result += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
969  }
970  else
971  {
972  out<<"result += float4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
973  out<<"result += float4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
974  }
975 
976  }
977  out<<
978  " FragColor = result; }\n"<<'\0';
979  return new ProgramCG( buffer);
980 }
981 
982 ProgramGPU* FilterGLCG::CreateFilterVPK(float kernel[], float offset[], int height)
983 {
984 
985  //both h and v are packed...
986  int i, j , yw, ywn;
987  int halfh = height >>1;
988  float * pf = kernel + halfh;
989  int nhpixel = (halfh+1)>>1; //how many neighbour pixels need to be looked up
990  int npixel = (nhpixel<<1)+1;//
991  char buffer[10240];
992  float weight[3];
993  ostrstream out(buffer, 10240);
994  out<<setprecision(8);
995 
996  out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
997  out<< "float4 result = float4(0, 0, 0, 0);\nfloat4 pc; float2 coord;\n";
999 
1000  for( i = 0 ; i < npixel ; i++)
1001  {
1002 
1003  out<<"coord = TexCoord0.xy + float2(0, float("<<i-nhpixel<<"));\n";
1004  out<<"pc=texRECT(tex, coord);\n";
1005  if(GlobalUtil::_PreciseBorder) out<<"if(coord.y < 0) pc = pc.rgrg;\n";
1006  //for each sub-pixel j in center, the weight of sub-pixel k
1007  yw = (i - nhpixel)*2;
1008  for( j = 0; j < 3; j++)
1009  {
1010  ywn = yw + j -1;
1011  weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
1012  }
1013  //if(weight[1]!=0.0) out<<"FragColor += "<<weight[1]<<"*pc;\n";
1014  //out<<"FragColor += float4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
1015  if(weight[1] == 0.0)
1016  {
1017  out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
1018  }else
1019  {
1020  out<<"result += float4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
1021  out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
1022  }
1023  }
1024  out<<
1025  " FragColor = result; }\n"<<'\0';
1026  return new ProgramCG( buffer);
1027 }
1028 
1029 
1030 void ShaderBagCG::LoadGenListShader(int ndoglev, int nlev)
1031 {
1032  ProgramCG * program;
1033 
1034  s_genlist_init_tight = new ProgramCG(
1035  "void main (\n"
1036  "uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1037  "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1038  "out float4 FragColor : COLOR0){\n"
1039  "float4 helper = float4( texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"
1040  "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
1041  "FragColor = float4(helper>0.0);\n"
1042  "}");
1043 
1044  s_genlist_init_ex = program = new ProgramCG(
1045  "void main (uniform float2 bbox, \n"
1046  "uniform samplerRECT tex, \n"
1047  "in float4 TexCoord0 : TEXCOORD0,\n"
1048  "in float4 TexCoord1 : TEXCOORD1, \n"
1049  "in float4 TexCoord2 : TEXCOORD2, \n"
1050  "in float4 TexCoord3 : TEXCOORD3,\n"
1051  "out float4 FragColor : COLOR0){\n"
1052  "float4 helper = float4( \n"
1053  "texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"
1054  "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
1055  "bool4 helper4 = bool4(TexCoord0.xy < bbox, TexCoord3.xy < bbox); \n"
1056  "bool4 helper2 = helper4.xzxz && helper4.yyww; \n"
1057  "FragColor = float4(helper2 && (helper>0.0 ));\n"
1058  "}");
1059  _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
1060 
1061 
1062  //reduction ...
1063  s_genlist_histo = new ProgramCG(
1064  "void main (\n"
1065  "uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
1066  "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, in float2 TexCoord3 : TEXCOORD3,\n"
1067  "out float4 FragColor : COLOR0){\n"
1068  "float4 helper; float4 helper2; \n"
1069  "helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
1070  "helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
1071  "FragColor.rg = helper2.xz + helper2.yw;\n"
1072  "helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
1073  "helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
1074  "FragColor.ba= helper2.xz+helper2.yw;\n"
1075  "}");
1076 
1077 
1078  //read of the first part, which generates tex coordinates
1079 
1080  s_genlist_start= program = LoadGenListStepShader(1, 1);
1081  _param_ftex_width= cgGetNamedParameter(*program, "width");
1082  _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
1083  //stepping
1084  s_genlist_step = program = LoadGenListStepShader(0, 1);
1085  _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
1086  _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
1087 
1088 
1089 }
1090 
1091 ProgramCG* ShaderBagCG::LoadGenListStepShader(int start, int step)
1092 {
1093  int i;
1094  char buffer[10240];
1095  //char chanels[5] = "rgba";
1096  ostrstream out(buffer, 10240);
1097  out<<"void main(out float4 FragColor : COLOR0, \n";
1098 
1099  for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";
1100 
1101  if(start)
1102  {
1103  out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
1104  out<<"float index = floor(tpos.y) * width + floor(tpos.x) + 0.0001;\n";
1105  out<<"float2 pos = float2(0.5, 0.5);\n";
1106  }else
1107  {
1108  out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
1109  out<<"float4 tc = texRECT( tex, tpos);\n";
1110  out<<"float2 pos = tc.rg; float index = tc.b;\n";
1111  }
1112  out<<"float2 sum; float4 cc;\n";
1113 
1114 
1115 
1116  if(step>0)
1117  {
1118  out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
1119  for(i = 0; i < step; i++)
1120  {
1121 //#define SETP_CODE_2
1122 
1123 #ifndef SETP_CODE_2
1124 /* out<<"cc = texRECT(tex"<<i<<", pos);\n";
1125  out<<"float sum3[3] = {cc.r, cc.r + cc.g, cc.r + cc.g + cc.b};\n";
1126  out<<"float3 cmp = float3(index > float3(sum3[0], sum3[1], sum3[2]));\n";
1127  out<<"opos.y = -0.5 + cmp.y; opos.x = -0.5 + cmp.x + (cmp.z - cmp.y);\n";
1128  out<<"index -= dot(cmp, cc.rgb);\n";
1129  out<<"pos = (pos + pos + opos);\n";*/
1130 
1131  out<<"cc = texRECT(tex"<<i<<", pos); sum.x = cc.r + cc.g;\n";
1132  out<<"if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
1133  out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
1134  out<<"pos = (pos + pos + opos);\n";
1135 
1136 /* out<<"cc = texRECT(tex"<<i<<", pos);\n";
1137  out<<"if (index <cc.r){ opos = cpos.xx;}\n";
1138  out<<"else{sum.x = cc.r + cc.g;";
1139  out<<"if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
1140  out<<"else{sum.y = sum.x + cc.b;";
1141  out<<"if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
1142  out<<"else {opos = cpos.yy; index -= sum.y;}}}\n";
1143  out<<"pos = (pos + pos + opos);\n";*/
1144 
1145 #else
1146  out<<"cc = texRECT(tex"<<i<<", pos);\n";
1147  out<<"if (index < cc.r) opos = cpos.xx;\n";
1148  out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
1149  out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
1150  out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
1151  out<<"pos = (pos + pos + opos);\n";
1152 #endif
1153  }
1154  }
1155  out<<"FragColor = float4(pos, index, 1);\n";
1156  out<<"}\n"<<'\0';
1157  return new ProgramCG(buffer);
1158 }
1159 
1160 void ShaderBagCG::SetGenListInitParam(int w, int h)
1161 {
1162  float bbox[2] = {w -1.0f, h - 1.0f};
1163  cgGLSetParameter2fv(_param_genlist_init_bbox, bbox);
1164 }
1165 
1166 void ShaderBagCG::SetGenListStartParam(float width, int tex0)
1167 {
1168  cgGLSetParameter1f(_param_ftex_width, width);
1169 
1170  if(_param_genlist_start_tex0)
1171  {
1172  cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
1173  cgGLEnableTextureParameter(_param_genlist_start_tex0);
1174  }
1175 }
1176 
// Builds the descriptor fragment program and caches its uniform handles.
//
// The Cg source is assembled in a 10240-byte stack buffer through an
// ostrstream, compiled into a new ProgramCG stored in s_descriptor_fp, and the
// handles of the "gradTex", "size" and "dsize" uniforms are looked up.
// The generated shader writes two float4 outputs (COLOR0 and COLOR1) that
// together hold eight orientation-bin accumulators (DA and DB).
1177 void ShaderBagCG::LoadDescriptorShaderF2()
1178 {
1179  //one shader output 128/8 = 16 , each fragout encodes 4
1180  //const double twopi = 2.0*3.14159265358979323846;
1181  //const double rpi = 8.0/twopi;
1182  char buffer[10240];
1183  ostrstream out(buffer, 10240);
1184 
1185  out<<setprecision(8);
1186 
// Prologue: decode which feature and which of its 16 cells this fragment
// handles, fetch the feature (position, angle, sigma) from `tex`, and write
// zeros for features too close to the image border.
1187  out<<"\n"
1188  "#define M_PI 3.14159265358979323846\n"
1189  "#define TWO_PI (2.0*M_PI)\n"
1190  "#define RPI 1.2732395447351626861510701069801\n"
1191  "#define WF size.z\n"
1192  "void main(uniform samplerRECT tex, \n"
1193  "uniform samplerRECT gradTex, \n"
1194  "uniform float4 dsize, \n"
1195  "uniform float3 size, \n"
1196  "in float2 TexCoord0 : TEXCOORD0, \n"
1197  "out float4 FragData0:COLOR0, \n"
1198  "out float4 FragData1:COLOR1) \n"
1199  "{\n"
1200  " float2 dim = size.xy; //image size \n"
1201  " float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
1202  " float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5)); \n"
1203  " index = floor(index*0.125) + 0.49; \n"
1204  " float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
1205  " float2 pos = texRECT(tex, coord).xy; \n"
1206  " if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
1207  " //discard; \n"
1208  " { FragData0 = FragData1 = float4(0.0); return; }\n"
1209  " float anglef = texRECT(tex, coord).z;\n"
1210  " if(anglef > M_PI) anglef -= TWO_PI;\n"
1211  " float sigma = texRECT(tex, coord).w; \n"
1212  " float spt = abs(sigma * WF); //default to be 3*sigma \n";
1213 
1214  //rotation
1215  out<<
1216  " float4 cscs, rots; \n"
1217  " sincos(anglef, cscs.y, cscs.x); \n"
1218  " cscs.zw = - cscs.xy; \n"
1219  " rots = cscs /spt; \n"
1220  " cscs *= spt; \n";
1221 
1222  //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
1223  //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
1224  //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
1225  //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
1226  out<<
1227  " float4 temp; float2 pt, offsetpt; \n"
1228  " /*the fraction part of idx is .5*/ \n"
1229  " offsetpt.x = 4.0 * frac(idx*0.25) - 2.0; \n"
1230  " offsetpt.y = floor(idx*0.25) - 1.5; \n"
1231  " temp = cscs.xwyx*offsetpt.xyxy; \n"
1232  " pt = pos + temp.xz + temp.yw; \n";
1233 
1234  //get a horizontal bounding box of the rotated rectangle
1235  out<<
1236  " float2 bwin = abs(cscs.xy); \n"
1237  " float bsz = bwin.x + bwin.y; \n"
1238  " float4 sz; float2 spos; \n"
1239  " sz.xy = max(pt - bsz, float2(1,1));\n"
1240  " sz.zw = min(pt + bsz, dim - 2); \n"
1241  " sz = floor(sz)+0.5;"; //move sample point to pixel center
1242 
1243  //get voting for two box
1244  out<<"\n"
1245  " float4 DA, DB; \n"
1246  " DA = DB = float4(0, 0, 0, 0); \n"
1247  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1248  " { \n"
1249  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1250  " { \n"
1251  " float2 diff = spos - pt; \n"
1252  " temp = rots.xywx * diff.xyxy; \n"
1253  " float2 nxy = (temp.xz + temp.yw); \n"
1254  " float2 nxyn = abs(nxy); \n"
1255  " if(all(nxyn < float2(1.0)))\n"
1256  " {\n"
1257  " float4 cc = texRECT(gradTex, spos); \n"
1258  " float mod = cc.b; float angle = cc.a; \n"
1259  " float theta0 = (anglef - angle)*RPI; \n"
1260  " float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"
1261  " diff = nxy + offsetpt.xy; \n"
1262  " float ww = exp(-0.125*dot(diff, diff));\n"
1263  " float2 weights = 1 - nxyn;\n"
1264  " float weight = weights.x * weights.y *mod*ww; \n"
1265  " float theta1 = floor(theta); \n"
1266  " float weight2 = (theta - theta1) * weight; \n"
1267  " float weight1 = weight - weight2;\n"
1268  " DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"
1269  " DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"
1270  " DB += float4(theta1 == float4(4, 5, 6, 7))*weight1; \n"
1271  " DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"
1272  " }\n"
1273  " }\n"
1274  " }\n";
1275 
// Epilogue: write the two accumulators and terminate the source string.
1276  out<<
1277  " FragData0 = DA; FragData1 = DB;\n"
1278  "}\n"<<'\0';
1279 
// Compile and cache the handles of the uniforms set per draw call.
1280  ProgramCG * program;
1281  s_descriptor_fp = program = new ProgramCG(buffer);
1282  _param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");
1283  _param_descriptor_size = cgGetNamedParameter(*program, "size");
1284  _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
1285 
1286 
1287 }
1288 
1289 //the shader that computes the descriptors
// Loads the descriptor shader by delegating to LoadDescriptorShaderF2().
// NOTE(review): listing line 1292 is absent from this view, so a statement
// may precede the call in the real file - confirm against the original.
1290 void ShaderBagCG::LoadDescriptorShader()
1291 {
1293  LoadDescriptorShaderF2();
1294 }
1295 
// Builds the orientation fragment program and caches its uniform handles.
//
// The first half of the Cg source (signature, window setup, and the loop that
// accumulates a 36-direction gradient histogram into `bins`) is written here
// into a 10240-byte stack buffer; WriteOrientationCodeToStream() appends the
// rest and terminates the string. The result is compiled into s_orientation
// and the handles for "gradTex", "size" and "texS" are looked up.
1296 void ShaderBagCG::LoadOrientationShader()
1297 {
1298 
1299  char buffer[10240];
1300  ostrstream out(buffer,10240);
1301 
1302 
// Shader constants are baked in from GlobalUtil at generation time.
1303  out<<"\n"
1304  "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
1305  "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1306  "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1307  "void main(uniform samplerRECT tex, \n"
1308  "uniform samplerRECT gradTex, \n"
1309  " uniform float4 size, \n"
1310  " in float2 TexCoord0 : TEXCOORD0, \n"
1311  " out float4 FeatureData : COLOR0 ";
1312 
1313  //multi orientation output
1314  //use one additional texture to store up to four orientations
1315  //when we use one 32bit float to store two orientations, no extra texture is required
1316 
// NOTE(review): listing line 1317 is absent from this view; the COLOR1 output
// below is presumably emitted under a condition - confirm against the original.
1318  out<<", out float4 OrientationData : COLOR1";
1319 
// NOTE(review): listing line 1320 is absent from this view; the block below
// presumably has a guarding condition - confirm against the original.
1321  {
1322  //data for sub-pixel localization
1323  out<<", uniform samplerRECT texS";
1324  }
1325 
1326  //use 9 float4 to store histogram of 36 directions
// bins is declared with 10 entries but only 0..8 are cleared here; the code
// appended by WriteOrientationCodeToStream assigns bins[9] before reading it.
1327  out<<") \n"
1328  "{ \n"
1329  " float4 bins[10]; \n"
1330  " for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0); \n"
1331  " const float4 loc = texRECT(tex, TexCoord0); \n"
1332  " const bool orientation_mode = (size.z != 0); \n"
1333  " float2 pos = loc.xy; \n"
1334  " float sigma = orientation_mode? abs(size.z) : loc.w; \n";
// NOTE(review): listing line 1335 is absent from this view; the block below
// reads texS, so it presumably shares the condition that declares texS above.
1336  {
1337  out<<
1338  " if(orientation_mode) {\n"
1339  " float4 keyx = texRECT(texS, pos);\n"
1340  " sigma = sigma * pow(size.w, keyx.w); \n"
1341  " pos.xy = pos.xy + keyx.yz; \n"
1342  " #if " << GlobalUtil::_KeepExtremumSign << "\n"
1343  " if(keyx.x<0.6) sigma = - sigma;\n"
1344  " #endif\n"
1345  " }\n";
1346  }
1347 
// Early-out when size.z is negative, then set up the sampling window clamped
// to the image interior.
1348  out<<
1349  " //bool fixed_orientation = (size.z < 0); \n"
1350  " if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
1351  " const float gsigma = sigma * GAUSSIAN_WF; \n"
1352  " const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"
1353  " const float2 dim = size.xy; \n"
1354  " const float dist_threshold = win.x*win.x+0.5; \n"
1355  " const float factor = -0.5/(gsigma*gsigma); \n"
1356  " float4 sz; float2 spos; \n"
1357  " //if(any(pos.xy <= 1)) discard; \n"
1358  " sz.xy = max( pos - win, float2(1,1)); \n"
1359  " sz.zw = min( pos + win, dim-2); \n"
1360  " sz = floor(sz)+0.5;";
1361  //loop to get the histogram
1362 
// NOTE(review): the shader text below calls `fract`, while the other shaders
// in this file use `frac` - confirm the target Cg profile accepts it.
1363  out<<"\n"
1364  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1365  " { \n"
1366  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1367  " { \n"
1368  " const float2 offset = spos - pos; \n"
1369  " const float sq_dist = dot(offset,offset); \n"
1370  " if( sq_dist < dist_threshold){ \n"
1371  " const float4 cc = texRECT(gradTex, spos); \n"
1372  " const float grad = cc.b; float theta = cc.a; \n"
1373  " float idx = floor(degrees(theta)*0.1); \n"
1374  " const float weight = grad*exp(sq_dist * factor); \n"
1375  " if(idx < 0 ) idx += 36; \n"
1376  " const float vidx = 4.0 * fract(idx * 0.25);//fmod(idx, 4); \n"
1377  " const float4 inc = weight*float4(vidx == float4(0,1,2,3)); ";
1378 
// Choose how the histogram bin is addressed: direct indexing when enabled and
// the active fragment profile string is "gp4fp", otherwise an unrolled search.
1379  if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
1380 // if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
1381  {
1382  //gp_fp supports dynamic indexing
1383  out<<"\n"
1384  " int iidx = int(floor(idx*0.25)); \n"
1385  " bins[iidx]+=inc; \n"
1386  " } \n"
1387  " } \n"
1388  " }";
1389 
1390  }else
1391  {
1392  //nvfp40 still does not support dynamic array indexing
1393  //unrolled binary search...
1394  out<<"\n"
1395  " if(idx < 16) \n"
1396  " { \n"
1397  " if(idx < 8) \n"
1398  " { \n"
1399  " if(idx < 4) { bins[0]+=inc;} \n"
1400  " else { bins[1]+=inc;} \n"
1401  " }else \n"
1402  " { \n"
1403  " if(idx < 12){ bins[2]+=inc;} \n"
1404  " else { bins[3]+=inc;} \n"
1405  " } \n"
1406  " }else if(idx < 32) \n"
1407  " { \n"
1408  " if(idx < 24) \n"
1409  " { \n"
1410  " if(idx <20) { bins[4]+=inc;} \n"
1411  " else { bins[5]+=inc;} \n"
1412  " }else \n"
1413  " { \n"
1414  " if(idx < 28){ bins[6]+=inc;} \n"
1415  " else { bins[7]+=inc;} \n"
1416  " } \n"
1417  " }else \n"
1418  " { \n"
1419  " bins[8]+=inc; \n"
1420  " } \n"
1421  " } \n"
1422  " } \n"
1423  " }";
1424 
1425  }
1426 
// Appends histogram smoothing, peak search and output code, plus the '\0'.
1427  WriteOrientationCodeToStream(out);
1428 
1429  ProgramCG * program;
1430  s_orientation = program = new ProgramCG(buffer);
1431  _param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");
1432  _param_orientation_size = cgGetNamedParameter(*program, "size");
1433  _param_orientation_stex = cgGetNamedParameter(*program, "texS");
1434 }
1435 
// Appends the second half of the orientation shader to `out`: histogram
// smoothing, the search for the maximum vote, peak detection with sub-bin
// refinement, the output write, the closing brace and a terminating '\0'.
// The emitted code expects `bins`, `pos`, `sigma` and `orientation_mode` to
// have been declared by the shader text already written to the stream.
//
// out : stream that already holds the first half of the shader source.
1436 void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)
1437 {
1438  //smooth histogram and find the largest
1439 /*
1440  smoothing kernel: (1 3 6 7 6 3 1 )/27
1441  the same as 3 pass of (1 1 1)/3 averaging
1442  maybe better to use 4 pass on the vectors...
1443 */
1444 
1445 
1446  //the inner loop on different array numbers is always unrolled in fp40
1447 
1448  //bug fixed here:)
1449  out<<"\n"
1450  " float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"
1451  " float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"
1452  " for (int j=0; j<2; j++) \n"
1453  " { \n"
1454  " float4 prev = bins[8]; \n"
1455  " bins[9] = bins[0]; \n"
1456  " for (int i=0; i<9; i++) \n"
1457  " { \n"
1458  " float4 newb = mul ( bins[i], mat2); \n"
1459  " newb.xyz += mul ( prev.yzw, mat1); \n"
1460  " prev = bins[i]; \n"
1461  " newb.wzy += mul ( bins[i+1].zyx, mat1); \n"
1462  " bins[i] = newb; \n"
1463  " } \n"
1464  " }";
1465 
1466 
1467  //find the maximum voting
1468  out<<"\n"
1469  " float4 maxh; float2 maxh2; float4 maxh4 = bins[0]; \n"
1470  " for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]); \n"
1471  " maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";
1472 
// Shader snippets spliced into the peak loop below; which text they hold
// depends on the orientation mode chosen further down.
// NOTE(review): these point at string literals and are never written through;
// `const char*` would be the correct type.
1473  char *testpeak_code;
1474  char *savepeak_code;
1475 
1476 
1477 
1478  //save two/three/four orientations with the largest votings?
1479 
1480  //
// NOTE(review): listing line 1481 is absent from this view; the block below is
// paired with an `else` further down, so an `if` condition belongs here -
// confirm against the original.
1482  {
1483  out<<"\n"
1484  " float4 Orientations = float4(0, 0, 0, 0); \n"
1485  " float4 weights = float4(0,0,0,0); ";
1486 
1487  testpeak_code = "\n"
1488  " {test = bins[i]>hh;";
1489 
1490  //save the orientations in weight-decreasing order
// NOTE(review): listing line 1491 is absent from this view; the block below is
// followed by `else if(GlobalUtil::_MaxOrientation ==3)`, so an `if`
// condition belongs here - confirm against the original.
1492  {
1493  savepeak_code = "\n"
1494  " if(weight <=weights.g){}\n"
1495  " else if(weight >weights.r)\n"
1496  " {weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"
1497  " else {weights.g = weight; Orientations.g = th;}";
1498 
1499  }else if(GlobalUtil::_MaxOrientation ==3)
1500  {
1501  savepeak_code = "\n"
1502  " if(weight <=weights.b){}\n"
1503  " else if(weight >weights.r)\n"
1504  " {weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"
1505  " else if(weight >weights.g)\n"
1506  " {weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"
1507  " else {weights.b = weight; Orientations.b = th;}";
1508  }else
1509  {
1510  savepeak_code = "\n"
1511  " if(weight <=weights.a){}\n"
1512  " else if(weight >weights.r)\n"
1513  " {weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"
1514  " else if(weight >weights.g)\n"
1515  " {weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"
1516  " else if(weight >weights.b)\n"
1517  " {weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"
1518  " else {weights.a = weight; Orientations.a = th;}";
1519  }
1520 
1521  }else
1522  {
// Single-orientation variant: only the first bin reaching the maximum is kept.
1523  out<<"\n"
1524  " float Orientations = 0; ";
1525  testpeak_code ="\n"
1526  " if(npeaks==0){ \n"
1527  " test = (bins[i] >= maxh) ;";
1528  savepeak_code="\n"
1529  " npeaks++; \n"
1530  " Orientations = th.x;";
1531 
1532  }
1533 
1534  //find the peaks
1535  //the following loop will be unrolled
1536 
// Each of the 9 iterations examines the four bins held in one float4; a bin
// is a peak when it passes the test and exceeds both neighbours, and `di`
// refines its position from the neighbouring bin values.
1537  out<<"\n"
1538  " const float4 hh = maxh * ORIENTATION_THRESHOLD; bool4 test; \n"
1539  " bins[9] = bins[0]; \n"
1540  " float npeaks = 0, k = 0; \n"
1541  " float prevb = bins[8].w; \n"
1542  " for (int i = 0; i <9 ; i++) \n"
1543  " {"
1544  <<testpeak_code<<" \n"
1545  " if( any ( test.xy || test.zw) ) \n"
1546  " { \n"
1547  " if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \n"
1548  " { \n"
1549  " float di = 0.5 * (bins[i].y-prevb) / (bins[i].x *2.0 -bins[i].y -prevb) ; \n"
1550  " float th = (k+di+0.5); float weight = bins[i].x;"
1551  <<savepeak_code<<"\n"
1552  " }\n"
1553  " else if(test.g && all( bins[i].yy > bins[i].xz) ) \n"
1554  " { \n"
1555  " float di = 0.5 * (bins[i].z-bins[i].x) / (bins[i].y * 2.0 - bins[i].z - bins[i].x) ; \n"
1556  " float th = (k+di+1.5); float weight = bins[i].y; "
1557  <<savepeak_code<<" \n"
1558  " }"
1559  <<"\n"
1560  " if(test.b && all( bins[i].zz > bins[i].yw) ) \n"
1561  " { \n"
1562  " float di = 0.5 * (bins[i].w-bins[i].y) / (bins[i].z * 2.0-bins[i].w-bins[i].y) ; \n"
1563  " float th = (k+di+2.5); float weight = bins[i].z; "
1564  <<savepeak_code<<" \n"
1565  " }\n"
1566  " else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \n"
1567  " { \n"
1568  " float di = 0.5 * (bins[i+1].x-bins[i].z) / (bins[i].w * 2.0- bins[i+1].x-bins[i].z) ; \n"
1569  " float th = (k+di+3.5); float weight = bins[i].w; "
1570  <<savepeak_code<<" \n"
1571  " }\n"
1572  " }}\n"
1573  " k = k + 4.0; \n"
1574  " prevb = bins[i].w;\n"
1575  " }";
1576  //WRITE output
// NOTE(review): listing line 1577 is absent from this view; the block below is
// followed by `else if(GlobalUtil::_MaxOrientation>1)`, so an `if` condition
// belongs here - confirm against the original.
1578  {
1579  //pack two orientations in one float
1580  out<<"\n"
1581  " if(orientation_mode){\n"
1582  " Orientations.xy = frac(Orientations.xy / 36.0 + 1.0);\n"
1583  " if(weights.x <= 0) Orientations.x = 1.0;\n"
1584  " if(weights.y <= 0) Orientations.y = 1.0;\n"
1585  " float packed_orientation = pack_2ushort(Orientations.xy); \n"
1586  " FeatureData = float4(pos, packed_orientation, sigma);\n"
1587  " }else{\n"
1588  " FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
1589  " }\n";
1590  }else if(GlobalUtil::_MaxOrientation>1)
1591  {
// Writes the orientations to the second output and the peak count into
// FeatureData.z; the dot-product mask limits the count to _MaxOrientation.
1592  out<<"\n"
1593  " if(orientation_mode){\n"
1594  " npeaks = dot(float4(1,1,"
1595  <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
1596  <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), float4(weights>hh));\n"
1597  " OrientationData = radians((Orientations )*10.0);\n"
1598  " FeatureData = float4(pos, npeaks, sigma);\n"
1599  " }else{\n"
1600  " FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
1601  " }\n";
1602  }else
1603  {
1604  out<<"\n"
1605  " FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);";
1606  }
1607  //end
// Close main() and terminate the string so the caller's buffer can be used
// as a C string.
1608  out<<"\n"
1609  "}\n"<<'\0';
1610 
1611 
1612 }
1613 
1614 void ShaderBagCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1615 {
1616  cgGLSetTextureParameter(_param_orientation_gtex, oTex);
1617  cgGLEnableTextureParameter(_param_orientation_gtex);
1618  cgGLSetParameter1f(_param_orientation_size, sigma);
1619 }
1620 
// Supplies the per-call inputs of the orientation shader.
//
// gtex          : texture bound and enabled on the "gradTex" parameter.
// width, height : uploaded as size.x and size.y.
// sigma         : uploaded as size.z.
// stex          : texture bound and enabled on the "texS" parameter.
// step          : uploaded as size.w.
1621 void ShaderBagCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1622 {
// NOTE(review): listing line 1623 is absent from this view - confirm nothing
// but a comment was there.
1624  cgGLSetTextureParameter(_param_orientation_gtex, gtex);
1625  cgGLEnableTextureParameter(_param_orientation_gtex);
1626 
// NOTE(review): listing line 1627 is absent from this view; the block below
// presumably has a guarding condition - confirm against the original.
1628  {
1629  //specify texture for subpixel subscale localization
1630  cgGLSetTextureParameter(_param_orientation_stex, stex);
1631  cgGLEnableTextureParameter(_param_orientation_stex);
1632  }
1633 
// Pack the four scalars into the shader's float4 `size` uniform.
1634  float size[4];
1635  size[0] = (float)width;
1636  size[1] = (float)height;
1637  size[2] = sigma;
1638  size[3] = step;
1639  cgGLSetParameter4fv(_param_orientation_size, size);
1640 
1641 }
1642 
// Supplies the per-call inputs of the descriptor shader.
//
// gtex           : texture bound and enabled on the "gradTex" parameter.
// dwidth, fwidth : uploaded with their reciprocals as
//                  dsize = (dwidth, 1/dwidth, fwidth, 1/fwidth).
// width, height  : uploaded as size.x and size.y.
// otex, sigma    : not used in the visible lines.
1643 void ShaderBagCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
1644 {
// NOTE(review): listing line 1645 is absent from this view - confirm nothing
// but a comment was there.
1646  cgGLSetTextureParameter(_param_descriptor_gtex, gtex);
1647  cgGLEnableTextureParameter(_param_descriptor_gtex);
1648 
1649  float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1650  cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
1651  float size[3];
1652  size[0] = width;
1653  size[1] = height;
// NOTE(review): listing line 1654 is absent from this view and size[2] is not
// assigned in the visible lines, although three floats are uploaded below and
// the descriptor shader reads size.z - confirm the assignment exists in the
// original.
1655  cgGLSetParameter3fv(_param_descriptor_size, size);
1656 }
1657 
1658 
1661 
1662 ShaderBagPKCG::ShaderBagPKCG()
1663 {
1664  ProgramCG::InitContext();
1665 }
1666 
1667 void ShaderBagPKCG::UnloadProgram()
1668 {
1669 
1670  cgGLUnbindProgram(ProgramCG::_FProfile);
1671  cgGLDisableProfile(ProgramCG::_FProfile);
1672 }
1673 
// Creates the fixed (non-generated) programs of the packed shader bag:
// grayscale conversion, sampling, margin copy, zero fill, gradient and DoG
// passes. When GlobalUtil::_SupportFP40 is set it also loads the full
// orientation shader (and the descriptor shader if _DescriptorPPT is set);
// otherwise a simplified orientation program is installed and two messages
// are written to std::cerr.
1674 void ShaderBagPKCG::LoadFixedShaders()
1675 {
1676  ProgramCG * program;
1677 
1678  /*
1679  char *rgb2gray_packing_code =
1680  "void main(uniform samplerRECT rgbTex, in float4 TexCoord0 : TEXCOORD0, \n"
1681  " in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1682  " in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0){\n"
1683  " const float3 weight = vec3(0.299, 0.587, 0.114);\n"
1684  " FragData.r = dot(weight, texRECT(rgbTex,TexCoord0.st ).rgb);\n"
1685  " FragData.g = dot(weight, texRECT(rgbTex,TexCoord1.st ).rgb);\n"
1686  " FragData.b = dot(weight, texRECT(rgbTex,TexCoord2.st ).rgb);\n"
1687  " FragData.a = dot(weight, texRECT(rgbTex,TexCoord3.st ).rgb);}";//
1688  s_gray = new ProgramCG( rgb2gray_packing_code);
1689  */
1690 
// RGB to intensity using weights (0.299, 0.587, 0.114).
1691  s_gray = new ProgramCG(
1692  "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1693  "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
1694  "FragColor= float4(intensity, intensity, intensity, 1.0);}" );
1695 
1696 
// Packs the red channel of four texture lookups into one RGBA texel.
1697  s_sampling = new ProgramCG(
1698  "void main(uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0, \n"
1699  " in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1700  " in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0 ){\n"
1701  " FragData= float4( texRECT(tex,TexCoord0.st ).r,texRECT(tex,TexCoord1.st ).r,\n"
1702  " texRECT(tex,TexCoord2.st ).r,texRECT(tex,TexCoord3.st ).r);}" );
1703 
1704 
// Reads the texel at coordinates clamped to truncate.xy and replicates
// channels selected by truncate.z / truncate.w.
1705  s_margin_copy = program = new ProgramCG(
1706  "void main(in float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
1707  "uniform samplerRECT tex, uniform float4 truncate){\n"
1708  "float4 cc = texRECT(tex, min(texCoord0.xy, truncate.xy)); \n"
1709  "bool2 ob = texCoord0.xy < truncate.xy;\n"
1710  "if(ob.y) { FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"
1711  "else if(ob.x) {FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1712  "else { float4 weights = float4(float4(0, 1, 2, 3) == truncate.w);\n"
1713  "float v = dot(weights, cc); FragColor = v.xxxx;}}");
1714 
1715  _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
1716 
1717 
1718  s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
1719 
// Three outputs: difference against texp, gradient magnitude, gradient angle.
1720  s_grad_pass = program = new ProgramCG(
1721  "void main (\n"
1722  "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
1723  "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
1724  "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
1725  "out float4 FragData2 : COLOR2, uniform samplerRECT tex, uniform samplerRECT texp)\n"
1726  "{\n"
1727  " float4 v1, v2, gg;\n"
1728  " float4 cc = texRECT(tex, TexCC.xy);\n"
1729  " float4 cp = texRECT(texp, TexCC.xy);\n"
1730  " FragData0 = cc - cp; \n"
1731  " float4 cl = texRECT(tex, TexLC.xy); float4 cr = texRECT(tex, TexRC.xy);\n"
1732  " float4 cd = texRECT(tex, TexCD.xy); float4 cu = texRECT(tex, TexCU.xy);\n"
1733  " float4 dx = (float4(cr.rb, cc.ga) - float4(cc.rb, cl.ga)).zxwy;\n"
1734  " float4 dy = (float4(cu.rg, cc.ba) - float4(cc.rg, cd.ba)).zwxy;\n"
1735  " FragData1 = 0.5 * sqrt(dx*dx + dy * dy);\n"
1736  " FragData2 = FragData1 > 0? atan2(dy, dx) : float4(0);\n"
1737  "}\n\0");
1738 
1739  _param_grad_pass_texp = cgGetNamedParameter(*program, "texp");
1740 
1741 
// Difference of the current texture and texp only.
// NOTE(review): unlike the gradient pass above, no "texp" handle is looked up
// for this program in the visible lines.
1742  s_dog_pass = program = new ProgramCG(
1743  "void main (float4 TexCC : TEXCOORD0, out float4 FragData0 : COLOR0, \n"
1744  " uniform samplerRECT tex, uniform samplerRECT texp)\n"
1745  "{\n"
1746  " float4 cc = texRECT(tex, TexCC.xy);\n"
1747  " float4 cp = texRECT(texp, TexCC.xy);\n"
1748  " FragData0 = cc - cp; \n"
1749  "}\n\0");
1750 
// NOTE(review): listing line 1751 is absent from this view - confirm nothing
// but a comment was there.
1752  if(GlobalUtil::_SupportFP40)
1753  {
1754  LoadOrientationShader();
1755  if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
1756  }else
1757  {
// Fallback: look the orientation up from oTex at half the feature position
// and select the channel from the fractional part of that coordinate.
1758  s_orientation = program = new ProgramCG(
1759  "void main(out float4 FragColor : COLOR0, \n"
1760  " uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
1761  " uniform float2 size, \n"
1762  " in float2 tpos : TEXCOORD0){\n"
1763  " float4 cc = texRECT(fTex, tpos);\n"
1764  " float2 co = cc.xy * 0.5; \n"
1765  " float4 oo = texRECT(oTex, co);\n"
1766  " bool2 bo = frac(co) < 0.5; \n"
1767  " float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1768  " FragColor = float4(cc.rg, o, size.x * pow(size.y, cc.a));}");
1769  _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
1770  _param_orientation_size= cgGetNamedParameter(*program, "size");
1771 
// NOTE(review): listing lines 1772-1774 are absent from this view; statements
// may be missing between the parameter lookups and the messages below -
// confirm against the original.
1775  std::cerr<<"Orientation simplified on this hardware"<<endl;
1776  std::cerr<<"Descriptor ignored on this hardware"<<endl;
1777  }
1778 }
1779 
// Creates the Cg fragment programs stored in s_copy_key, s_vertex_list,
// s_display_gaussian, s_display_dog, s_display_grad and s_display_keys, and
// caches the handle of the "sizes" uniform of the vertex-list program in
// _param_genvbo_size. Takes no arguments and returns nothing.
1780 void ShaderBagPKCG::LoadDisplayShaders()
1781 {
1782  ProgramCG * program;
1783 
 // Copies the r/g channels of the fetched texel and forces b = 0, a = 1.
1784  s_copy_key = new ProgramCG(
1785  "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1786  "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1); }");
1787 
1788  //shader used to write a vertex buffer object
1789  //which is used to draw the quads of each feature
 // The shader turns the fragment position into a running index
 // (0.1 * (sizes.y * floor(y) + x)), uses sizes.z and sizes.w to locate the
 // feature texel, and adds an offset whose components are 0 or +/- 3 * cc.a,
 // rotated by the angle cc.b. Texels with any non-positive x/y are written
 // through without an offset. sizes.x is not read (see the commented-out
 // alternative inside the shader).
1790  s_vertex_list = program = new ProgramCG(
1791  "void main(in float4 TexCoord0: TEXCOORD0,\n"
1792  "uniform float4 sizes, \n"
1793  "uniform samplerRECT tex, \n"
1794  "out float4 FragColor: COLOR0){\n"
1795  "float fwidth = sizes.y; \n"
1796  "float twidth = sizes.z; \n"
1797  "float rwidth = sizes.w; \n"
1798  "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
1799  "float px = fmod(index, twidth);\n"
1800  "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
1801  "float4 cc = texRECT(tex, tpos );\n"
1802  "float size = 3.0f * cc.a;// sizes.x;// \n"
1803  "FragColor.zw = float2(0.0, 1.0);\n"
1804  "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
1805  "{\n"
1806  " float type = frac(px);\n"
1807  " float2 dxy; float s, c;\n"
1808  " dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
1809  " dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
1810  " sincos(cc.b, s, c);\n"
1811  " FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1812  " FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1813  "}\n\0");
1814  /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
1815 
 // Must be looked up while 'program' still refers to the vertex-list program.
1816  _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
1817 
 // The next three shaders share one pattern: each texel carries four values,
 // and frac(TexCoord0.xy) < 0.5 selects which channel (r, g, b or a) is shown.
 // Gaussian: the selected value is written as a grey level.
1818  s_display_gaussian = new ProgramCG(
1819  "void main(uniform samplerRECT tex, in float4 TexCoord0:TEXCOORD0, out float4 FragData: COLOR0 ){\n"
1820  "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1821  "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); FragData = float4(v.xxx, 1.0);}");
1822 
 // DoG: the selected value is remapped to 0.5 + 20 * v before display.
1823  s_display_dog = new ProgramCG(
1824  "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1825  "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1826  "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1827  "FragColor = float4(g, g, g, 1.0);}" );
1828 
1829 
 // Gradient: the selected value is scaled by 5.
1830  s_display_grad = new ProgramCG(
1831  "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1832  "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1833  "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); FragColor = float4(5.0 *v.xxx, 1.0); }");
1834 
 // Keys: fragments with oc.r == 0 are discarded, oc.r > 0 is drawn red and
 // everything else green. 'cc' and 'v' are computed in the shader but do not
 // feed into the output colour.
1835  s_display_keys= new ProgramCG(
1836  "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1837  "float4 oc = texRECT(tex, TexCoord0.xy); \n"
1838  "float4 cc = float4(abs(oc.r) == float4(1.0, 2.0, 3.0, 4.0));\n"
1839  "bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1840  "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1841  "if(oc.r == 0) discard;\n"
1842  "else if(oc.r > 0) FragColor = float4(1.0, 0, 0,1.0); \n"
1843  "else FragColor = float4(0.0,1.0,0.0,1.0); }" );
1844 }
1845 
// Creates the feature-list generation programs (s_genlist_init_tight,
// s_genlist_init_ex, s_genlist_end, s_genlist_histo, s_genlist_start,
// s_genlist_step) and caches the Cg parameter handles looked up from them.
// ndoglev and nlev are not read anywhere in the visible body.
1846 void ShaderBagPKCG::LoadGenListShader(int ndoglev, int nlev)
1847 {
1848 
1849  //the V2 algorithms are only slightly faster, but way more complicated
1850  //LoadGenListShaderV2(ndoglev, nlev); return;
1851  ProgramCG * program;
1852 
 // Samples the r channel at four texture coordinates and writes 1 into the
 // matching output channel wherever that sample is non-zero.
1853  s_genlist_init_tight = new ProgramCG(
1854  "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1855  "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1856  "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1857  "{\n"
1858  " float4 data = float4( texRECT(tex, TexCoord0.xy).r,\n"
1859  " texRECT(tex, TexCoord1.xy).r,\n"
1860  " texRECT(tex, TexCoord2.xy).r,\n"
1861  " texRECT(tex, TexCoord3.xy).r);\n"
1862  " FragColor = float4(data != 0);\n"
1863  "}");
1864 
 // Same idea, but |r| is matched against the codes 1..4 and each match is
 // additionally masked by comparing the texture coordinates with 'bbox'.
1865  s_genlist_init_ex = program = new ProgramCG(
1866  "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
1867  "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
1868  "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1869  "out float4 FragColor : COLOR0)\n"
1870  "{\n"
1871  " bool4 helper1 = abs(texRECT(tex, TexCoord0.xy).r)== float4(1.0, 2.0, 3.0, 4.0); \n"
1872  " bool4 helper2 = abs(texRECT(tex, TexCoord1.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1873  " bool4 helper3 = abs(texRECT(tex, TexCoord2.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1874  " bool4 helper4 = abs(texRECT(tex, TexCoord3.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1875  " bool4 bx1 = TexCoord0.xxyy < bbox; \n"
1876  " bool4 bx4 = TexCoord3.xxyy < bbox; \n"
1877  " bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
1878  " bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
1879  " helper1 = (bx1.xyxy && bx1.zzww && helper1);\n"
1880  " helper2 = (bx2.xyxy && bx2.zzww && helper2);\n"
1881  " helper3 = (bx3.xyxy && bx3.zzww && helper3);\n"
1882  " helper4 = (bx4.xyxy && bx4.zzww && helper4);\n"
1883  " FragColor.r = any(helper1.xy || helper1.zw); \n"
1884  " FragColor.g = any(helper2.xy || helper2.zw); \n"
1885  " FragColor.b = any(helper3.xy || helper3.zw); \n"
1886  " FragColor.a = any(helper4.xy || helper4.zw); \n"
1887  "}");
1888  _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
1889 
1890  s_genlist_end = program = new ProgramCG(
 // NOTE(review): this listing jumps from line 1890 to 1892, and the two shader
 // literals below are separated by a bare ':' (line 1904) with no visible '?'.
 // A condition choosing between them appears to be missing from the listing;
 // recover it from the original file. The literals differ only in the third
 // output component: the constant 1.0 in the first, sign(tk.x) in the second.
1892 
1893  "void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"
1894  " in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"
1895  "{\n"
1896  " float4 tc = texRECT( tex, tpos.xy);\n"
1897  " float2 pos = tc.rg; float index = tc.b;\n"
1898  " float4 tk = texRECT( ktex, pos); \n"
1899  " float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
1900  " float2 opos; \n"
1901  " opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
1902  " opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
1903  " FragColor = float4(opos + pos + pos + tk.yz, 1.0, tk.w);\n"
1904  "}" :
1905 
1906  "void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"
1907  " in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"
1908  "{\n"
1909  " float4 tc = texRECT( tex, tpos.xy);\n"
1910  " float2 pos = tc.rg; float index = tc.b;\n"
1911  " float4 tk = texRECT( ktex, pos); \n"
1912  " float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
1913  " float2 opos; \n"
1914  " opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
1915  " opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
1916  " FragColor = float4(opos + pos + pos + tk.yz, sign(tk.x), tk.w);\n"
1917  "}"
1918  );
1919  _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
1920 
1921  //reduction ...
 // For each of four input texels the shader adds its four channels together;
 // the four sums become the r, g, b and a of the output.
1922  s_genlist_histo = new ProgramCG(
1923  "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
1924  "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
1925  "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1926  "{\n"
1927  " float4 helper; float4 helper2; \n"
1928  " helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
1929  " helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
1930  " FragColor.rg = helper2.xz + helper2.yw;\n"
1931  " helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
1932  " helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
1933  " FragColor.ba= helper2.xz+helper2.yw;\n"
1934  "}");
1935 
1936 
1937  //read of the first part, which generates tex coordinates
1938 
 // Both step programs come from ShaderBagCG::LoadGenListStepShader; the first
 // argument is 1 for the start program and 0 for the stepping program.
1939  s_genlist_start= program = ShaderBagCG::LoadGenListStepShader(1, 1);
1940  _param_ftex_width= cgGetNamedParameter(*program, "width");
1941  _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
1942  //stepping
1943  s_genlist_step = program = ShaderBagCG::LoadGenListStepShader(0, 1);
1944  _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
1945  _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
1946 
1947 
1948 }
1949 
1950 
1951 
1952 void ShaderBagPKCG::LoadGenListShaderV2(int ndoglev, int nlev)
1953 {
1954  ProgramCG * program;
1955 
1956  s_genlist_init_tight = new ProgramCG(
1957  "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1958  "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1959  "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1960  "{\n"
1961  " float4 data1 = texRECT(tex, TexCoord0.xy);\n"
1962  " float4 data2 = texRECT(tex, TexCoord1.xy);\n"
1963  " float4 data3 = texRECT(tex, TexCoord2.xy);\n"
1964  " float4 data4 = texRECT(tex, TexCoord3.xy);\n"
1965  " bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
1966  " bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1967  " bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1968  " bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1969  " FragColor.r = any(helper1.xy || helper1.zw); \n"
1970  " FragColor.g = any(helper2.xy || helper2.zw); \n"
1971  " FragColor.b = any(helper3.xy || helper3.zw); \n"
1972  " FragColor.a = any(helper4.xy || helper4.zw); \n"
1973  " if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
1974  " {\n"
1975  " //use a special method if there is only one in the 16, \n"
1976  " float4 data, helper; float2 pos, opos; \n"
1977  " if(FragColor.r){ \n"
1978  " data = data1; helper = helper1; pos = TexCoord0.xy;\n"
1979  " }else if(FragColor.g){\n"
1980  " data = data2; helper = helper2; pos = TexCoord1.xy;\n"
1981  " }else if(FragColor.b){\n"
1982  " data = data3; helper = helper3; pos = TexCoord2.xy;\n"
1983  " }else{\n"
1984  " data = data4; helper = helper4; pos = TexCoord3.xy;\n"
1985  " }\n"
1986  " opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
1987  " opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
1988  " FragColor = float4( pos + pos + opos + data.yz, -1, data.w); \n"
1989  " }\n"
1990  "}");
1991 
1992  s_genlist_init_ex = program = new ProgramCG(
1993  "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
1994  "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
1995  "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1996  "out float4 FragColor : COLOR0)\n"
1997  "{\n"
1998  " float4 data1 = texRECT(tex, TexCoord0.xy);\n"
1999  " float4 data2 = texRECT(tex, TexCoord1.xy);\n"
2000  " float4 data3 = texRECT(tex, TexCoord2.xy);\n"
2001  " float4 data4 = texRECT(tex, TexCoord3.xy);\n"
2002  " bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
2003  " bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2004  " bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2005  " bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2006  " bool4 bx1 = TexCoord0.xxyy < bbox; \n"
2007  " bool4 bx4 = TexCoord3.xxyy < bbox; \n"
2008  " bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
2009  " bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
2010  " helper1 = bx1.xyxy && bx1.zzww && helper1; \n"
2011  " helper2 = bx2.xyxy && bx2.zzww && helper2; \n"
2012  " helper3 = bx3.xyxy && bx3.zzww && helper3; \n"
2013  " helper4 = bx4.xyxy && bx4.zzww && helper4; \n"
2014  " FragColor.r = any(helper1.xy || helper1.zw); \n"
2015  " FragColor.g = any(helper2.xy || helper2.zw); \n"
2016  " FragColor.b = any(helper3.xy || helper3.zw); \n"
2017  " FragColor.a = any(helper4.xy || helper4.zw); \n"
2018  " if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
2019  " {\n"
2020  " //use a special method if there is only one in the 16, \n"
2021  " float4 data, helper; bool4 bhelper; float2 pos, opos; \n"
2022  " if(FragColor.r){ \n"
2023  " data = data1; bhelper = helper1; pos = TexCoord0.xy;\n"
2024  " }else if(FragColor.g){\n"
2025  " data = data2; bhelper = helper2; pos = TexCoord1.xy;\n"
2026  " }else if(FragColor.b){\n"
2027  " data = data3; bhelper = helper3; pos = TexCoord2.xy;\n"
2028  " }else{\n"
2029  " data = data4; bhelper = helper4; pos = TexCoord3.xy;\n"
2030  " }\n"
2031  " helper = float4(bhelper); \n"
2032  " opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
2033  " opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
2034  " FragColor = float4(pos + pos + opos + data.yz, -1, data.w); \n"
2035  " }\n"
2036  "}");
2037  _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
2038 
2039  s_genlist_end = program = new ProgramCG(
2040 
2041  "void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"
2042  " in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"
2043  "{\n"
2044  " float4 tc = texRECT( tex, tpos.xy);\n"
2045  " float2 pos = tc.rg; float index = tc.b;\n"
2046  " if(index == -1)\n"
2047  " {\n"
2048  " FragColor = float4(tc.xy, 0, tc.w);\n"
2049  " }else\n"
2050  " {\n"
2051  " float4 tk = texRECT( ktex, pos); \n"
2052  " float4 keys = float4(abs(tk.r) == float4(1.0, 2.0, 3.0, 4.0)); \n"
2053  " float2 opos; \n"
2054  " opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
2055  " opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
2056  " FragColor = float4(opos + pos + pos + tk.yz, 0, tk.w);\n"
2057  " }\n"
2058  "}");
2059  _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
2060 
2061  //reduction ...
2062  s_genlist_histo = new ProgramCG(
2063  "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
2064  "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
2065  "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
2066  "{\n"
2067  " float4 helper[4]; float4 helper2; \n"
2068  " helper[0] = texRECT(tex, TexCoord0); helper2.xy = helper[0].xy + helper[0].zw; \n"
2069  " helper[1] = texRECT(tex, TexCoord1); helper2.zw = helper[1].xy + helper[1].zw; \n"
2070  " FragColor.rg = helper2.xz + helper2.yw;\n"
2071  " helper[2] = texRECT(tex, TexCoord2); helper2.xy = helper[2].xy + helper[2].zw; \n"
2072  " helper[3] = texRECT(tex, TexCoord3); helper2.zw = helper[3].xy + helper[3].zw; \n"
2073  " FragColor.ba= helper2.xz+helper2.yw;\n"
2074  " bool4 keyt = float4(helper[0].z, helper[1].z, helper[2].z, helper[3].z) == -1.0; \n"
2075  " float keyc = dot(float4(keyt), float4(1,1,1,1)); \n"
2076  " if(keyc == 1.0 && dot(FragColor, float4(1,1,1,1)) == -1.0) \n"
2077  " {\n"
2078  " if(keyt.x) FragColor = helper[0];\n"
2079  " else if(keyt.y) FragColor = helper[1]; \n"
2080  " else if(keyt.z) FragColor = helper[2]; \n"
2081  " else FragColor = helper[3]; \n"
2082  " }else\n"
2083  " {\n"
2084  " FragColor = keyt? float4(1,1,1,1) : FragColor;\n"
2085  " }\n"
2086  "}");
2087 
2088  //read of the first part, which generates tex coordinates
2089 
2090  s_genlist_start= program = ShaderBagCG::LoadGenListStepShaderV2(1, 1);
2091  _param_ftex_width= cgGetNamedParameter(*program, "width");
2092  _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
2093  //stepping
2094  s_genlist_step = program = ShaderBagCG::LoadGenListStepShaderV2(0, 1);
2095  _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
2096  _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
2097 
2098 
2099 }
2100 
2101 
2102 
// Writes the source of one V2 list-generation step shader into a local buffer
// and returns a ProgramCG created from it with new (nothing in this function
// releases the object).
//   start - non-zero: the shader takes a uniform "width" and derives the
//           index from the fragment position, starting at pos (0.5, 0.5);
//           zero: it reads pos/index from an extra sampler "tex" and returns
//           the texel unchanged when its index is -1.
//   step  - number of "tex<i>" samplers declared and of descend iterations
//           emitted; each iteration picks one of four children from the
//           counts in cc and updates pos/index accordingly.
// NOTE(review): the text goes into a fixed 10240-byte array and every
// iteration appends a few hundred characters. The callers visible in this
// file pass step == 1; confirm no caller passes a value large enough to
// exhaust the buffer before the trailing '\0' is written.
2103 ProgramCG* ShaderBagCG::LoadGenListStepShaderV2(int start, int step)
2104 {
2105  int i;
2106  char buffer[10240];
2107  //char chanels[5] = "rgba";
2108  ostrstream out(buffer, 10240);
2109  out<<"void main(out float4 FragColor : COLOR0, \n";
2110 
 // one sampler per step: tex0, tex1, ...
2111  for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";
2112 
2113  if(start)
2114  {
2115  out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
2116  out<<"float index = floor(tpos.y) * width + floor(tpos.x);\n";
2117  out<<"float2 pos = float2(0.5, 0.5);\n";
2118  }else
2119  {
2120  out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
2121  out<<"float4 tc = texRECT( tex, tpos);\n";
2122  out<<"float2 pos = tc.rg; float index = tc.b;\n";
 // texels carrying the -1 marker are passed through untouched
2123  out<<"if(index==-1) {FragColor = tc; return;}\n";
2124  }
2125  out<<"float2 sum; float4 cc;\n";
2126 
2127 
2128 
2129  if(step>0)
2130  {
2131  out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
2132  for(i = 0; i < step; i++)
2133  {
2134 
2135  out<<"cc = texRECT(tex"<<i<<", pos);\n";
 // a texel whose z is -1 is emitted as the result right away
2136  out<<"if(cc.z == -1){FragColor = cc; return;}";
 // compare index against the running sums r, r+g, r+g+b to choose the
 // child offset (opos) and subtract the counts that were skipped
2137  out<<"sum.x = cc.r + cc.g;if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
2138  out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
2139  out<<"pos = (pos + pos + opos);\n";
2140  }
2141  }
2142  out<<"FragColor = float4(pos, index, 1);\n";
 // the explicit '\0' terminates the C string that ProgramCG reads from buffer
2143  out<<"}\n"<<'\0';
2144  return new ProgramCG(buffer);
2145 }
2146 
2147 
// Generates the keypoint-detection fragment program as text, compiles it into
// s_keypoint and caches the handles of its "texU" and "texD" samplers.
//   threshold      - emitted as THRESHOLD1; THRESHOLD0 is the same value,
//                    scaled by 0.8 when GlobalUtil::_SubpixelLocalization is
//                    set.
//   edge_threshold - emitted as THRESHOLD2 = (e + 1)^2 / e.
// The shader text is assembled in a local 10240-byte array.
2148 void ShaderBagPKCG:: LoadKeypointShader(float threshold, float edge_threshold)
2149 {
2150  //
2151  ProgramCG * program;
2152  char buffer[10240];
2153  float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2154  float threshold1 = threshold;
2155  float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2156  ostrstream out(buffer, 10240);
 // thresholds are printed with 8 significant digits
2157  out<<setprecision(8);
2158  //tex(X)(Y)
2159  //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
2160  //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
2161  out << "#define THRESHOLD0 " << threshold0 << "\n"
2162  "#define THRESHOLD1 " << threshold1 << "\n"
2163  "#define THRESHOLD2 " << threshold2 << "\n";
2164 
 // Shader prologue: fetch the 3x3 texel neighbourhood of 'tex' and, for each of
 // the four values held in the centre texel, collect its neighbours in v1[i]
 // and v2[i].
2165  out<<
2166  "void main (\n"
2167  "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
2168  "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
2169  "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
2170  "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
2171  "out float4 FragData0 : COLOR0, uniform samplerRECT tex, \n"
2172  "uniform samplerRECT texU, uniform samplerRECT texD)\n"
2173  "{\n"
2174  " float2 TexRU = float2(TexRC.x, TexCU.y); \n"
2175  " float4 ccc = texRECT(tex, TexCC.xy);\n"
2176  " float4 clc = texRECT(tex, TexLC.xy);\n"
2177  " float4 crc = texRECT(tex, TexRC.xy);\n"
2178  " float4 ccd = texRECT(tex, TexCD.xy);\n"
2179  " float4 ccu = texRECT(tex, TexCU.xy);\n"
2180  " float4 cld = texRECT(tex, TexLD.xy);\n"
2181  " float4 clu = texRECT(tex, TexLU.xy);\n"
2182  " float4 crd = texRECT(tex, TexRD.xy);\n"
2183  " float4 cru = texRECT(tex, TexRU.xy);\n"
2184  " float4 cc = ccc;\n"
2185  " float4 v1[4], v2[4];\n"
2186  " v1[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2187  " v1[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2188  " v1[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2189  " v1[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2190  " v2[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2191  " v2[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2192  " v2[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2193  " v2[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2194 
2195  //test against 8 neighbours
2196  //use variable to identify type of extremum
2197  //1.0 for local maximum and -1.0 for minimum
2198  <<
2199  " float4 key ={0, 0, 0, 0}; \n"
2200  " for(int i = 0; i < 4; i++)\n"
2201  " {\n"
2202  " bool4 test1 = cc[i] > max(v1[i], v2[i]), test2 = cc[i] < min(v1[i], v2[i]);\n"
2203  " key[i] = cc[i] > THRESHOLD0 && all(test1.xy&&test1.zw)?1.0: 0.0;\n"
2204  " key[i] = cc[i] < -THRESHOLD0 && all(test2.xy&&test2.zw)? -1.0: key[i];\n"
2205  " }\n"
2206  " if(TexCC.x < 1.0) {key.rb = 0;}\n"
2207  " if(TexCC.y < 1.0) {key.rg = 0;}\n"
2208  " FragData0 = float4(0.0);\n"
2209  " if(all(key == 0.0)) return; \n";
2210 
2211  //do edge suppression first..
2212  //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2213  //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2214 
 // A candidate is dropped when score_down <= 0 or
 // score_up > THRESHOLD2 * score_down, with score_up = (fxx + fyy)^2 and
 // score_down = fxx * fyy - fxy^2. The derivatives are kept for later use.
2215  out<<
2216  " float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2217  " for(int i = 0; i < 4; i++) \n"
2218  " {\n"
2219  " if(key[i] != 0)\n"
2220  " {\n"
2221  " float4 D2 = v1[i].xyzw - cc[i];\n"
2222  " float2 D4 = v2[i].xw - v2[i].yz;\n"
2223  " float2 D5 = 0.5*(v1[i].yw-v1[i].xz); \n"
2224  " fx[i] = D5.x;\n"
2225  " fy[i] = D5.y ;\n"
2226  " fxx[i] = D2.x + D2.y;\n"
2227  " fyy[i] = D2.z + D2.w;\n"
2228  " fxy[i] = 0.25*(D4.x + D4.y);\n"
2229  " float fxx_plus_fyy = fxx[i] + fyy[i];\n"
2230  " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
2231  " float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\n"
2232  " if( score_down <= 0 || score_up > THRESHOLD2 * score_down)key[i] = 0;\n"
2233  " }\n"
2234  " }\n"
2235  " if(all(key == 0.0)) return; \n\n";
2236 
2238  //read 9 pixels of upper/lower level
 // The same neighbourhood is fetched from texU and then from texD; a maximum
 // must not be below any of those samples and a minimum must not be above any
 // of them. v6 is filled for texU and overwritten for texD.
2239  out<<
2240  " float4 v4[4], v5[4], v6[4];\n"
2241  " ccc = texRECT(texU, TexCC.xy);\n"
2242  " clc = texRECT(texU, TexLC.xy);\n"
2243  " crc = texRECT(texU, TexRC.xy);\n"
2244  " ccd = texRECT(texU, TexCD.xy);\n"
2245  " ccu = texRECT(texU, TexCU.xy);\n"
2246  " cld = texRECT(texU, TexLD.xy);\n"
2247  " clu = texRECT(texU, TexLU.xy);\n"
2248  " crd = texRECT(texU, TexRD.xy);\n"
2249  " cru = texRECT(texU, TexRU.xy);\n"
2250  " float4 cu = ccc;\n"
2251  " v4[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2252  " v4[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2253  " v4[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2254  " v4[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2255  " v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2256  " v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2257  " v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2258  " v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2259  <<
2260  " for(int i = 0; i < 4; i++)\n"
2261  " {\n"
2262  " if(key[i] == 1.0)\n"
2263  " {\n"
2264  " bool4 test = cc[i]< max(v4[i], v6[i]); \n"
2265  " if(cc[i] < cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2266  " }else if(key[i] == -1.0)\n"
2267  " {\n"
2268  " bool4 test = cc[i]> min( v4[i], v6[i]); \n"
2269  " if(cc[i] > cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2270  " }\n"
2271  " }\n"
2272  " if(all(key == 0.0)) return; \n"
2273  <<
2274  " ccc = texRECT(texD, TexCC.xy);\n"
2275  " clc = texRECT(texD, TexLC.xy);\n"
2276  " crc = texRECT(texD, TexRC.xy);\n"
2277  " ccd = texRECT(texD, TexCD.xy);\n"
2278  " ccu = texRECT(texD, TexCU.xy);\n"
2279  " cld = texRECT(texD, TexLD.xy);\n"
2280  " clu = texRECT(texD, TexLU.xy);\n"
2281  " crd = texRECT(texD, TexRD.xy);\n"
2282  " cru = texRECT(texD, TexRU.xy);\n"
2283  " float4 cd = ccc;\n"
2284  " v5[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2285  " v5[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2286  " v5[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2287  " v5[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2288  " v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2289  " v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2290  " v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2291  " v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2292  <<
2293  " for(int i = 0; i < 4; i++)\n"
2294  " {\n"
2295  " if(key[i] == 1.0)\n"
2296  " {\n"
2297  " bool4 test = cc[i]< max(v5[i], v6[i]);\n"
2298  " if(cc[i] < cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2299  " }else if(key[i] == -1.0)\n"
2300  " {\n"
2301  " bool4 test = cc[i]>min(v5[i],v6[i]);\n"
2302  " if(cc[i] > cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2303  " }\n"
2304  " }\n"
2305  " float keysum = dot(abs(key), float4(1, 1, 1, 1)) ;\n"
2306  " //assume there is only one keypoint in the four. \n"
2307  " if(keysum != 1.0) return; \n";
2308 
 // NOTE(review): this listing skips lines 2309-2310 here, and the "else out <<"
 // further down has no visible matching "if". The statement below was
 // presumably guarded by a condition (threshold0 above depends on
 // GlobalUtil::_SubpixelLocalization, which may be it) - confirm against the
 // original file.
2311 
 // Variant with offset estimate: the data of the surviving key is moved to
 // slot 0, then a 3x3 system (rows A0, A1, A2 with the right-hand side in .w)
 // is eliminated with row swaps. The key is rejected when the corrected value
 // is not above THRESHOLD1 in magnitude or any offset component reaches 1.
2312  out <<
2313  " float3 offset = float3(0, 0, 0); \n"
2314  " /*The unrolled follwing loop is faster than a dynamic indexing version.*/\n"
2315  " for(int idx = 1; idx < 4; idx++)\n"
2316  " {\n"
2317  " if(key[idx] != 0) \n"
2318  " {\n"
2319  " cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \n"
2320  " v4[0] = v4[idx]; v5[0] = v5[idx]; \n"
2321  " fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \n"
2322  " fx[0] = fx[idx]; fy[0] = fy[idx]; \n"
2323  " }\n"
2324  " }\n"
2325  <<
2326 
2327  " float fs = 0.5*( cu[0] - cd[0] ); \n"
2328  " float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2329  " float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2330  " float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2331  " float4 A0, A1, A2 ; \n"
2332  " A0 = float4(fxx[0], fxy[0], fxs, -fx[0]); \n"
2333  " A1 = float4(fxy[0], fyy[0], fys, -fy[0]); \n"
2334  " A2 = float4(fxs, fys, fss, -fs); \n"
2335  " float3 x3 = abs(float3(fxx[0], fxy[0], fxs)); \n"
2336  " float maxa = max(max(x3.x, x3.y), x3.z); \n"
2337  " if(maxa >= 1e-10 ) \n"
2338  " { \n"
2339  " if(x3.y ==maxa ) \n"
2340  " { \n"
2341  " float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
2342  " }else if( x3.z == maxa ) \n"
2343  " { \n"
2344  " float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
2345  " } \n"
2346  " A0 /= A0.x; \n"
2347  " A1 -= A1.x * A0; \n"
2348  " A2 -= A2.x * A0; \n"
2349  " float2 x2 = abs(float2(A1.y, A2.y)); \n"
2350  " if( x2.y > x2.x ) \n"
2351  " { \n"
2352  " float3 TEMP = A2.yzw; \n"
2353  " A2.yzw = A1.yzw; \n"
2354  " A1.yzw = TEMP; \n"
2355  " x2.x = x2.y; \n"
2356  " } \n"
2357  " if(x2.x >= 1e-10) { \n"
2358  " A1.yzw /= A1.y; \n"
2359  " A2.yzw -= A2.y * A1.yzw; \n"
2360  " if(abs(A2.z) >= 1e-10) {\n"
2361  " offset.z = A2.w /A2.z; \n"
2362  " offset.y = A1.w - offset.z*A1.z; \n"
2363  " offset.x = A0.w - offset.z*A0.z - offset.y*A0.y; \n"
2364  " bool test = (abs(cc[0] + 0.5*dot(float3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"
2365  " if(!test || any( abs(offset) >= 1.0)) return;\n"
2366  " }\n"
2367  " }\n"
2368  " }\n"
2369  <<"\n"
2370  " float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
2371  " FragData0 = float4(keyv, offset);\n"
2372  "}\n" <<'\0';
2373 
 // Variant without offset estimate: only the key code is written, offsets 0.
2374  else out << "\n"
2375  " float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
2376  " FragData0 = float4(keyv, 0, 0, 0);\n"
2377  "}\n" <<'\0';
2378 
2379  s_keypoint = program = new ProgramCG(buffer);
2380  //parameter
2381  _param_dog_texu = cgGetNamedParameter(*program, "texU");
2382  _param_dog_texd = cgGetNamedParameter(*program, "texD");
2383 }
2384 
// Generates the orientation fragment program as text, compiles it into
// s_orientation and caches the handles of its "gtex", "otex" and "size"
// parameters. Three GlobalUtil values are baked into the shader as #defines
// (GAUSSIAN_WF, SAMPLE_WF, ORIENTATION_THRESHOLD). This function emits the
// histogram-accumulation part; the rest of the shader is appended by
// ShaderBagCG::WriteOrientationCodeToStream.
2385 void ShaderBagPKCG::LoadOrientationShader()
2386 {
2387  char buffer[10240];
2388  ostrstream out(buffer,10240);
2389 
2390  out<<"\n"
2391  "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
2392  "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
2393  "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
2394  "void main(uniform samplerRECT tex, uniform samplerRECT gtex, \n"
2395  " uniform samplerRECT otex, uniform float4 size, in float2 TexCoord0 : TEXCOORD0, \n"
2396  " out float4 FeatureData : COLOR0 ";
2397 
2398  //multi orientation output
2399  //use one additional texture to store up to four orientations
2400  //when we use one 32bit float to store two orientations, no extra texture is required
2401 
 // NOTE(review): this listing skips line 2402 here. Given the comment above,
 // the second output below was presumably emitted only under a condition -
 // confirm against the original file.
2403  out<<", out float4 OrientationData : COLOR1";
2404 
2405 
2406  //use 9 float4 to store histogram of 36 directions
 // (bins is declared with 10 elements in the shader; the loop clears 0..8)
 // When size.z < 0 the shader returns early with orientation 0.
2407  out<<") \n"
2408  "{ \n"
2409  " float4 bins[10]; \n"
2410  " for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0); \n"
2411  " float4 sift = texRECT(tex, TexCoord0); \n"
2412  " float2 pos = sift.xy; \n"
2413  " bool orientation_mode = (size.z != 0); \n"
2414  " float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
2415  " //bool fixed_orientation = (size.z < 0); \n"
2416  " if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
2417  " float gsigma = sigma * GAUSSIAN_WF; \n"
2418  " float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"
2419  " float2 dim = size.xy; \n"
2420  " float4 dist_threshold = float4(win.x*win.x+0.5); \n"
2421  " float factor = -0.5/(gsigma*gsigma); \n"
2422  " float4 sz; float2 spos; \n"
2423  " //if(any(pos.xy <= 1)) discard; \n"
2424  " sz.xy = max( pos - win, float2(2,2)); \n"
2425  " sz.zw = min( pos + win, dim-3); \n"
2426  " sz = floor(sz*0.5) + 0.5; ";
2427  //loop to get the histogram
2428 
 // For every texel in the window the shader fetches four gradient magnitudes
 // (gtex) and four orientations (otex), weights them by exp(distsq * factor)
 // and converts each orientation to a 10-degree bin index in 0..35.
2429  out<<"\n"
2430  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2431  " { \n"
2432  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2433  " { \n"
2434  " float2 offset = 2* spos - pos - 0.5; \n"
2435  " float4 off = float4(offset, offset + 1); \n"
2436  " float4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"
2437  " bool4 inside = distsq < dist_threshold; \n"
2438  " if(any(inside.xy||inside.zw)) \n"
2439  " { \n"
2440  " float4 gg = texRECT(gtex, spos); \n"
2441  " float4 oo = texRECT(otex, spos); \n"
2442  " float4 weight = gg * exp(distsq * factor); \n"
2443  " float4 idxv = floor(degrees(oo)*0.1); \n"
2444  " idxv = idxv<0? idxv + 36.0: idxv; \n"
2445  " float4 vidx = 4.0* fract(idxv * 0.25);//fmod(idxv, 4.0);\n";
2446 
2447  //
 // Two ways of adding 'inc' to bins[idx / 4]: indexing the array directly
 // (only when dynamic indexing is enabled and the profile string is "gp4fp"),
 // or an if/else tree over constant indices.
2448  if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
2449  //if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
2450  {
2451  //gp4fp supports dynamic indexing, but it might be slow on some GPUs
2452  out<<"\n"
2453  " for(int i = 0 ; i < 4; i++)\n"
2454  " {\n"
2455  " if(inside[i])\n"
2456  " {\n"
2457  " float idx = idxv[i]; \n"
2458  " float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3)); \n"
2459  " int iidx = int(floor(idx*0.25)); \n"
2460  " bins[iidx]+=inc; \n"
2461  " } \n"
2462  " } \n"
2463  " } \n"
2464  " } \n"
2465  " }";
2466 
2467  }else
2468  {
2469  //nvfp40 still does not support dynamic array indexing
2470  //unrolled binary search
2471  //it seems to be faster than the dynamic indexing version on some GPUs
2472  out<<"\n"
2473  " for(int i = 0 ; i < 4; i++)\n"
2474  " {\n"
2475  " if(inside[i])\n"
2476  " {\n"
2477  " float idx = idxv[i]; \n"
2478  " float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3)); \n"
2479  " if(idx < 16) \n"
2480  " { \n"
2481  " if(idx < 8) \n"
2482  " { \n"
2483  " if(idx < 4) { bins[0]+=inc;} \n"
2484  " else { bins[1]+=inc;} \n"
2485  " }else \n"
2486  " { \n"
2487  " if(idx < 12){ bins[2]+=inc;} \n"
2488  " else { bins[3]+=inc;} \n"
2489  " } \n"
2490  " }else if(idx < 32) \n"
2491  " { \n"
2492  " if(idx < 24) \n"
2493  " { \n"
2494  " if(idx <20) { bins[4]+=inc;} \n"
2495  " else { bins[5]+=inc;} \n"
2496  " }else \n"
2497  " { \n"
2498  " if(idx < 28){ bins[6]+=inc;} \n"
2499  " else { bins[7]+=inc;} \n"
2500  " } \n"
2501  " }else \n"
2502  " { \n"
2503  " bins[8]+=inc; \n"
2504  " } \n"
2505  " } \n"
2506  " } \n"
2507  " } \n"
2508  " } \n"
2509  " }";
2510 
2511  }
2512 
2513  //reuse the code from the unpacked version..
 // NOTE(review): no '\0' is written to the stream in this function before
 // 'buffer' is handed to ProgramCG; presumably WriteOrientationCodeToStream
 // appends the terminator - confirm.
2514  ShaderBagCG::WriteOrientationCodeToStream(out);
2515 
2516 
2517  ProgramCG * program;
2518  s_orientation = program = new ProgramCG(buffer);
2519  _param_orientation_gtex = cgGetNamedParameter(*program, "gtex");
2520  _param_orientation_otex = cgGetNamedParameter(*program, "otex");
2521  _param_orientation_size = cgGetNamedParameter(*program, "size");
2522 
2523 
2524 }
2525 
// Loads the descriptor shader by delegating to LoadDescriptorShaderF2().
// NOTE(review): this listing skips line 2528 just before the call, so any
// statement that stood there is not visible here - check the original file.
2526 void ShaderBagPKCG::LoadDescriptorShader()
2527 {
2529  LoadDescriptorShaderF2();
2530 
2531 }
2532 
2533 void ShaderBagPKCG::LoadDescriptorShaderF2()
2534 {
2535  //one shader outpout 128/8 = 16 , each fragout encodes 4
2536  //const double twopi = 2.0*3.14159265358979323846;
2537  //const double rpi = 8.0/twopi;
2538  char buffer[10240];
2539  ostrstream out(buffer, 10240);
2540 
2541  out<<setprecision(8);
2542 
2543  out<<"\n"
2544  "#define M_PI 3.14159265358979323846\n"
2545  "#define TWO_PI (2.0*M_PI)\n"
2546  "#define RPI 1.2732395447351626861510701069801\n"
2547  "#define WF size.z\n"
2548  "void main(uniform samplerRECT tex, \n"
2549  "uniform samplerRECT gtex, \n"
2550  "uniform samplerRECT otex, \n"
2551  "uniform float4 dsize, \n"
2552  "uniform float3 size, \n"
2553  "in float2 TexCoord0 : TEXCOORD0, \n"
2554  "out float4 FragData0:COLOR0, \n"
2555  "out float4 FragData1:COLOR1) \n"
2556  "{\n"
2557  " float2 dim = size.xy; //image size \n"
2558  " float index = dsize.x*floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
2559  " float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5)); \n"
2560  " index = floor(index*0.125)+ 0.49; \n"
2561  " float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2562  " float2 pos = texRECT(tex, coord).xy; \n"
2563  " if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
2564  " //discard; \n"
2565  " { FragData0 = FragData1 = float4(0.0); return; }\n"
2566  " float anglef = texRECT(tex, coord).z;\n"
2567  " if(anglef > M_PI) anglef -= TWO_PI;\n"
2568  " float sigma = texRECT(tex, coord).w; \n"
2569  " float spt = abs(sigma * WF); //default to be 3*sigma \n";
2570  //rotation
2571  out<<
2572  " float4 cscs, rots; \n"
2573  " sincos(anglef, cscs.y, cscs.x); \n"
2574  " cscs.zw = - cscs.xy; \n"
2575  " rots = cscs /spt; \n"
2576  " cscs *= spt; \n";
2577 
2578  //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2579  //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
2580  //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2581  //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2582  out<<
2583  " float4 temp; float2 pt, offsetpt; \n"
2584  " /*the fraction part of idx is .5*/ \n"
2585  " offsetpt.x = 4.0 * fract(idx * 0.25) - 2.0; \n"
2586  " offsetpt.y = floor(idx*0.25) - 1.5; \n"
2587  " temp = cscs.xwyx*offsetpt.xyxy; \n"
2588  " pt = pos + temp.xz + temp.yw; \n";
2589 
2590  //get a horizontal bounding box of the rotated rectangle
2591  out<<
2592  " float2 bwin = abs(cscs.xy); \n"
2593  " float bsz = bwin.x + bwin.y; \n"
2594  " float4 sz; float2 spos; \n"
2595  " sz.xy = max(pt - bsz, float2(2,2));\n"
2596  " sz.zw = min(pt + bsz, dim - 3); \n"
2597  " sz = floor(sz * 0.5) + 0.5;"; //move sample point to pixel center
2598  //get voting for two box
2599 
2600  out<<"\n"
2601  " float4 DA, DB; \n"
2602  " DA = DB = float4(0, 0, 0, 0); \n"
2603  " float4 nox = float4(0, rots.xy, rots.x + rots.y); \n"
2604  " float4 noy = float4(0, rots.wx, rots.w + rots.x); \n"
2605  " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2606  " { \n"
2607  " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2608  " { \n"
2609  " float2 tpt = spos * 2.0 - pt - 0.5; \n"
2610  " float4 temp = rots.xywx * tpt.xyxy; \n"
2611  " float2 temp2 = temp.xz + temp.yw; \n"
2612  " float4 nx = temp2.x + nox; \n"
2613  " float4 ny = temp2.y + noy; \n"
2614  " float4 nxn = abs(nx), nyn = abs(ny); \n"
2615  " bool4 inside = (max(nxn, nyn) < 1.0); \n"
2616  " if(any(inside.xy || inside.zw))\n"
2617  " {\n"
2618  " float4 gg = texRECT(gtex, spos);\n"
2619  " float4 oo = texRECT(otex, spos);\n"
2620  " float4 theta0 = (anglef - oo)*RPI;\n"
2621  " float4 theta = theta0 < 0? theta0 + 8.0 : theta0;//8.0 * frac(1.0 + 0.125 * theta0);// \n"
2622  " float4 theta1 = floor(theta); \n"
2623  " float4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"
2624  " float4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy )); \n"
2625  " float4 weight = (1 - nxn) * (1 - nyn) * gg * ww; \n"
2626  " float4 weight2 = (theta - theta1) * weight; \n"
2627  " float4 weight1 = weight - weight2; \n"
2628  " for(int i = 0;i < 4; i++)\n"
2629  " {\n"
2630  " if(inside[i])\n"
2631  " {\n"
2632  " DA += float4(theta1[i] == float4(0, 1, 2, 3))*weight1[i]; \n"
2633  " DA += float4(theta1[i] == float4(7, 0, 1, 2))*weight2[i]; \n"
2634  " DB += float4(theta1[i] == float4(4, 5, 6, 7))*weight1[i]; \n"
2635  " DB += float4(theta1[i] == float4(3, 4, 5, 6))*weight2[i]; \n"
2636  " }\n"
2637  " }\n"
2638  " }\n"
2639  " }\n"
2640  " }\n";
2641  out<<
2642  " FragData0 = DA; FragData1 = DB;\n"
2643  "}\n"<<'\0';
2644  ProgramCG * program;
2645 
2646  s_descriptor_fp = program = new ProgramCG(buffer);
2647  _param_descriptor_gtex = cgGetNamedParameter(*program, "gtex");
2648  _param_descriptor_otex = cgGetNamedParameter(*program, "otex");
2649  _param_descriptor_size = cgGetNamedParameter(*program, "size");
2650  _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
2651 
2652 }
2653 
2654 void ShaderBagPKCG::SetMarginCopyParam(int xmax, int ymax)
2655 {
2656  float truncate[4];
2657  truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1) >> 1) - 0.5f;
2658  truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1) >> 1) - 0.5f;
2659  truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2660  truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f);
2661  cgGLSetParameter4fv(_param_margin_copy_truncate, truncate);
2662 }
2663 
2664 void ShaderBagPKCG::SetGradPassParam(int texP)
2665 {
2666  cgGLSetTextureParameter(_param_grad_pass_texp, texP);
2667  cgGLEnableTextureParameter(_param_grad_pass_texp);
2668 }
2669 
2670 void ShaderBagPKCG::SetGenListEndParam(int ktex)
2671 {
2672  cgGLSetTextureParameter(_param_genlist_end_ktex, ktex);
2673  cgGLEnableTextureParameter(_param_genlist_end_ktex);
2674 }
2675 
2676 void ShaderBagPKCG::SetDogTexParam(int texU, int texD)
2677 {
2678  cgGLSetTextureParameter(_param_dog_texu, texU);
2679  cgGLEnableTextureParameter(_param_dog_texu);
2680  cgGLSetTextureParameter(_param_dog_texd, texD);
2681  cgGLEnableTextureParameter(_param_dog_texd);
2682 }
2683 
2684 void ShaderBagPKCG::SetGenListInitParam(int w, int h)
2685 {
2686  float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f, (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2687  cgGLSetParameter4fv(_param_genlist_init_bbox, bbox);
2688 }
2689 
2690 
2691 void ShaderBagPKCG::SetGenListStartParam(float width, int tex0)
2692 {
2693  cgGLSetParameter1f(_param_ftex_width, width);
2694 
2695  if(_param_genlist_start_tex0)
2696  {
2697  cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
2698  cgGLEnableTextureParameter(_param_genlist_start_tex0);
2699  }
2700 }
2701 
2702 
2703 
2704 void ShaderBagPKCG::SetGenListStepParam(int tex, int tex0)
2705 {
2706  cgGLSetTextureParameter(_param_genlist_step_tex, tex);
2707  cgGLEnableTextureParameter(_param_genlist_step_tex);
2708  cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
2709  cgGLEnableTextureParameter(_param_genlist_step_tex0);
2710 }
2711 
2712 void ShaderBagPKCG::SetGenVBOParam(float width, float fwidth, float size)
2713 {
2714  float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2715  cgGLSetParameter4fv(_param_genvbo_size, sizes);
2716 }
2717 
2718 void ShaderBagPKCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2719 {
2720  cgGLSetTextureParameter(_param_orientation_gtex, oTex);
2721  cgGLEnableTextureParameter(_param_orientation_gtex);
2722  cgGLSetParameter2f(_param_orientation_size, sigma, sigma_step);
2723 }
2724 
2725 
2726 void ShaderBagPKCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2727 {
2729  cgGLSetTextureParameter(_param_orientation_gtex, gtex);
2730  cgGLEnableTextureParameter(_param_orientation_gtex);
2731  cgGLSetTextureParameter(_param_orientation_otex, otex);
2732  cgGLEnableTextureParameter(_param_orientation_otex);
2733 
2734  float size[4];
2735  size[0] = (float)width;
2736  size[1] = (float)height;
2737  size[2] = sigma;
2738  size[3] = step;
2739  cgGLSetParameter4fv(_param_orientation_size, size);
2740 
2741 }
2742 
2743 void ShaderBagPKCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
2744 {
2746 
2747  cgGLSetTextureParameter(_param_descriptor_gtex, gtex);
2748  cgGLEnableTextureParameter(_param_descriptor_gtex);
2749  cgGLSetTextureParameter(_param_descriptor_otex, otex);
2750  cgGLEnableTextureParameter(_param_descriptor_otex);
2751 
2752 
2753  float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2754  cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
2755  float size[3];
2756  size[0] = width;
2757  size[1] = height;
2759  cgGLSetParameter3fv(_param_descriptor_size, size);
2760 
2761 
2762 }
2763 
2764 #endif
2765 
int width
int size
int height
int offset
CloudViewerScene::LightingProfile profile
#define NULL
static int _SubpixelLocalization
Definition: GlobalUtil.h:72
static int _PreciseBorder
Definition: GlobalUtil.h:75
static float _DescriptorWindowFactor
Definition: GlobalUtil.h:58
static int _OrientationPack2
Definition: GlobalUtil.h:60
static int _verbose
Definition: GlobalUtil.h:44
static float _MulitiOrientationThreshold
Definition: GlobalUtil.h:97
static float _OrientationWindowFactor
Definition: GlobalUtil.h:57
static int _UseDynamicIndexing
Definition: GlobalUtil.h:53
static int _KeepExtremumSign
Definition: GlobalUtil.h:89
static float _OrientationGaussianFactor
Definition: GlobalUtil.h:96
static int _IsNvidia
Definition: GlobalUtil.h:49
static int _DescriptorPPT
Definition: GlobalUtil.h:69
static int _MaxOrientation
Definition: GlobalUtil.h:59
static int _FullSupported
Definition: GlobalUtil.h:65
int max(int a, int b)
Definition: cutil_math.h:48
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718
Definition: Eigen.h:85