23 #if defined(CL_SIFTGPU_ENABLED)
25 #include <CL/opencl.h>
45 #pragma comment (lib, "OpenCL.lib")
49 #ifndef WIN32_LEAN_AND_MEAN
50 #define WIN32_LEAN_AND_MEAN
59 ProgramCL::ProgramCL()
66 ProgramCL::~ProgramCL()
68 if(_kernel) clReleaseKernel(_kernel);
69 if(_program) clReleaseProgram(_program);
72 ProgramCL::ProgramCL(
const char*
name,
const char * code, cl_context
context, cl_device_id device) : _valid(1)
74 const char * src[1] = {code}; cl_int status;
76 _program = clCreateProgramWithSource(
context, 1, src,
NULL, &status);
77 if(status != CL_SUCCESS) _valid = 0;
79 status = clBuildProgram(_program, 0,
NULL,
81 "-cl-fast-relaxed-math -cl-single-precision-constant -cl-nv-verbose" :
82 "-cl-fast-relaxed-math -cl-single-precision-constant",
NULL,
NULL);
84 if(status != CL_SUCCESS) {PrintBuildLog(device, 1); _valid = 0;}
87 _kernel = clCreateKernel(_program,
name, &status);
88 if(status != CL_SUCCESS) _valid = 0;
91 void ProgramCL::PrintBuildLog(cl_device_id device,
int all)
93 char buffer[10240] =
"\0";
94 cl_int status = clGetProgramBuildInfo(
95 _program, device, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer,
NULL);
98 std::cerr << buffer <<
endl;
101 const char * pos = strstr(buffer,
"ptxas");
102 if(pos) std::cerr << pos <<
endl;
109 ProgramBagCL::ProgramBagCL()
113 s_gray = s_sampling =
NULL;
114 s_packup = s_zero_pass =
NULL;
115 s_gray_pack = s_unpack =
NULL;
124 f_gaussian_skip0 =
NULL;
125 f_gaussian_skip1 =
NULL;
130 if(!InitializeContext())
return;
137 ProgramBagCL::~ProgramBagCL()
139 if(s_gray)
delete s_gray;
140 if(s_sampling)
delete s_sampling;
141 if(s_zero_pass)
delete s_zero_pass;
142 if(s_packup)
delete s_packup;
143 if(s_unpack)
delete s_unpack;
144 if(s_gray_pack)
delete s_gray_pack;
145 if(s_sampling_u)
delete s_sampling_u;
146 if(s_dog_pass)
delete s_dog_pass;
147 if(s_grad_pass)
delete s_grad_pass;
148 if(s_grad_pass2)
delete s_grad_pass2;
149 if(s_unpack_dog)
delete s_unpack_dog;
150 if(s_unpack_grd)
delete s_unpack_grd;
151 if(s_unpack_key)
delete s_unpack_key;
152 if(s_keypoint)
delete s_keypoint;
154 if(f_gaussian_skip1)
delete f_gaussian_skip1;
156 for(
unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
158 if(f_gaussian_skip0_v[i])
delete f_gaussian_skip0_v[i];
160 if(f_gaussian_step && _gaussian_step_num > 0)
162 for(
int i = 0; i< _gaussian_step_num; i++)
164 delete f_gaussian_step[i];
166 delete[] f_gaussian_step;
170 if(_context) clReleaseContext(_context);
171 if(_queue) clReleaseCommandQueue(_queue);
174 bool ProgramBagCL::InitializeContext()
176 cl_uint num_platform, num_device;
179 status = clGetPlatformIDs (0,
NULL, &num_platform);
180 if (status != CL_SUCCESS || num_platform == 0)
return false;
182 cl_platform_id platforms[16];
183 if(num_platform > 16 ) num_platform = 16;
184 status = clGetPlatformIDs (num_platform, platforms,
NULL);
185 _platform = platforms[0];
188 status = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 0,
NULL, &num_device);
189 if(status != CL_SUCCESS || num_device == 0)
return false;
192 cl_device_id* devices =
new cl_device_id [num_device];
193 status = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, num_device, devices,
NULL);
194 _device = (status == CL_SUCCESS? devices[0] : 0);
delete[] devices;
195 if(status != CL_SUCCESS)
return false;
200 cl_device_mem_cache_type is_gcache;
201 clGetDeviceInfo(_device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
sizeof(is_gcache), &is_gcache,
NULL);
202 if(is_gcache == CL_NONE) std::cout <<
"No cache for global memory\n";
210 cl_context_properties prop[] = {
211 CL_CONTEXT_PLATFORM, (cl_context_properties)_platform,
212 CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
213 CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), 0 };
214 _context = clCreateContext(prop, 1, &_device,
NULL,
NULL, &status);
215 if(status != CL_SUCCESS)
return false;
218 _context = clCreateContext(0, 1, &_device,
NULL,
NULL, &status);
219 if(status != CL_SUCCESS)
return false;
223 _queue = clCreateCommandQueue(_context, _device, 0, &status);
224 return status == CL_SUCCESS;
227 void ProgramBagCL::InitProgramBag(
SiftParam¶m)
231 LoadDynamicShaders(param);
237 void ProgramBagCL::UnloadProgram()
242 void ProgramBagCL::FinishCL()
247 void ProgramBagCL::LoadFixedShaders()
251 s_gray =
new ProgramCL(
"gray",
252 "__kernel void gray(__read_only image2d_t input, __write_only image2d_t output) {\n"
253 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
254 "int2 coord = (int2)(get_global_id(0), get_global_id(1));\n"
255 "float4 weight = (float4)(0.299, 0.587, 0.114, 0.0);\n"
256 "float intensity = dot(weight, read_imagef(input,sampler, coord ));\n"
257 "float4 result= (float4)(intensity, intensity, intensity, 1.0);\n"
258 "write_imagef(output, coord, result); }", _context, _device );
261 s_sampling =
new ProgramCL(
"sampling",
262 "__kernel void sampling(__read_only image2d_t input, __write_only image2d_t output,\n"
263 " int width, int height) {\n"
264 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
265 "int x = get_global_id(0), y = get_global_id(1); \n"
266 "if( x >= width || y >= height) return;\n"
267 "int xa = x + x, ya = y + y; \n"
268 "int xb = xa + 1, yb = ya + 1; \n"
269 "float v1 = read_imagef(input, sampler, (int2) (xa, ya)).x; \n"
270 "float v2 = read_imagef(input, sampler, (int2) (xb, ya)).x; \n"
271 "float v3 = read_imagef(input, sampler, (int2) (xa, yb)).x; \n"
272 "float v4 = read_imagef(input, sampler, (int2) (xb, yb)).x; \n"
273 "float4 result = (float4) (v1, v2, v3, v4);"
274 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
276 s_sampling_k =
new ProgramCL(
"sampling_k",
277 "__kernel void sampling_k(__read_only image2d_t input, __write_only image2d_t output, "
278 " int width, int height,\n"
279 " int step, int halfstep) {\n"
280 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
281 "int x = get_global_id(0), y = get_global_id(1); \n"
282 "if( x >= width || y >= height) return;\n"
283 "int xa = x * step, ya = y *step; \n"
284 "int xb = xa + halfstep, yb = ya + halfstep; \n"
285 "float v1 = read_imagef(input, sampler, (int2) (xa, ya)).x; \n"
286 "float v2 = read_imagef(input, sampler, (int2) (xb, ya)).x; \n"
287 "float v3 = read_imagef(input, sampler, (int2) (xa, yb)).x; \n"
288 "float v4 = read_imagef(input, sampler, (int2) (xb, yb)).x; \n"
289 "float4 result = (float4) (v1, v2, v3, v4);"
290 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
293 s_sampling_u =
new ProgramCL(
"sampling_u",
294 "__kernel void sampling_u(__read_only image2d_t input, \n"
295 " __write_only image2d_t output,\n"
296 " int width, int height,\n"
297 " float step, float halfstep) {\n"
298 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;\n"
299 "int x = get_global_id(0), y = get_global_id(1); \n"
300 "if( x >= width || y >= height) return;\n"
301 "float xa = x * step, ya = y *step; \n"
302 "float xb = xa + halfstep, yb = ya + halfstep; \n"
303 "float v1 = read_imagef(input, sampler, (float2) (xa, ya)).x; \n"
304 "float v2 = read_imagef(input, sampler, (float2) (xb, ya)).x; \n"
305 "float v3 = read_imagef(input, sampler, (float2) (xa, yb)).x; \n"
306 "float v4 = read_imagef(input, sampler, (float2) (xb, yb)).x; \n"
307 "float4 result = (float4) (v1, v2, v3, v4);"
308 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
311 s_zero_pass =
new ProgramCL(
"zero_pass",
312 "__kernel void zero_pass(__write_only image2d_t output){\n"
313 "int2 coord = (int2)(get_global_id(0), get_global_id(1));\n"
314 "write_imagef(output, coord, (float4)(0.0));}", _context, _device);
316 s_packup =
new ProgramCL(
"packup",
317 "__kernel void packup(__global float* input, __write_only image2d_t output,\n"
318 " int twidth, int theight, int width){\n"
319 "int2 coord = (int2)(get_global_id(0), get_global_id(1));\n"
320 "if(coord.x >= twidth || coord.y >= theight) return;\n"
321 "int index0 = (coord.y + coord.y) * width; \n"
322 "int index1 = index0 + coord.x;\n"
323 "int x2 = min(width -1, coord.x); \n"
324 "float v1 = input[index1 + coord.x], v2 = input[index1 + x2]; \n"
325 "int index2 = index1 + width; \n"
326 "float v3 = input[index2 + coord.x], v4 = input[index2 + x2]; \n "
327 "write_imagef(output, coord, (float4) (v1, v2, v3, v4));}", _context, _device);
329 s_dog_pass =
new ProgramCL(
"dog_pass",
330 "__kernel void dog_pass(__read_only image2d_t tex, __read_only image2d_t texp,\n"
331 " __write_only image2d_t dog, int width, int height) {\n"
332 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
333 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
334 "int2 coord = (int2)(get_global_id(0), get_global_id(1)); \n"
335 "if( coord.x >= width || coord.y >= height) return;\n"
336 "float4 cc = read_imagef(tex , sampler, coord); \n"
337 "float4 cp = read_imagef(texp, sampler, coord);\n"
338 "write_imagef(dog, coord, cc - cp); }\n", _context, _device);
340 s_grad_pass =
new ProgramCL(
"grad_pass",
341 "__kernel void grad_pass(__read_only image2d_t tex, __read_only image2d_t texp,\n"
342 " __write_only image2d_t dog, int width, int height,\n"
343 " __write_only image2d_t grad, __write_only image2d_t rot) {\n"
344 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
345 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
346 "int x = get_global_id(0), y = get_global_id(1); \n"
347 "if( x >= width || y >= height) return;\n"
348 "int2 coord = (int2) (x, y);\n"
349 "float4 cc = read_imagef(tex , sampler, coord); \n"
350 "float4 cp = read_imagef(texp, sampler, coord);\n"
351 "float2 cl = read_imagef(tex, sampler, (int2)(x - 1, y)).yw;\n"
352 "float2 cr = read_imagef(tex, sampler, (int2)(x + 1, y)).xz;\n"
353 "float2 cd = read_imagef(tex, sampler, (int2)(x, y - 1)).zw;\n"
354 "float2 cu = read_imagef(tex, sampler, (int2)(x, y + 1)).xy;\n"
355 "write_imagef(dog, coord, cc - cp); \n"
356 "float4 dx = (float4)(cc.y - cl.x, cr.x - cc.x, cc.w - cl.y, cr.y - cc.z);\n"
357 "float4 dy = (float4)(cc.zw - cd.xy, cu.xy - cc.xy);\n"
358 "write_imagef(grad, coord, 0.5 * sqrt(dx*dx + dy * dy));\n"
359 "write_imagef(rot, coord, atan2(dy, dx + (float4) (FLT_MIN)));}\n", _context, _device);
361 s_grad_pass2 =
new ProgramCL(
"grad_pass2",
362 "#define BLOCK_DIMX 32\n"
363 "#define BLOCK_DIMY 14\n"
364 "#define BLOCK_SIZE (BLOCK_DIMX * BLOCK_DIMY)\n"
365 "__kernel void grad_pass2(__read_only image2d_t tex, __read_only image2d_t texp,\n"
366 " __write_only image2d_t dog, int width, int height,\n"
367 " __write_only image2d_t grd, __write_only image2d_t rot){\n"
368 "__local float block[BLOCK_SIZE * 4]; \n"
369 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
370 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
371 "int2 coord = (int2) ( get_global_id(0) - get_group_id(0) * 2 - 1, \n"
372 " get_global_id(1) - get_group_id(1) * 2- 1); \n"
373 "int idx = mad24(get_local_id(1), BLOCK_DIMX, get_local_id(0));\n"
374 "float4 cc = read_imagef(tex, sampler, coord);\n"
375 "block[idx ] = cc.x;\n"
376 "block[idx + BLOCK_SIZE ] = cc.y;\n"
377 "block[idx + BLOCK_SIZE * 2] = cc.z;\n"
378 "block[idx + BLOCK_SIZE * 3] = cc.w;\n"
379 "barrier(CLK_LOCAL_MEM_FENCE);\n"
380 "if( get_local_id(0) == 0 || get_local_id(0) == BLOCK_DIMX - 1) return;\n"
381 "if( get_local_id(1) == 0 || get_local_id(1) == BLOCK_DIMY - 1) return;\n"
382 "if( coord.x >= width) return; \n"
383 "if( coord.y >= height) return;\n"
384 "float4 cp = read_imagef(texp, sampler, coord);\n"
385 "float4 dx = (float4)( cc.y - block[idx - 1 + BLOCK_SIZE], \n"
386 " block[idx + 1] - cc.x, \n"
387 " cc.w - block[idx - 1 + 3 * BLOCK_SIZE], \n"
388 " block[idx + 1 + 2 * BLOCK_SIZE] - cc.z);\n"
389 "float4 dy = (float4)( cc.z - block[idx - BLOCK_DIMX + 2 * BLOCK_SIZE], \n"
390 " cc.w - block[idx - BLOCK_DIMX + 3 * BLOCK_SIZE],"
392 " block[idx + BLOCK_DIMX] - cc.x,\n "
393 " block[idx + BLOCK_DIMX + BLOCK_SIZE] - cc.y);\n"
395 "write_imagef(dog, coord, cc - cp); \n"
396 "write_imagef(grd, coord, 0.5 * sqrt(dx*dx + dy * dy));\n"
397 "write_imagef(rot, coord, atan2(dy, dx + (float4) (FLT_MIN)));}\n", _context, _device);
400 void ProgramBagCL::LoadDynamicShaders(
SiftParam& param)
402 LoadKeypointShader();
404 CreateGaussianFilters(param);
408 void ProgramBagCL::SelectInitialSmoothingFilter(
int octave_min,
SiftParam¶m)
413 f_gaussian_skip0 =
NULL;
416 for(
unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
418 if(f_gaussian_skip0_v[i]->_id == octave_min)
420 f_gaussian_skip0 = f_gaussian_skip0_v[i];
424 FilterCL * filter = CreateGaussianFilter(sigma);
425 filter->_id = octave_min;
426 f_gaussian_skip0_v.push_back(filter);
427 f_gaussian_skip0 = filter;
432 void ProgramBagCL::CreateGaussianFilters(
SiftParam¶m)
436 f_gaussian_skip0 = CreateGaussianFilter(param.
_sigma_skip0);
438 f_gaussian_skip0_v.push_back(f_gaussian_skip0);
442 f_gaussian_skip1 = CreateGaussianFilter(param.
_sigma_skip1);
445 f_gaussian_step =
new FilterCL*[param.
_sigma_num];
448 f_gaussian_step[i] = CreateGaussianFilter(param.
_sigma[i]);
454 FilterCL* ProgramBagCL::CreateGaussianFilter(
float sigma)
458 int width = 2*sz + 1;
469 float * kernel =
new float[
width];
470 float rv = 1.0f/(sigma*sigma);
474 for( i = -sz ; i <= sz ; ++i)
476 kernel[i+sz] = v = exp(-0.5f * i * i *rv) ;
482 for(i = 0; i<
width ;i++) kernel[i]*=rv;
484 FilterCL * filter = CreateFilter(kernel,
width);
490 FilterCL* ProgramBagCL::CreateFilter(
float kernel[],
int width)
492 FilterCL * filter =
new FilterCL;
493 filter->s_shader_h = CreateFilterH(kernel,
width);
494 filter->s_shader_v = CreateFilterV(kernel,
width);
495 filter->_size =
width;
500 ProgramCL* ProgramBagCL::CreateFilterH(
float kernel[],
int width)
502 int halfwidth =
width >>1;
503 float * pf = kernel + halfwidth;
504 int nhpixel = (halfwidth+1)>>1;
505 int npixel = (nhpixel<<1)+1;
510 ostrstream out(buffer, 10240);
511 out<<setprecision(8);
516 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;"
517 "__kernel void filter_h(__read_only image2d_t input, \n"
518 " __write_only image2d_t output, int width_, int height_) {\n"
519 "int x = get_global_id(0);\n"
520 "int y = get_global_id(1); \n"
521 "if( x > width_ || y > height_) return; \n"
522 "float4 pc; int2 coord; \n"
523 "float4 result = (float4)(0.0);\n";
524 for(
int i = 0 ; i < npixel ; i++)
526 out<<
"coord = (int2)(x + ("<< (i - nhpixel) <<
"), y);\n";
527 out<<
"pc= read_imagef(input, sampler, coord);\n";
529 out<<
"if(coord.x < 0) pc = pc.xxzz; else if (coord.x > width_) pc = pc.yyww; \n";
531 int xw = (i - nhpixel)*2;
532 for(
int j = 0; j < 3; j++)
535 weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
539 out<<
"result += (float4)("<<weight[2]<<
","<<weight[0]<<
","<<weight[2]<<
","<<weight[0]<<
") * pc.yxwz;\n";
543 out<<
"result += (float4)("<<weight[1]<<
", "<<weight[0]<<
", "<<weight[1]<<
", "<<weight[0]<<
") * pc.xxzz;\n";
544 out<<
"result += (float4)("<<weight[2]<<
", "<<weight[1]<<
", "<<weight[2]<<
", "<<weight[1]<<
") * pc.yyww;\n";
547 out <<
"write_imagef(output, (int2)(x, y), result); }\n" <<
'\0';
548 return new ProgramCL(
"filter_h", buffer, _context, _device);
553 ProgramCL* ProgramBagCL::CreateFilterV(
float kernel[],
int width)
556 int halfwidth =
width >>1;
557 float * pf = kernel + halfwidth;
558 int nhpixel = (halfwidth+1)>>1;
559 int npixel = (nhpixel<<1)+1;
564 ostrstream out(buffer, 10240);
565 out<<setprecision(8);
570 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;"
571 "__kernel void filter_v(__read_only image2d_t input, \n"
572 " __write_only image2d_t output, int width_, int height_) {\n"
573 "int x = get_global_id(0);\n"
574 "int y = get_global_id(1); \n"
575 "if( x > width_ || y >= height_) return; \n"
576 "float4 pc; int2 coord; \n"
577 "float4 result = (float4)(0.0);\n";
578 for(
int i = 0 ; i < npixel ; i++)
580 out<<
"coord = (int2)(x, y + ("<< (i - nhpixel) <<
"));\n";
581 out<<
"pc= read_imagef(input, sampler, coord);\n";
583 out<<
"if(coord.y < 0) pc = pc.xyxy; else if (coord.y > height_) pc = pc.zwzw; \n";
585 int xw = (i - nhpixel)*2;
586 for(
int j = 0; j < 3; j++)
589 weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
593 out<<
"result += (float4)("<<weight[2]<<
","<<weight[2]<<
","<<weight[0]<<
","<<weight[0]<<
") * pc.zwxy;\n";
597 out<<
"result += (float4)("<<weight[1]<<
", "<<weight[1]<<
", "<<weight[0]<<
", "<<weight[0]<<
") * pc.xyxy;\n";
598 out<<
"result += (float4)("<<weight[2]<<
", "<<weight[2]<<
", "<<weight[1]<<
", "<<weight[1]<<
") * pc.zwzw;\n";
601 out <<
"write_imagef(output, (int2)(x, y), result); }\n" <<
'\0';
602 return new ProgramCL(
"filter_v", buffer, _context, _device);
606 void ProgramBagCL::FilterImage(FilterCL* filter, CLTexImage *dst, CLTexImage *src, CLTexImage*tmp)
608 cl_kernel kernelh = filter->s_shader_h->_kernel;
609 cl_kernel kernelv = filter->s_shader_v->_kernel;
612 cl_int status, w = dst->GetImgWidth(), h = dst->GetImgHeight();
613 cl_int w_ = w - 1, h_ = h - 1;
615 size_t dim0 = 16, dim1 = 16;
616 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
618 clSetKernelArg(kernelh, 0,
sizeof(cl_mem), &src->_clData);
619 clSetKernelArg(kernelh, 1,
sizeof(cl_mem), &tmp->_clData);
620 clSetKernelArg(kernelh, 2,
sizeof(cl_int), &w_);
621 clSetKernelArg(kernelh, 3,
sizeof(cl_int), &h_);
622 status = clEnqueueNDRangeKernel(_queue, kernelh, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
623 CheckErrorCL(status,
"ProgramBagCL::FilterImageH");
624 if(status != CL_SUCCESS)
return;
626 clSetKernelArg(kernelv, 0,
sizeof(cl_mem), &tmp->_clData);
627 clSetKernelArg(kernelv, 1,
sizeof(cl_mem), &dst->_clData);
628 clSetKernelArg(kernelv, 2,
sizeof(cl_int), &w_);
629 clSetKernelArg(kernelv, 3,
sizeof(cl_int), &h_);
630 size_t gsz2[2] = {(w + dim1 - 1) / dim1 * dim1, (h + dim0 - 1) / dim0 * dim0}, lsz2[2] = {dim1, dim0};
631 status = clEnqueueNDRangeKernel(_queue, kernelv, 2,
NULL, gsz2, lsz2, 0,
NULL,
NULL);
632 CheckErrorCL(status,
"ProgramBagCL::FilterImageV");
636 void ProgramBagCL::SampleImageU(CLTexImage *dst, CLTexImage *src,
int log_scale)
638 cl_kernel kernel= s_sampling_u->_kernel;
639 float scale = 1.0f / (1 << log_scale);
640 float offset = scale * 0.5f;
641 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
642 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
643 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
644 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
645 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
646 clSetKernelArg(kernel, 4,
sizeof(cl_float), &(scale));
647 clSetKernelArg(kernel, 5,
sizeof(cl_float), &(
offset));
649 size_t dim0 = 16, dim1 = 16;
651 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
652 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
653 CheckErrorCL(status,
"ProgramBagCL::SampleImageU");
656 void ProgramBagCL::SampleImageD(CLTexImage *dst, CLTexImage *src,
int log_scale)
659 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
662 kernel = s_sampling->_kernel;
663 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
664 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
665 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
666 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
669 cl_int fullstep = (1 << log_scale);
670 cl_int halfstep = fullstep >> 1;
671 kernel = s_sampling_k->_kernel;
672 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
673 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
674 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
675 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
676 clSetKernelArg(kernel, 4,
sizeof(cl_int), &(fullstep));
677 clSetKernelArg(kernel, 5,
sizeof(cl_int), &(halfstep));
679 size_t dim0 = 128, dim1 = 1;
681 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
682 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
683 CheckErrorCL(status,
"ProgramBagCL::SampleImageD");
686 void ProgramBagCL::FilterInitialImage(CLTexImage* tex, CLTexImage* buf)
688 if(f_gaussian_skip0) FilterImage(f_gaussian_skip0, tex, tex, buf);
691 void ProgramBagCL::FilterSampledImage(CLTexImage* tex, CLTexImage* buf)
693 if(f_gaussian_skip1) FilterImage(f_gaussian_skip1, tex, tex, buf);
696 void ProgramBagCL::ComputeDOG(CLTexImage*tex, CLTexImage* texp, CLTexImage* dog, CLTexImage* grad, CLTexImage* rot)
698 int margin = 0, use_gm2 = 1;
699 bool both_grad_dog = rot->_clData && grad->_clData;
700 cl_int w = tex->GetImgWidth(), h = tex->GetImgHeight();
701 cl_kernel kernel ;
size_t dim0, dim1;
702 if(!both_grad_dog) {kernel = s_dog_pass->_kernel; dim0 = 16; dim1 = 12; }
703 else if(use_gm2) {kernel = s_grad_pass2->_kernel; dim0 = 32; dim1 = 14; margin = 2; }
704 else {kernel = s_grad_pass->_kernel; dim0 = 16; dim1 = 20; }
705 size_t gsz[2] = { (w + dim0 - 1 - margin) / (dim0 - margin) * dim0,
706 (h + dim1 - 1 - margin) / (dim1 - margin) * dim1};
707 size_t lsz[2] = {dim0, dim1};
708 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(tex->_clData));
709 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(texp->_clData));
710 clSetKernelArg(kernel, 2,
sizeof(cl_mem), &(dog->_clData));
711 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(w));
712 clSetKernelArg(kernel, 4,
sizeof(cl_int), &(h));
715 clSetKernelArg(kernel, 5,
sizeof(cl_mem), &(grad->_clData));
716 clSetKernelArg(kernel, 6,
sizeof(cl_mem), &(rot->_clData));
719 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
720 CheckErrorCL(status,
"ProgramBagCL::ComputeDOG");
724 void ProgramBagCL::ComputeKEY(CLTexImage*dog, CLTexImage* key,
float Tdog,
float Tedge)
726 cl_kernel kernel = s_keypoint->_kernel;
727 cl_int w = key->GetImgWidth(), h = key->GetImgHeight();
729 float threshold1 = Tdog;
730 float threshold2 = (Tedge+1)*(Tedge+1)/Tedge;
732 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(dog->_clData));
733 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &((dog + 1)->_clData));
734 clSetKernelArg(kernel, 2,
sizeof(cl_mem), &((dog - 1)->_clData));
735 clSetKernelArg(kernel, 3,
sizeof(cl_mem), &(key->_clData));
736 clSetKernelArg(kernel, 4,
sizeof(cl_float), &(threshold0));
737 clSetKernelArg(kernel, 5,
sizeof(cl_float), &(threshold1));
738 clSetKernelArg(kernel, 6,
sizeof(cl_float), &(threshold2));
739 clSetKernelArg(kernel, 7,
sizeof(cl_int), &(w));
740 clSetKernelArg(kernel, 8,
sizeof(cl_int), &(h));
742 size_t dim0 = 8, dim1 = 8;
744 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
745 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
746 CheckErrorCL(status,
"ProgramBagCL::ComputeKEY");
749 void ProgramBagCL::UnpackImage(CLTexImage*src, CLTexImage* dst)
751 cl_kernel kernel = s_unpack->_kernel;
752 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
753 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
754 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
755 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
756 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
757 const size_t dim0 = 16, dim1 = 16;
758 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
759 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
761 CheckErrorCL(status,
"ProgramBagCL::UnpackImage");
766 void ProgramBagCL::UnpackImageDOG(CLTexImage*src, CLTexImage* dst)
768 if(s_unpack_dog ==
NULL)
return;
769 cl_kernel kernel = s_unpack_dog->_kernel;
770 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
771 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
772 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
773 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
774 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
775 const size_t dim0 = 16, dim1 = 16;
776 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
777 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
779 CheckErrorCL(status,
"ProgramBagCL::UnpackImage");
783 void ProgramBagCL::UnpackImageGRD(CLTexImage*src, CLTexImage* dst)
785 if(s_unpack_grd ==
NULL)
return;
786 cl_kernel kernel = s_unpack_grd->_kernel;
787 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
788 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
789 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
790 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
791 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
792 const size_t dim0 = 16, dim1 = 16;
793 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
794 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
796 CheckErrorCL(status,
"ProgramBagCL::UnpackImage");
799 void ProgramBagCL::UnpackImageKEY(CLTexImage*src, CLTexImage* dog, CLTexImage* dst)
801 if(s_unpack_key ==
NULL)
return;
802 cl_kernel kernel = s_unpack_key->_kernel;
803 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
804 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(dog->_clData));
805 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(src->_clData));
806 clSetKernelArg(kernel, 2,
sizeof(cl_mem), &(dst->_clData));
807 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(w));
808 clSetKernelArg(kernel, 4,
sizeof(cl_int), &(h));
809 const size_t dim0 = 16, dim1 = 16;
810 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
811 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
813 CheckErrorCL(status,
"ProgramBagCL::UnpackImageKEY");
816 void ProgramBagCL::LoadDescriptorShader()
819 LoadDescriptorShaderF2();
822 void ProgramBagCL::LoadDescriptorShaderF2()
827 void ProgramBagCL::LoadOrientationShader(
void)
832 void ProgramBagCL::LoadGenListShader(
int ndoglev,
int nlev)
837 void ProgramBagCL::LoadKeypointShader()
839 int i;
char buffer[20240];
840 ostrstream out(buffer, 20240);
847 "__kernel void keypoint(__read_only image2d_t tex, __read_only image2d_t texU,\n"
848 " __read_only image2d_t texD, __write_only image2d_t texK,\n"
849 " float THRESHOLD0, float THRESHOLD1, \n"
850 " float THRESHOLD2, int width, int height)\n"
852 " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
853 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;"
854 " int x = get_global_id(0), y = get_global_id(1);\n"
855 " if(x >= width || y >= height) return; \n"
856 " int xp = x - 1, xn = x + 1;\n"
857 " int yp = y - 1, yn = y + 1;\n"
858 " int2 coord0 = (int2) (x, y); \n"
859 " int2 coord1 = (int2) (xp, y); \n"
860 " int2 coord2 = (int2) (xn, y); \n"
861 " int2 coord3 = (int2) (x, yp); \n"
862 " int2 coord4 = (int2) (x, yn); \n"
863 " int2 coord5 = (int2) (xp, yp); \n"
864 " int2 coord6 = (int2) (xp, yn); \n"
865 " int2 coord7 = (int2) (xn, yp); \n"
866 " int2 coord8 = (int2) (xn, yn); \n"
867 " float4 ccc = read_imagef(tex, sampler,coord0);\n"
868 " float4 clc = read_imagef(tex, sampler,coord1);\n"
869 " float4 crc = read_imagef(tex, sampler,coord2);\n"
870 " float4 ccd = read_imagef(tex, sampler,coord3);\n"
871 " float4 ccu = read_imagef(tex, sampler,coord4);\n"
872 " float4 cld = read_imagef(tex, sampler,coord5);\n"
873 " float4 clu = read_imagef(tex, sampler,coord6);\n"
874 " float4 crd = read_imagef(tex, sampler,coord7);\n"
875 " float4 cru = read_imagef(tex, sampler,coord8);\n"
876 " float4 cc = ccc;\n"
877 " float4 v1[4], v2[4];\n"
878 " v1[0] = (float4)(clc.y, ccc.y, ccd.z, ccc.z);\n"
879 " v1[1] = (float4)(ccc.x, crc.x, ccd.w, ccc.w);\n"
880 " v1[2] = (float4)(clc.w, ccc.w, ccc.x, ccu.x);\n"
881 " v1[3] = (float4)(ccc.z, crc.z, ccc.y, ccu.y);\n"
882 " v2[0] = (float4)(cld.w, clc.w, ccd.w, ccc.w);\n"
883 " v2[1] = (float4)(ccd.z, ccc.z, crd.z, crc.z);\n"
884 " v2[2] = (float4)(clc.y, clu.y, ccc.y, ccu.y);\n"
885 " v2[3] = (float4)(ccc.x, ccu.x, crc.x, cru.x);\n"
886 " float4 key4 = (float4)(0); \n";
890 for(i = 0; i < 4; ++i)
892 " if(cc.s"<<i<<
" > THRESHOLD0){ \n"
893 " if(all(isgreater((float4)(cc.s"<<i<<
"), max(v1["<<i<<
"], v2["<<i<<
"]))))key4.s"<<i<<
" = 1.0;\n"
894 " }else if(cc.s"<<i<<
" < -THRESHOLD0){ \n"
895 " if(all(isless((float4)(cc.s"<<i<<
"), min(v1["<<i<<
"], v2["<<i<<
"]))))key4.s"<<i<<
" = -1.0;\n"
899 " if(x ==0) {key4.x = key4.z= 0; }\n"
900 " else if(x + 1 == width) {key4.y = key4.w = 0;}\n"
901 " if(y ==0) {key4.x = key4.y = 0; }\n"
902 " else if(y + 1 == height) {key4.z = key4.w = 0;}\n"
903 " float4 ak = fabs(key4); \n"
904 " float keysum = ak.x + ak.y + ak.z + ak.w; \n"
905 " float4 result = (float4)(0.0);\n"
906 " if(keysum == 1.0) {\n"
907 " float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n";
912 for(i = 0; i < 4; ++i)
914 " if(key4.s"<<i<<
" != 0)\n"
916 " float4 D2 = v1["<<i<<
"].xyzw - cc.s"<<i<<
";\n"
917 " float2 D4 = v2["<<i<<
"].xw - v2["<<i<<
"].yz;\n"
918 " float2 D5 = 0.5*(v1["<<i<<
"].yw-v1["<<i<<
"].xz); \n"
919 " fx["<<i<<
"] = D5.x; fy["<<i<<
"] = D5.y ;\n"
920 " fxx["<<i<<
"] = D2.x + D2.y;\n"
921 " fyy["<<i<<
"] = D2.z + D2.w;\n"
922 " fxy["<<i<<
"] = 0.25*(D4.x + D4.y);\n"
923 " float fxx_plus_fyy = fxx["<<i<<
"] + fyy["<<i<<
"];\n"
924 " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
925 " float score_down = (fxx["<<i<<
"]*fyy["<<i<<
"] - fxy["<<i<<
"]*fxy["<<i<<
"]);\n"
926 " if( score_down <= 0 || score_up > THRESHOLD2 * score_down)keysum = 0;\n"
930 " if(keysum == 1) {\n";
934 " float4 v4[4], v5[4], v6[4];\n"
935 " ccc = read_imagef(texU, sampler,coord0);\n"
936 " clc = read_imagef(texU, sampler,coord1);\n"
937 " crc = read_imagef(texU, sampler,coord2);\n"
938 " ccd = read_imagef(texU, sampler,coord3);\n"
939 " ccu = read_imagef(texU, sampler,coord4);\n"
940 " cld = read_imagef(texU, sampler,coord5);\n"
941 " clu = read_imagef(texU, sampler,coord6);\n"
942 " crd = read_imagef(texU, sampler,coord7);\n"
943 " cru = read_imagef(texU, sampler,coord8);\n"
944 " float4 cu = ccc;\n"
945 " v4[0] = (float4)(clc.y, ccc.y, ccd.z, ccc.z);\n"
946 " v4[1] = (float4)(ccc.x, crc.x, ccd.w, ccc.w);\n"
947 " v4[2] = (float4)(clc.w, ccc.w, ccc.x, ccu.x);\n"
948 " v4[3] = (float4)(ccc.z, crc.z, ccc.y, ccu.y);\n"
949 " v6[0] = (float4)(cld.w, clc.w, ccd.w, ccc.w);\n"
950 " v6[1] = (float4)(ccd.z, ccc.z, crd.z, crc.z);\n"
951 " v6[2] = (float4)(clc.y, clu.y, ccc.y, ccu.y);\n"
952 " v6[3] = (float4)(ccc.x, ccu.x, crc.x, cru.x);\n";
954 for(i = 0; i < 4; ++i)
956 " if(key4.s"<<i<<
" == 1.0)\n"
958 " if(cc.s"<<i<<
" < cu.s"<<i<<
" || \n"
959 " any(isless((float4)(cc.s"<<i<<
"), max(v4["<<i<<
"], v6["<<i<<
"]))))keysum = 0; \n"
960 " }else if(key4.s"<<i<<
" == -1.0)\n"
962 " if(cc.s"<<i<<
" > cu.s"<<i<<
" || \n"
963 " any(isgreater((float4)(cc.s"<<i<<
"), min(v4["<<i<<
"], v6["<<i<<
"]))) )keysum = 0; \n"
967 " if(keysum == 1.0) { \n";
969 " ccc = read_imagef(texD, sampler,coord0);\n"
970 " clc = read_imagef(texD, sampler,coord1);\n"
971 " crc = read_imagef(texD, sampler,coord2);\n"
972 " ccd = read_imagef(texD, sampler,coord3);\n"
973 " ccu = read_imagef(texD, sampler,coord4);\n"
974 " cld = read_imagef(texD, sampler,coord5);\n"
975 " clu = read_imagef(texD, sampler,coord6);\n"
976 " crd = read_imagef(texD, sampler,coord7);\n"
977 " cru = read_imagef(texD, sampler,coord8);\n"
978 " float4 cd = ccc;\n"
979 " v5[0] = (float4)(clc.y, ccc.y, ccd.z, ccc.z);\n"
980 " v5[1] = (float4)(ccc.x, crc.x, ccd.w, ccc.w);\n"
981 " v5[2] = (float4)(clc.w, ccc.w, ccc.x, ccu.x);\n"
982 " v5[3] = (float4)(ccc.z, crc.z, ccc.y, ccu.y);\n"
983 " v6[0] = (float4)(cld.w, clc.w, ccd.w, ccc.w);\n"
984 " v6[1] = (float4)(ccd.z, ccc.z, crd.z, crc.z);\n"
985 " v6[2] = (float4)(clc.y, clu.y, ccc.y, ccu.y);\n"
986 " v6[3] = (float4)(ccc.x, ccu.x, crc.x, cru.x);\n";
987 for(i = 0; i < 4; ++i)
989 " if(key4.s"<<i<<
" == 1.0)\n"
991 " if(cc.s"<<i<<
" < cd.s"<<i<<
" ||\n"
992 " any(isless((float4)(cc.s"<<i<<
"), max(v5["<<i<<
"], v6["<<i<<
"]))))keysum = 0; \n"
993 " }else if(key4.s"<<i<<
" == -1.0)\n"
995 " if(cc.s"<<i<<
" > cd.s"<<i<<
" ||\n"
996 " any(isgreater((float4)(cc.s"<<i<<
"), min(v5["<<i<<
"], v6["<<i<<
"]))))keysum = 0; \n"
1000 " if(keysum==1.0) {\n";
1005 " float4 offset = (float4)(0); \n";
1006 for(i = 1; i < 4; ++i)
1008 " if(key4.s"<<i<<
" != 0) \n"
1010 " cu.s0 = cu.s"<<i<<
"; cd.s0 = cd.s"<<i<<
"; cc.s0 = cc.s"<<i<<
"; \n"
1011 " v4[0] = v4["<<i<<
"]; v5[0] = v5["<<i<<
"]; \n"
1012 " fxy[0] = fxy["<<i<<
"]; fxx[0] = fxx["<<i<<
"]; fyy[0] = fyy["<<i<<
"]; \n"
1013 " fx[0] = fx["<<i<<
"]; fy[0] = fy["<<i<<
"]; \n"
1017 " float fs = 0.5*( cu.s0 - cd.s0 ); \n"
1018 " float fss = cu.s0 + cd.s0 - cc.s0 - cc.s0;\n"
1019 " float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
1020 " float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
1021 " float4 A0, A1, A2 ; \n"
1022 " A0 = (float4)(fxx[0], fxy[0], fxs, -fx[0]); \n"
1023 " A1 = (float4)(fxy[0], fyy[0], fys, -fy[0]); \n"
1024 " A2 = (float4)(fxs, fys, fss, -fs); \n"
1025 " float4 x3 = fabs((float4)(fxx[0], fxy[0], fxs, 0)); \n"
1026 " float maxa = max(max(x3.x, x3.y), x3.z); \n"
1027 " if(maxa >= 1e-10 ) \n"
1029 " if(x3.y ==maxa ) \n"
1031 " float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
1032 " }else if( x3.z == maxa ) \n"
1034 " float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
1037 " A1 -= A1.x * A0; \n"
1038 " A2 -= A2.x * A0; \n"
1039 " float2 x2 = fabs((float2)(A1.y, A2.y)); \n"
1040 " if( x2.y > x2.x ) \n"
1042 " float4 TEMP = A2.yzwx; \n"
1043 " A2.yzw = A1.yzw; \n"
1044 " A1.yzw = TEMP.xyz; \n"
1047 " if(x2.x >= 1e-10) { \n"
1048 " A1.yzw /= A1.y; \n"
1049 " A2.yzw -= A2.y * A1.yzw; \n"
1050 " if(fabs(A2.z) >= 1e-10) {\n"
1051 " offset.z = A2.w /A2.z; \n"
1052 " offset.y = A1.w - offset.z*A1.z; \n"
1053 " offset.x = A0.w - offset.z*A0.z - offset.y*A0.y; \n"
1054 " if(fabs(cc.s0 + 0.5*dot((float4)(fx[0], fy[0], fs, 0), offset ))<=THRESHOLD1\n"
1055 " || any( isgreater(fabs(offset), (float4)(1.0)))) key4 = (float4)(0.0);\n"
1060 " float keyv = dot(key4, (float4)(1.0, 2.0, 3.0, 4.0));\n"
1061 " result = (float4)(keyv, offset.xyz);\n"
1063 " write_imagef(texK, coord0, result);\n "
1069 " float keyv = dot(key4, (float4)(1.0, 2.0, 3.0, 4.0));\n"
1070 " result = (float4)(keyv, 0, 0, 0);\n"
1072 " write_imagef(texK, coord0, result);\n "
1076 s_keypoint =
new ProgramCL(
"keypoint", buffer, _context, _device);
1079 void ProgramBagCL::LoadDisplayShaders()
1084 s_unpack =
new ProgramCL(
"main",
1085 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1086 " int width, int height) {\n"
1087 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1088 "int x = get_global_id(0), y = get_global_id(1); \n"
1089 "if(x >= width || y >= height) return;\n"
1090 "int xx = x / 2, yy = y / 2; \n"
1091 "float4 vv = read_imagef(input, sampler, (int2) (xx, yy)); \n"
1092 "float v1 = (x & 1 ? vv.w : vv.z); \n"
1093 "float v2 = (x & 1 ? vv.y : vv.x);\n"
1094 "float v = y & 1 ? v1 : v2;\n"
1095 "float4 result = (float4) (v, v, v, 1);"
1096 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1098 s_unpack_dog =
new ProgramCL(
"main",
1099 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1100 " int width, int height) {\n"
1101 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1102 "int x = get_global_id(0), y = get_global_id(1); \n"
1103 "if(x >= width || y >= height) return;\n"
1104 "int xx = x / 2, yy = y / 2; \n"
1105 "float4 vv = read_imagef(input, sampler, (int2) (xx, yy)); \n"
1106 "float v1 = (x & 1 ? vv.w : vv.z); \n"
1107 "float v2 = (x & 1 ? vv.y : vv.x);\n"
1108 "float v0 = y & 1 ? v1 : v2;\n"
1109 "float v = 0.5 + 20.0 * v0;\n "
1110 "float4 result = (float4) (v, v, v, 1);"
1111 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1113 s_unpack_grd =
new ProgramCL(
"main",
1114 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1115 " int width, int height) {\n"
1116 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1117 "int x = get_global_id(0), y = get_global_id(1); \n"
1118 "if(x >= width || y >= height) return;\n"
1119 "int xx = x / 2, yy = y / 2; \n"
1120 "float4 vv = read_imagef(input, sampler, (int2) (xx, yy)); \n"
1121 "float v1 = (x & 1 ? vv.w : vv.z); \n"
1122 "float v2 = (x & 1 ? vv.y : vv.x);\n"
1123 "float v0 = y & 1 ? v1 : v2;\n"
1124 "float v = 5.0 * v0;\n "
1125 "float4 result = (float4) (v, v, v, 1);"
1126 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1128 s_unpack_key =
new ProgramCL(
"main",
1129 "__kernel void main(__read_only image2d_t dog,\n"
1130 " __read_only image2d_t key,\n"
1131 " __write_only image2d_t output,\n"
1132 " int width, int height) {\n"
1133 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1134 "int x = get_global_id(0), y = get_global_id(1); \n"
1135 "if(x >= width || y >= height) return;\n"
1136 "int xx = x / 2, yy = y / 2; \n"
1137 "float4 kk = read_imagef(key, sampler, (int2) (xx, yy));\n"
1138 "int4 cc = isequal(fabs(kk.xxxx), (float4)(1.0, 2.0, 3.0, 4.0));\n"
1139 "int k1 = (x & 1 ? cc.w : cc.z); \n"
1140 "int k2 = (x & 1 ? cc.y : cc.x);\n"
1141 "int k0 = y & 1 ? k1 : k2;\n"
1144 " //result = kk.x > 0 ? ((float4)(1.0, 0, 0, 1.0)) : ((float4) (0.0, 1.0, 0.0, 1.0)); \n"
1145 " result = kk.x < 0 ? ((float4)(0, 1.0, 0, 1.0)) : ((float4) (1.0, 0.0, 0.0, 1.0)); \n"
1147 "float4 vv = read_imagef(dog, sampler, (int2) (xx, yy));\n"
1148 "float v1 = (x & 1 ? vv.w : vv.z); \n"
1149 "float v2 = (x & 1 ? vv.y : vv.x);\n"
1150 "float v0 = y & 1 ? v1 : v2;\n"
1151 "float v = 0.5 + 20.0 * v0;\n "
1152 "result = (float4) (v, v, v, 1);"
1154 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1158 void ProgramBagCL::SetMarginCopyParam(
int xmax,
int ymax)
1163 void ProgramBagCL::SetGradPassParam(
int texP)
1168 void ProgramBagCL::SetGenListEndParam(
int ktex)
1173 void ProgramBagCL::SetDogTexParam(
int texU,
int texD)
1178 void ProgramBagCL::SetGenListInitParam(
int w,
int h)
1180 float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f, (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
1185 void ProgramBagCL::SetGenListStartParam(
float width,
int tex0)
1192 void ProgramBagCL::SetGenListStepParam(
int tex,
int tex0)
1197 void ProgramBagCL::SetGenVBOParam(
float width,
float fwidth,
float size)
1202 void ProgramBagCL::SetSimpleOrientationInput(
int oTex,
float sigma,
float sigma_step)
1208 void ProgramBagCL::SetFeatureOrientationParam(
int gtex,
int width,
int height,
float sigma,
int otex,
float step)
1214 void ProgramBagCL::SetFeatureDescirptorParam(
int gtex,
int otex,
float dwidth,
float fwidth,
float width,
float height,
float sigma)
1221 const char* ProgramBagCL::GetErrorString(cl_int
error)
1223 static const char* errorString[] = {
1225 "CL_DEVICE_NOT_FOUND",
1226 "CL_DEVICE_NOT_AVAILABLE",
1227 "CL_COMPILER_NOT_AVAILABLE",
1228 "CL_MEM_OBJECT_ALLOCATION_FAILURE",
1229 "CL_OUT_OF_RESOURCES",
1230 "CL_OUT_OF_HOST_MEMORY",
1231 "CL_PROFILING_INFO_NOT_AVAILABLE",
1232 "CL_MEM_COPY_OVERLAP",
1233 "CL_IMAGE_FORMAT_MISMATCH",
1234 "CL_IMAGE_FORMAT_NOT_SUPPORTED",
1235 "CL_BUILD_PROGRAM_FAILURE",
1255 "CL_INVALID_DEVICE_TYPE",
1256 "CL_INVALID_PLATFORM",
1257 "CL_INVALID_DEVICE",
1258 "CL_INVALID_CONTEXT",
1259 "CL_INVALID_QUEUE_PROPERTIES",
1260 "CL_INVALID_COMMAND_QUEUE",
1261 "CL_INVALID_HOST_PTR",
1262 "CL_INVALID_MEM_OBJECT",
1263 "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
1264 "CL_INVALID_IMAGE_SIZE",
1265 "CL_INVALID_SAMPLER",
1266 "CL_INVALID_BINARY",
1267 "CL_INVALID_BUILD_OPTIONS",
1268 "CL_INVALID_PROGRAM",
1269 "CL_INVALID_PROGRAM_EXECUTABLE",
1270 "CL_INVALID_KERNEL_NAME",
1271 "CL_INVALID_KERNEL_DEFINITION",
1272 "CL_INVALID_KERNEL",
1273 "CL_INVALID_ARG_INDEX",
1274 "CL_INVALID_ARG_VALUE",
1275 "CL_INVALID_ARG_SIZE",
1276 "CL_INVALID_KERNEL_ARGS",
1277 "CL_INVALID_WORK_DIMENSION",
1278 "CL_INVALID_WORK_GROUP_SIZE",
1279 "CL_INVALID_WORK_ITEM_SIZE",
1280 "CL_INVALID_GLOBAL_OFFSET",
1281 "CL_INVALID_EVENT_WAIT_LIST",
1283 "CL_INVALID_OPERATION",
1284 "CL_INVALID_GL_OBJECT",
1285 "CL_INVALID_BUFFER_SIZE",
1286 "CL_INVALID_MIP_LEVEL",
1287 "CL_INVALID_GLOBAL_WORK_SIZE",
1290 const int errorCount =
sizeof(errorString) /
sizeof(errorString[0]);
1292 const int index = -
error;
1294 return (index >= 0 && index < errorCount) ? errorString[index] :
"";
1297 bool ProgramBagCL::CheckErrorCL(cl_int
error,
const char* location)
1299 if(
error == CL_SUCCESS)
return true;
1300 const char *errstr = GetErrorString(
error);
1301 if(errstr && errstr[0]) std::cerr << errstr;
1302 else std::cerr <<
"Error " <<
error;
1303 if(location) std::cerr <<
" at " << location;
1314 void ProgramBagCLN::LoadFixedShaders()
1316 s_sampling =
new ProgramCL(
"sampling",
1317 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1318 "__kernel void sampling(__read_only image2d_t input, __write_only image2d_t output, "
1319 " int width, int height) {\n"
1320 "int2 coord = (int2)(get_global_id(0), get_global_id(1)); \n"
1321 "if( coord.x >= width || coord.y >= height) return;\n"
1322 "write_imagef(output, coord, read_imagef(input, sampler, coord << 1)); }" , _context, _device);
1324 s_sampling_k =
new ProgramCL(
"sampling_k",
1325 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1326 "__kernel void sampling_k(__read_only image2d_t input, __write_only image2d_t output, "
1327 " int width, int height, int step) {\n"
1328 "int x = get_global_id(0), y = get_global_id(1); \n"
1329 "if( x >= width || y >= height) return;\n"
1330 "int xa = x * step, ya = y *step; \n"
1331 "float4 v1 = read_imagef(input, sampler, (int2) (xa, ya)); \n"
1332 "write_imagef(output, (int2) (x, y), v1); }" , _context, _device);
1335 s_sampling_u =
new ProgramCL(
"sampling_u",
1336 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;\n"
1337 "__kernel void sampling_u(__read_only image2d_t input, \n"
1338 " __write_only image2d_t output,\n"
1339 " int width, int height, float step) {\n"
1340 "int x = get_global_id(0), y = get_global_id(1); \n"
1341 "if( x >= width || y >= height) return;\n"
1342 "float xa = x * step, ya = y *step; \n"
1343 "float v1 = read_imagef(input, sampler, (float2) (xa, ya)).x; \n"
1344 "write_imagef(output, (int2) (x, y), (float4)(v1)); }" , _context, _device);
1346 s_dog_pass =
new ProgramCL(
"dog_pass",
1347 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1348 "__kernel void dog_pass(__read_only image2d_t tex, __read_only image2d_t texp,\n"
1349 " __write_only image2d_t dog, int width, int height) {\n"
1350 "int2 coord = (int2)(get_global_id(0), get_global_id(1)); \n"
1351 "if( coord.x >= width || coord.y >= height) return;\n"
1352 "float cc = read_imagef(tex , sampler, coord).x; \n"
1353 "float cp = read_imagef(texp, sampler, coord).x;\n"
1354 "write_imagef(dog, coord, (float4)(cc - cp)); }\n", _context, _device);
1356 s_grad_pass =
new ProgramCL(
"grad_pass",
1357 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1358 "__kernel void grad_pass(__read_only image2d_t tex, __read_only image2d_t texp,\n"
1359 " __write_only image2d_t dog, int width, int height, \n"
1360 " __write_only image2d_t grad, __write_only image2d_t rot) {\n"
1361 "int x = get_global_id(0), y = get_global_id(1); \n"
1362 "if( x >= width || y >= height) return;\n"
1363 "int2 coord = (int2) (x, y);\n"
1364 "float cl = read_imagef(tex, sampler, (int2)(x - 1, y)).x;\n"
1365 "float cc = read_imagef(tex , sampler, coord).x; \n"
1366 "float cr = read_imagef(tex, sampler, (int2)(x + 1, y)).x;\n"
1367 "float cp = read_imagef(texp, sampler, coord).x;\n"
1368 "write_imagef(dog, coord, (float4)(cc - cp)); \n"
1369 "float cd = read_imagef(tex, sampler, (int2)(x, y - 1)).x;\n"
1370 "float cu = read_imagef(tex, sampler, (int2)(x, y + 1)).x;\n"
1371 "float dx = cr - cl, dy = cu - cd; \n"
1372 "float gg = 0.5 * sqrt(dx*dx + dy * dy);\n"
1373 "write_imagef(grad, coord, (float4)(gg));\n"
1374 "float oo = atan2(dy, dx + FLT_MIN);\n"
1375 "write_imagef(rot, coord, (float4)(oo));}\n", _context, _device);
1377 s_grad_pass2 =
new ProgramCL(
"grad_pass2",
1378 "#define BLOCK_DIMX 32\n"
1379 "#define BLOCK_DIMY 14\n"
1380 "#define BLOCK_SIZE (BLOCK_DIMX * BLOCK_DIMY)\n"
1381 "__kernel void grad_pass2(__read_only image2d_t tex, __read_only image2d_t texp,\n"
1382 " __write_only image2d_t dog, int width, int height,\n"
1383 " __write_only image2d_t grd, __write_only image2d_t rot){\n"
1384 "__local float block[BLOCK_SIZE]; \n"
1385 "sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
1386 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1387 "int2 coord = (int2) ( get_global_id(0) - get_group_id(0) * 2 - 1, \n"
1388 " get_global_id(1) - get_group_id(1) * 2 - 1); \n"
1389 "int idx = mad24(get_local_id(1), BLOCK_DIMX, get_local_id(0));\n"
1390 "float cc = read_imagef(tex, sampler, coord).x;\n"
1391 "block[idx] = cc;\n"
1392 "barrier(CLK_LOCAL_MEM_FENCE);\n"
1393 "if( get_local_id(0) == 0 || get_local_id(0) == BLOCK_DIMX - 1) return;\n"
1394 "if( get_local_id(1) == 0 || get_local_id(1) == BLOCK_DIMY - 1) return;\n"
1395 "if( coord.x >= width) return; \n"
1396 "if( coord.y >= height) return;\n"
1397 "float cp = read_imagef(texp, sampler, coord).x;\n"
1398 "float dx = block[idx + 1] - block[idx - 1];\n"
1399 "float dy = block[idx + BLOCK_DIMX ] - block[idx - BLOCK_DIMX];\n"
1400 "write_imagef(dog, coord, (float4)(cc - cp)); \n"
1401 "write_imagef(grd, coord, (float4)(0.5 * sqrt(dx*dx + dy * dy)));\n"
1402 "write_imagef(rot, coord, (float4)(atan2(dy, dx + FLT_MIN)));}\n", _context, _device);
1405 void ProgramBagCLN::LoadDisplayShaders()
1407 s_unpack =
new ProgramCL(
"main",
1408 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1409 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1410 " int width, int height) {\n"
1411 "int x = get_global_id(0), y = get_global_id(1); \n"
1412 "if(x >= width || y >= height) return;\n"
1413 "float v = read_imagef(input, sampler, (int2) (x, y)).x; \n"
1414 "float4 result = (float4) (v, v, v, 1);"
1415 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1417 s_unpack_grd =
new ProgramCL(
"main",
1418 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
1419 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1420 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1421 " int width, int height) {\n"
1422 "int x = get_global_id(0), y = get_global_id(1); \n"
1423 "if(x >= width || y >= height) return;\n"
1424 "float v0 = read_imagef(input, sampler, (int2) (x, y)).x; \n"
1425 "float v = 5.0 * v0; float4 result = (float4) (v, v, v, 1);"
1426 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1428 s_unpack_dog =
new ProgramCL(
"main",
1429 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1430 "__kernel void main(__read_only image2d_t input, __write_only image2d_t output,\n"
1431 " int width, int height) {\n"
1432 "int x = get_global_id(0), y = get_global_id(1); \n"
1433 "if(x >= width || y >= height) return;\n"
1434 "float v0 = read_imagef(input, sampler, (int2) (x, y)).x; \n"
1435 "float v = 0.5 + 20.0 * v0; float4 result = (float4) (v, v, v, 1);"
1436 "write_imagef(output, (int2) (x, y), result); }" , _context, _device);
1439 ProgramCL* ProgramBagCLN::CreateFilterH(
float kernel[],
int width)
1443 ostrstream out(buffer, 10240);
1444 out <<
"#define KERNEL_WIDTH " <<
width <<
"\n"
1445 <<
"#define KERNEL_HALF_WIDTH " << (
width / 2) <<
"\n"
1446 "#define BLOCK_WIDTH 128\n"
1447 "#define BLOCK_HEIGHT 1\n"
1448 "#define CACHE_WIDTH (BLOCK_WIDTH + KERNEL_WIDTH - 1)\n"
1449 "#define CACHE_WIDTH_ALIGNED ((CACHE_WIDTH + 15) / 16 * 16)\n"
1450 "#define CACHE_COUNT (2 + (CACHE_WIDTH - 2) / BLOCK_WIDTH)\n"
1451 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1452 "__kernel void filter_h(__read_only image2d_t input, \n"
1453 " __write_only image2d_t output, int width_, int height_, \n"
1454 " __constant float* weight) {\n"
1455 "__local float data[CACHE_WIDTH]; \n"
1456 "int x = get_global_id(0), y = get_global_id(1);\n"
1458 "for(int j = 0; j < CACHE_COUNT; ++j)\n"
1460 " if(get_local_id(0) + j * BLOCK_WIDTH < CACHE_WIDTH)\n"
1462 " int fetch_index = min(x + j * BLOCK_WIDTH - KERNEL_HALF_WIDTH, width_);\n"
1463 " data[get_local_id(0) + j * BLOCK_WIDTH] = read_imagef(input, sampler, (int2)(fetch_index, y)).x;\n"
1466 "barrier(CLK_LOCAL_MEM_FENCE); \n"
1467 "if( x > width_ || y > height_) return; \n"
1468 "float result = 0; \n"
1470 "for(int i = 0; i < KERNEL_WIDTH; ++i)\n"
1472 " result += data[get_local_id(0) + i] * weight[i];\n"
1474 <<
"write_imagef(output, (int2)(x, y), (float4)(result)); }\n" <<
'\0';
1475 return new ProgramCL(
"filter_h", buffer, _context, _device);
1480 ProgramCL* ProgramBagCLN::CreateFilterV(
float kernel[],
int width)
1484 ostrstream out(buffer, 10240);
1485 out <<
"#define KERNEL_WIDTH " <<
width <<
"\n"
1486 <<
"#define KERNEL_HALF_WIDTH " << (
width / 2) <<
"\n"
1487 "#define BLOCK_WIDTH 128\n"
1488 "#define CACHE_WIDTH (BLOCK_WIDTH + KERNEL_WIDTH - 1)\n"
1489 "#define CACHE_WIDTH_ALIGNED ((CACHE_WIDTH + 15) / 16 * 16)\n"
1490 "#define CACHE_COUNT (2 + (CACHE_WIDTH - 2) / BLOCK_WIDTH)\n"
1491 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
1492 " CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
1493 "__kernel void filter_v(__read_only image2d_t input, \n"
1494 " __write_only image2d_t output, int width_, int height_, \n"
1495 " __constant float* weight) {\n"
1496 "__local float data[CACHE_WIDTH]; \n"
1497 "int x = get_global_id(0), y = get_global_id(1);\n"
1499 "for(int j = 0; j < CACHE_COUNT; ++j)\n"
1501 " if(get_local_id(1) + j * BLOCK_WIDTH < CACHE_WIDTH)\n"
1503 " int fetch_index = min(y + j * BLOCK_WIDTH - KERNEL_HALF_WIDTH, height_);\n"
1504 " data[get_local_id(1) + j * BLOCK_WIDTH ] = read_imagef(input, sampler, (int2)(x, fetch_index)).x;\n"
1507 "barrier(CLK_LOCAL_MEM_FENCE); \n"
1508 "if( x > width_ || y > height_) return; \n"
1509 "float result = 0; \n"
1511 "for(int i = 0; i < KERNEL_WIDTH; ++i)\n"
1513 " result += data[get_local_id(1) + i] * weight[i];\n"
1515 <<
"write_imagef(output, (int2)(x, y), (float4)(result)); }\n" <<
'\0';
1517 return new ProgramCL(
"filter_v", buffer, _context, _device);
1520 FilterCL* ProgramBagCLN::CreateFilter(
float kernel[],
int width)
1522 FilterCL * filter =
new FilterCL;
1523 filter->s_shader_h = CreateFilterH(kernel,
width);
1524 filter->s_shader_v = CreateFilterV(kernel,
width);
1525 filter->_weight =
new CLTexImage(_context, _queue);
1526 filter->_weight->InitBufferTex(
width, 1, 1);
1527 filter->_weight->CopyFromHost(kernel);
1528 filter->_size =
width;
1533 void ProgramBagCLN::FilterImage(FilterCL* filter, CLTexImage *dst, CLTexImage *src, CLTexImage*tmp)
1535 cl_kernel kernelh = filter->s_shader_h->_kernel;
1536 cl_kernel kernelv = filter->s_shader_v->_kernel;
1539 cl_int status, w = dst->GetImgWidth(), h = dst->GetImgHeight();
1540 cl_mem weight = (cl_mem) filter->_weight->_clData;
1541 cl_int w_ = w - 1, h_ = h - 1;
1544 clSetKernelArg(kernelh, 0,
sizeof(cl_mem), &src->_clData);
1545 clSetKernelArg(kernelh, 1,
sizeof(cl_mem), &tmp->_clData);
1546 clSetKernelArg(kernelh, 2,
sizeof(cl_int), &w_);
1547 clSetKernelArg(kernelh, 3,
sizeof(cl_int), &h_);
1548 clSetKernelArg(kernelh, 4,
sizeof(cl_mem), &weight);
1550 size_t dim00 = 128, dim01 = 1;
1551 size_t gsz1[2] = {(w + dim00 - 1) / dim00 * dim00, (h + dim01 - 1) / dim01 * dim01}, lsz1[2] = {dim00, dim01};
1552 status = clEnqueueNDRangeKernel(_queue, kernelh, 2,
NULL, gsz1, lsz1, 0,
NULL,
NULL);
1553 CheckErrorCL(status,
"ProgramBagCLN::FilterImageH");
1554 if(status != CL_SUCCESS)
return;
1557 clSetKernelArg(kernelv, 0,
sizeof(cl_mem), &tmp->_clData);
1558 clSetKernelArg(kernelv, 1,
sizeof(cl_mem), &dst->_clData);
1559 clSetKernelArg(kernelv, 2,
sizeof(cl_int), &w_);
1560 clSetKernelArg(kernelv, 3,
sizeof(cl_int), &h_);
1561 clSetKernelArg(kernelv, 4,
sizeof(cl_mem), &weight);
1563 size_t dim10 = 1, dim11 = 128;
1564 size_t gsz2[2] = {(w + dim10 - 1) / dim10 * dim10, (h + dim11 - 1) / dim11 * dim11}, lsz2[2] = {dim10, dim11};
1565 status = clEnqueueNDRangeKernel(_queue, kernelv, 2,
NULL, gsz2, lsz2, 0,
NULL,
NULL);
1566 CheckErrorCL(status,
"ProgramBagCLN::FilterImageV");
1570 void ProgramBagCLN::SampleImageD(CLTexImage *dst, CLTexImage *src,
int log_scale)
1573 cl_int w = dst->GetImgWidth(), h = dst->GetImgHeight();
1575 cl_int fullstep = (1 << log_scale);
1576 kernel = log_scale == 1? s_sampling->_kernel : s_sampling_k->_kernel;
1577 clSetKernelArg(kernel, 0,
sizeof(cl_mem), &(src->_clData));
1578 clSetKernelArg(kernel, 1,
sizeof(cl_mem), &(dst->_clData));
1579 clSetKernelArg(kernel, 2,
sizeof(cl_int), &(w));
1580 clSetKernelArg(kernel, 3,
sizeof(cl_int), &(h));
1581 if(log_scale > 1) clSetKernelArg(kernel, 4,
sizeof(cl_int), &(fullstep));
1583 size_t dim0 = 128, dim1 = 1;
1585 size_t gsz[2] = {(w + dim0 - 1) / dim0 * dim0, (h + dim1 - 1) / dim1 * dim1}, lsz[2] = {dim0, dim1};
1586 cl_int status = clEnqueueNDRangeKernel(_queue, kernel, 2,
NULL, gsz, lsz, 0,
NULL,
NULL);
1587 CheckErrorCL(status,
"ProgramBagCLN::SampleImageD");
static float _FilterWidthFactor
static int _SubpixelLocalization
static int _octave_min_default
static int _PreciseBorder
static int _MaxFilterWidth
static int _DescriptorPPT
static void StartTimer(const char *event)
float GetInitialSmoothSigma(int octave_min)
static void error(char *msg)
QTextStream & endl(QTextStream &stream)
MiniVec< float, N > ceil(const MiniVec< float, N > &a)