22 #if defined(SIFTGPU_CUDA_ENABLED)
35 #include <cuda_runtime_api.h>
36 #include <cuda_gl_interop.h>
40 #include "CuTexImage.h"
41 #include "ProgramCU.h"
45 cudaDestroyTextureObject(handle);
49 const cudaChannelFormatDesc& channelFmtDesc)
53 cudaResourceDesc resourceDesc;
54 memset(&resourceDesc, 0,
sizeof(resourceDesc));
55 resourceDesc.resType = cudaResourceTypeLinear;
56 resourceDesc.res.linear.devPtr = _cuData;
57 resourceDesc.res.linear.desc = channelFmtDesc;
58 resourceDesc.res.linear.sizeInBytes = _numBytes;
60 cudaCreateTextureObject(&texObj.handle, &resourceDesc, &textureDesc,
nullptr);
67 const cudaChannelFormatDesc& channelFmtDesc)
71 cudaResourceDesc resourceDesc;
72 memset(&resourceDesc, 0,
sizeof(resourceDesc));
73 resourceDesc.resType = cudaResourceTypePitch2D;
74 resourceDesc.res.pitch2D.devPtr = _cuData;
75 resourceDesc.res.pitch2D.width = _imgWidth;
76 resourceDesc.res.pitch2D.height = _imgHeight;
77 resourceDesc.res.pitch2D.pitchInBytes = _imgWidth * _numChannel *
sizeof(float);
78 resourceDesc.res.pitch2D.desc = channelFmtDesc;
80 cudaCreateTextureObject(&texObj.handle, &resourceDesc, &textureDesc,
nullptr);
91 _numChannel = _numBytes = 0;
92 _imgWidth = _imgHeight = _texWidth = _texHeight = 0;
100 GLint bsize, esize =
width *
height * nchannel *
sizeof(float);
101 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
102 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
105 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize,
NULL, GL_STATIC_DRAW_ARB);
106 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
108 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
112 cudaGLRegisterBufferObject(pbo);
113 cudaGLMapBufferObject(&_cuData, pbo);
126 _numChannel = nchannel;
135 _texWidth = _texHeight =0;
146 cudaGLUnmapBufferObject(_fromPBO);
147 cudaGLUnregisterBufferObject(_fromPBO);
152 if(_cuData2D) cudaFreeArray(_cuData2D);
165 _numChannel =
min(
max(nchannel, 1), 4);
175 if (
size >= INT_MAX *
sizeof(
float)) {
179 if(
size <= _numBytes)
return true;
181 if(_cuData) cudaFree(_cuData);
184 const cudaError_t status = cudaMalloc(&_cuData, _numBytes =
size);
186 if (status != cudaSuccess) {
197 if(_cuData ==
NULL)
return;
198 cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel *
sizeof(
float), cudaMemcpyHostToDevice);
203 if(_cuData ==
NULL)
return;
204 cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel *
sizeof(
float), cudaMemcpyDeviceToHost);
209 if(_cuData ==
NULL)
return;
210 cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel *
sizeof(
float), cudaMemcpyDeviceToHost, (cudaStream_t)stream);
215 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
216 if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight))
218 cudaFreeArray(_cuData2D);
221 if(_cuData2D ==
NULL)
223 _texWidth =
max(_texWidth, _imgWidth);
224 _texHeight =
max(_texHeight, _imgHeight);
225 cudaChannelFormatDesc desc;
226 desc.f = cudaChannelFormatKindFloat;
227 desc.x =
sizeof(float) * 8;
228 desc.y = _numChannel >=2 ?
sizeof(float) * 8 : 0;
229 desc.z = _numChannel >=3 ?
sizeof(float) * 8 : 0;
230 desc.w = _numChannel >=4 ?
sizeof(float) * 8 : 0;
231 const cudaError_t status = cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight);
232 if (status != cudaSuccess) {
243 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
247 cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel*
sizeof(
float) ,
248 _imgWidth * _numChannel*
sizeof(
float), _imgHeight, cudaMemcpyDeviceToDevice);
258 cudaGLRegisterBufferObject(pbo);
259 cudaGLMapBufferObject(&pbuf, pbo);
261 cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice);
263 cudaGLUnmapBufferObject(pbo);
264 cudaGLUnregisterBufferObject(pbo);
270 GLint bsize, esize = _imgWidth * _imgHeight *
sizeof(float) * _numChannel;
271 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
272 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
275 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2,
NULL, GL_STATIC_DRAW_ARB);
276 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
278 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
282 cudaGLRegisterBufferObject(pbo);
283 cudaGLMapBufferObject(&pbuf, pbo);
284 cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice);
285 cudaGLUnmapBufferObject(pbo);
286 cudaGLUnregisterBufferObject(pbo);
void CopyFromPBO(int width, int height, GLuint pbo)
void CopyFromHost(const void *buf)
void CopyToHost(void *buf)
virtual void SetImageSize(int width, int height)
virtual bool InitTexture(int width, int height, int nchannel=1)
CuTexObj BindTexture2D(const cudaTextureDesc &textureDesc, const cudaChannelFormatDesc &channelFmtDesc)
int CopyToPBO(GLuint pbo)
CuTexObj BindTexture(const cudaTextureDesc &textureDesc, const cudaChannelFormatDesc &channelFmtDesc)
int CheckErrorCUDA(const char *location)