Note that there are some explanatory texts on larger screens.

plurals
  1. POC/CUDA - Modifying CUDA/GL interop example to store image in a memory buffer
    primarykey
    data
    text
    <p>I am trying to store the image, which is generated by the CUDA-OpenGL interop example in the 'CUDA-By Example' textbook, into a memory buffer that can store to images. </p> <p>I want to store two images, one which is a green "X" and another which is an orangish "X", in a memory buffer. When I render the pBuffer with OpenGL, I should get a green "X" image like the example output, however, I just get a black screen. I am not sure why I am not getting the right output. Could someone please tell me what's wrong? </p> <p>I obtained the code for the memory buffer from <a href="https://stackoverflow.com/questions/10399095/a-memory-buffer-for-multiple-images">A Memory buffer for multiple images</a></p> <pre><code> #include "book.h" #include "cpu_bitmap.h" #include "cuda.h" #include &lt;cuda_gl_interop.h&gt; PFNGLBINDBUFFERARBPROC glBindBuffer = NULL; PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL; PFNGLGENBUFFERSARBPROC glGenBuffers = NULL; PFNGLBUFFERDATAARBPROC glBufferData = NULL; #define DIM 512 #define IMAGESIZE_MAX (DIM*DIM) // MY CHANGE GLuint bufferObj; cudaGraphicsResource *resource; // based on ripple code, but uses uchar4 which is the type of data // graphic inter op uses. see screenshot - basic2.png __global__ void kernel( uchar4 *ptr1) { // map from threadIdx/BlockIdx to pixel position int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; int offset = x + y * blockDim.x * gridDim.x ; // now calculate the value at that position float fx = x/(float)DIM - 0.5f; float fy = y/(float)DIM - 0.5f; unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) ); // accessing uchar4 vs unsigned char* ptr1[offset].x = 0; ptr1[offset].y = green; ptr1[offset].z = 0; ptr1[offset].w = 255; } // MY CODE __global__ void kernel2( uchar4 *ptr2) { // map from threadIdx/BlockIdx to pixel position int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; int offset = x + y * blockDim.x * gridDim.x ; // now calculate the value at that position float fx = x/(float)DIM - 0.5f; float fy = y/(float)DIM - 0.5f; unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) ); // accessing uchar4 vs unsigned char* ptr2[offset].x = 1000; ptr2[offset].y = green; ptr2[offset].z = 0; ptr2[offset].w = 255; } __global__ void copy ( uchar4 *pBuffer, uchar4 *Ptr, uchar4 *Ptr2, size_t size, int a ) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; int idx = x + y * blockDim.x * gridDim.x ; int bdx = idx; if (a==1) { while ( idx &lt; DIM*DIM) { pBuffer[idx] = Ptr[idx] ; __syncthreads(); if (idx==DIM*DIM) { break; } } } if (a==2) { while ( (idx &lt; DIM*DIM) &amp;&amp; (bdx &lt; DIM*DIM) ) { uchar4 temp = Ptr2[bdx]; __syncthreads(); pBuffer[idx+4] = temp; __syncthreads(); if ((idx==DIM*DIM) &amp;&amp; (bdx==DIM*DIM)) { break; } } } } void key_func( unsigned char key, int x, int y ) { switch (key) { case 27: // clean up OpenGL and CUDA ( cudaGraphicsUnregisterResource( resource ) ); glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 ); glDeleteBuffers( 1, &amp;bufferObj ); exit(0); } } void draw_func( void ) { // we pass zero as the last parameter, because out bufferObj is now // the source, and the field switches from being a pointer to a // bitmap to now mean an offset into a bitmap object glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 ); glutSwapBuffers(); } int main( int argc, char **argv ) { cudaDeviceProp prop; int dev; memset( &amp;prop, 0, sizeof( cudaDeviceProp ) ); prop.major = 1; prop.minor = 0; ( cudaChooseDevice( &amp;dev, &amp;prop ) ); // tell CUDA which dev we will be using for graphic interop // from the programming guide: Interoperability with OpenGL // requires that the CUDA device be specified by // cudaGLSetGLDevice() before any other runtime calls. ( cudaGLSetGLDevice( dev ) ); // these GLUT calls need to be made before the other OpenGL // calls, else we get a seg fault glutInit( &amp;argc, argv ); glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA ); glutInitWindowSize( DIM, DIM ); glutCreateWindow( "bitmap" ); glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer"); glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers"); glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers"); glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData"); // the first three are standard OpenGL, the 4th is the CUDA reg // of the bitmap these calls exist starting in OpenGL 1.5 glGenBuffers( 1, &amp;bufferObj ); glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj ); glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4 ,NULL, GL_DYNAMIC_DRAW_ARB ); // REGISTER THE GL BufferObj and CUDA Resource ( cudaGraphicsGLRegisterBuffer( &amp;resource, bufferObj, cudaGraphicsMapFlagsNone ) ); // do work with the memory dst being on the GPU, gotten via mapping HANDLE_ERROR( cudaGraphicsMapResources( 1, &amp;resource, NULL ) ); // MY MODIFIED CODE uchar4 *devPtr; size_t size; size_t sizeTotal = 0; cudaMalloc ( (uchar4 **)&amp;devPtr, size); uchar4 *devPtr2; cudaMalloc ( (uchar4 **)&amp;devPtr2, size); uchar4 *pBuffer; (cudaMalloc ( (uchar4 **)&amp;pBuffer, size)); uchar4 *pBufferCurrent; (cudaMalloc ( (uchar4 **)&amp;pBufferCurrent, size)); uchar4 *pBufferImage; (cudaMalloc ( (uchar4 **)&amp;pBufferImage, size)); // REGISTER THE C BUFFER and CUDA Resource HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&amp;pBufferImage, &amp;size, resource) ); dim3 grids(DIM/16,DIM/16); dim3 threads(16,16); kernel&lt;&lt;&lt;grids,threads&gt;&gt;&gt;(devPtr); kernel2&lt;&lt;&lt;grids,threads&gt;&gt;&gt;(devPtr2); int a = 1; do { if (a==1) { copy&lt;&lt;&lt; grids, threads&gt;&gt;&gt;(pBufferImage, devPtr, devPtr2, size, a); } if(a==2) { copy&lt;&lt;&lt; grids, threads&gt;&gt;&gt;(pBufferImage, devPtr, devPtr2, size, a); } a++; } while (a&lt;=2); cudaGraphicsUnmapResources( 1, &amp;resource, NULL ) ); // set up GLUT and kick off main loop glutKeyboardFunc( key_func ); glutDisplayFunc( draw_func ); glutMainLoop(); } </code></pre>
    singulars
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload