diff --git a/stim/cuda/cost.h b/stim/cuda/cost.h index 83da752..b0effa0 100644 --- a/stim/cuda/cost.h +++ b/stim/cuda/cost.h @@ -3,19 +3,33 @@ #include #include #include -#include +#include "../visualization/colormap.h" #include -#include -#include -#include +#include "../math/vector.h" +#include "../cuda/devices.h" +#include "../cuda/threads.h" ///Cost function that works with the gl-spider class to find index of the item with min-cost. typedef unsigned char uchar; texture texIn; float *result; -float* v_dif; cudaArray* srcArray; bool testing = false; +/* +struct SharedMemory +{ + __device__ inline operator float* () + { + extern __shared__ float __smem[]; + return (float *)__smem; + } + + __device__ inline operator const float* () const + { + extern __shared__ float __smem[]; + return (float *)__smem; + } +};*/ inline void checkCUDAerrors(const char *msg) { @@ -26,30 +40,12 @@ inline void checkCUDAerrors(const char *msg) } } -///Finds the sum of all the pixes in a gives template element. -///Returns the abosolute value. -///@param *diff, a pointer to the memory block that holds the pixel-differences. -float get_sum(float *diff) -{ - - cublasStatus_t ret; - cublasHandle_t handle; - ret = cublasCreate(&handle); - - ret = cublasSetVector(20*10, sizeof(*diff), diff, 1, v_dif, 1); - float out; - ret = cublasSasum(handle, 20*10, v_dif, 1, &out); -// cublasDestroy(ret); - cublasDestroy(handle); - return out; -} - ///A virtual representation of a uniform template. ///Returns the value of the template pixel. ///@param x, location of a pixel. __device__ float Template(int x) { - if(x < 20/6 || x > 20*5/6 || (x > 20*2/6 && x < 20*4/6)){ + if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){ return 1.0; }else{ return 0.0; @@ -63,15 +59,66 @@ __device__ float Template(int x) __global__ void get_diff (float *result) { - //cuPrintf("Hello"); + //float* shared = SharedMemory(); + __shared__ float shared[16][8]; int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; - int idx = y*20+x; + int x_t = threadIdx.x; + int y_t = threadIdx.y; + //int idx = y*16+x; + int g_idx = blockIdx.y; float valIn = tex2D(texIn, x, y)/255.0; float valTemp = Template(x); - result[idx] = abs(valIn-valTemp); - //result[idx] = abs(valIn); + shared[x_t][y_t] = abs(valIn-valTemp); + + __syncthreads(); + + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1) + { + __syncthreads(); + if (x_t < step) + { + shared[x_t][y_t] += shared[x_t + step][y_t]; + } + __syncthreads(); + } + __syncthreads(); + + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1) + { + __syncthreads(); + if(y_t < step) + { + shared[x_t][y_t] += shared[x_t][y_t + step]; + } + __syncthreads(); + } + __syncthreads(); +/* for(unsigned int step = 1; step < blockDim.x; step *= 2) + { + __syncthreads(); + if (x_t %(2*step) == 0) + { + shared[x_t][y_t] += shared[x_t + step][y_t]; + } + } + __syncthreads(); + + for(unsigned int step = 1; step < blockDim.y; step *= 2) + { + __syncthreads(); + if(y_t%(2*step) == 0) + { + shared[x_t][y_t] += shared[x_t][y_t + step]; + } + } + __syncthreads(); */ + if(x_t == 0 && y_t == 0) + result[g_idx] = shared[0][0]; + + +// //result[idx] = abs(valIn); } @@ -82,12 +129,6 @@ void get_diff (float *result) ///@param DIM_Y, integer controlling how much memory to allocate. void initArray(cudaGraphicsResource_t src, int DIM_Y) { - //cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc (); - //cudaMallocArray(&result, &channelDesc, DIM_X, DIM_Y, 0); - //HANDLE_ERROR( - // cudaGraphicsGLRegisterImage(&src, - // fboID, - // GL_TEXTURE_2D, HANDLE_ERROR( cudaGraphicsMapResources(1, &src) ); @@ -97,10 +138,8 @@ void initArray(cudaGraphicsResource_t src, int DIM_Y) HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) ); - cudaMalloc( (void**) &result, 20*DIM_Y*sizeof(float)); + cudaMalloc( (void**) &result, DIM_Y*sizeof(float)); checkCUDAerrors("Memory Allocation Issue 1"); - cudaMalloc((void **) &v_dif, 20*10*sizeof(float)); - checkCUDAerrors("Memory Allocation Issue 2"); //HANDLE_ERROR( // cudaBindTextureToArray(texIn, ptr, &channelDesc) // ); @@ -117,9 +156,6 @@ void cleanUP(cudaGraphicsResource_t src) cudaGraphicsUnmapResources(1,&src) ); HANDLE_ERROR( - cudaFree(v_dif) - ); - HANDLE_ERROR( cudaUnbindTexture(texIn) ); } @@ -151,25 +187,32 @@ stim::vec get_cost(cudaGraphicsResource_t src, int DIM_Y) // name << "sample_" << inter << "_" << idx << ".bmp"; // stim::gpu2image(v_dif, name.str(), 20,10,0,1); - float output[DIM_Y]; + //float output[DIM_Y]; + float *output; + output = (float* ) malloc(DIM_Y*sizeof(float)); stim::vec ret(0, 0); float mini = 10000000000000000.0; - int idx; - initArray(src, DIM_Y*10); - dim3 grid(20/2, DIM_Y*10/2); - dim3 block(2, 2); - - get_diff <<< grid, block >>> (result); - for (int i = 0; i < DIM_Y; i++){ - output[i] = get_sum(result+(20*10*i)); - if(output[i] <= mini){ + int idx = 0; + initArray(src, DIM_Y*8); + dim3 numBlocks(1, DIM_Y); + dim3 threadsPerBlock(16, 8); + + + get_diff <<< numBlocks, threadsPerBlock >>> (result); + cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost); + + for( int i = 0; i(mag, pos, temp, hor.n()); - UpdateBuffer(0.0, 0.0+idx*10.0); + UpdateBuffer(0.0, 0.0+idx*8.0); CHECK_OPENGL_ERROR } } @@ -233,7 +233,7 @@ class gl_spider ver = stim::rect(mag, temp, dir, hor.n()); - UpdateBuffer(0.0, 0.0+idx*10.0); + UpdateBuffer(0.0, 0.0+idx*8.0); CHECK_OPENGL_ERROR } } @@ -244,7 +244,7 @@ class gl_spider ///Method for populating the buffer with the sampled texture. ///uses the default m <1,1,0> void - genMagnitudeVectors(float delta = 0.5) + genMagnitudeVectors(float delta = 0.70) { //Set up the vectors necessary for Rectangle creation. @@ -274,7 +274,7 @@ class gl_spider ver = stim::rect(temp, pos, dir, hor.n()); - UpdateBuffer(0.0, 0.0+i*10.0); + UpdateBuffer(0.0, 0.0+i*8.0); CHECK_OPENGL_ERROR } glEndList(); @@ -286,7 +286,7 @@ class gl_spider void UpdateBuffer(float v_x, float v_y) { - float len = 10.0; + float len = 8.0; stim::vecp1; stim::vecp2; stim::vecp3; @@ -338,13 +338,13 @@ class gl_spider p2[1], p2[2] ); - glVertex2f(v_x+2*len, v_y); + glVertex2f(v_x+2.0*len, v_y); glTexCoord3f( p3[0], p3[1], p3[2] ); - glVertex2f(v_x+2*len, v_y+len); + glVertex2f(v_x+2.0*len, v_y+len); glTexCoord3f( p4[0], p4[1], @@ -383,47 +383,6 @@ class gl_spider glBindTexture(GL_TEXTURE_2D, 0); } - ///Method for controling the buffer and texture binding in order to properly - ///do the render to texture. - void - Bind() - { - float len = 10.0; - glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer - glFramebufferTexture2D( - GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, - texbufferID, - 0); - glBindFramebuffer(GL_FRAMEBUFFER, fboID); - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - glDrawBuffers(1, DrawBuffers); - glBindTexture(GL_TEXTURE_2D, texbufferID); - glClearColor(1,1,1,1); - glClear(GL_COLOR_BUFFER_BIT); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glViewport(0,0,2.0*len, numSamples*len); - gluOrtho2D(0.0,2.0*len,0.0,numSamples*len); - glEnable(GL_TEXTURE_3D); - glBindTexture(GL_TEXTURE_3D, texID); - - CHECK_OPENGL_ERROR - } - - ///Method for Unbinding all of the texture resources - void - Unbind() - { - //Finalize GL_buffer - glBindTexture(GL_TEXTURE_3D, 0); - glDisable(GL_TEXTURE_3D); - glBindFramebuffer(GL_FRAMEBUFFER,0); - glBindTexture(GL_TEXTURE_2D, 0); - } ///Method for using the gl manipulation to alighn templates from ///Template space (-0.5 0.5) to Texture space (0.0, 1.0), @@ -558,7 +517,7 @@ class gl_spider attachSpider(GLuint id) { texID = id; - GenerateFBO(20, numSamples*10); + GenerateFBO(16, numSamples*8); setDims(0.6, 0.6, 1.0); setSize(512.0, 512.0, 426.0); setMatrix(); @@ -704,6 +663,47 @@ class gl_spider return fboID; } + ///Method for controling the buffer and texture binding in order to properly + ///do the render to texture. + void + Bind() + { + float len = 8.0; + glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer + glFramebufferTexture2D( + GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, + texbufferID, + 0); + glBindFramebuffer(GL_FRAMEBUFFER, fboID); + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, DrawBuffers); + glBindTexture(GL_TEXTURE_2D, texbufferID); + glClearColor(1,1,1,1); + glClear(GL_COLOR_BUFFER_BIT); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glViewport(0,0,2.0*len, numSamples*len); + gluOrtho2D(0.0,2.0*len,0.0,numSamples*len); + glEnable(GL_TEXTURE_3D); + glBindTexture(GL_TEXTURE_3D, texID); + + CHECK_OPENGL_ERROR + } + + ///Method for Unbinding all of the texture resources + void + Unbind() + { + //Finalize GL_buffer + glBindTexture(GL_TEXTURE_3D, 0); + glDisable(GL_TEXTURE_3D); + glBindFramebuffer(GL_FRAMEBUFFER,0); + glBindTexture(GL_TEXTURE_2D, 0); + } //--------------------------------------------------------------------------// //-----------------------------TEMPORARY METHODS----------------------------// //--------------------------------------------------------------------------// @@ -725,12 +725,12 @@ class gl_spider int Step() { - Bind(); + // Bind(); findOptimalDirection(); findOptimalPosition(); findOptimalScale(); // branchDetection(); - Unbind(); + // Unbind(); return current_cost; } @@ -776,9 +776,9 @@ class gl_spider glTexCoord3f(x,y,z0); glVertex2f(0.0, j*0.1+0.1); glTexCoord3f(x,y,z1); - glVertex2f(20.0, j*0.1+0.1); + glVertex2f(16.0, j*0.1+0.1); glTexCoord3f(xold,yold,z1); - glVertex2f(20.0, j*0.1); + glVertex2f(16.0, j*0.1); glTexCoord3f(xold,yold,z0); glVertex2f(0.0, j*0.1); xold=x; -- libgit2 0.21.4