diff --git a/stim/cuda/spider_cost.cuh b/stim/cuda/spider_cost.cuh index 119cd82..c2821fe 100644 --- a/stim/cuda/spider_cost.cuh +++ b/stim/cuda/spider_cost.cuh @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -120,9 +121,18 @@ namespace stim{ { //Bind the Texture in GL and allow access to cuda. +// #ifdef TIMING +// gpuStartTimer(); +// #endif t.MapCudaTexture(texbufferID, texType); +// #ifdef TIMING +// std::cout << " " << gpuStopTimer(); +// #endif //initialize the return arrays. +// #ifdef TIMING +// gpuStartTimer(); +// #endif float* output; output = (float* ) malloc(DIM_Y*sizeof(float)); @@ -133,14 +143,26 @@ namespace stim{ //variables for finding the min. float mini = 10000000000000000.0; int idx = 0; +// #ifdef TIMING +// std::cout << " " << gpuStopTimer(); +// #endif //cuda launch variables. +// #ifdef TIMING +// gpuStartTimer(); +// #endif dim3 numBlocks(1, DIM_Y); dim3 threadsPerBlock(dx, dy); - get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy); - + cudaDeviceSynchronize(); +// #ifdef TIMING +// std::cout << " " << gpuStopTimer(); +// #endif + +// #ifdef TIMING +// gpuStartTimer(); +// #endif HANDLE_ERROR( cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost) ); @@ -151,7 +173,13 @@ namespace stim{ idx = i; } } +// #ifdef TIMING +// std::cout << " " << gpuStopTimer(); +// #endif +// #ifdef TIMING +// gpuStartTimer(); +// #endif // stringstream name; //for debugging // name << "Test.bmp"; // stim::gpu2image(print, name.str(),16,218,0,256); @@ -161,6 +189,9 @@ namespace stim{ ret[0] = idx; ret[1] = (int) output[idx]; // std::cout << "The cost is " << output[idx] << std::endl; free(output); +// #ifdef TIMING +// std::cout << " " << gpuStopTimer() << std::endl; +// #endif return ret; } -- libgit2 0.21.4