Commit ef5cebe5381eafb3c52998fe01c9a32c23597303

Authored by Pavel Govyadinov
1 parent efe7b7cc

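stable version: includes the cudatools timer header, adds commented-out TIMING instrumentation around each stage of the cost computation in stim/cuda/spider_cost.cuh, and calls cudaDeviceSynchronize() after the get_diff kernel launch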

Showing 1 changed file with 33 additions and 2 deletions
stim/cuda/spider_cost.cuh
@@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
8 #include <stim/visualization/colormap.h> 8 #include <stim/visualization/colormap.h>
9 #include <sstream> 9 #include <sstream>
10 #include <stim/math/vector.h> 10 #include <stim/math/vector.h>
  11 +#include <stim/cuda/cudatools/timer.h>
11 #include <stim/cuda/cudatools/devices.h> 12 #include <stim/cuda/cudatools/devices.h>
12 #include <stim/cuda/cudatools/threads.h> 13 #include <stim/cuda/cudatools/threads.h>
13 #include <stim/cuda/cuda_texture.cuh> 14 #include <stim/cuda/cuda_texture.cuh>
@@ -120,9 +121,18 @@ namespace stim{ @@ -120,9 +121,18 @@ namespace stim{
120 { 121 {
121 122
122 //Bind the Texture in GL and allow access to cuda. 123 //Bind the Texture in GL and allow access to cuda.
  124 +// #ifdef TIMING
  125 +// gpuStartTimer();
  126 +// #endif
123 t.MapCudaTexture(texbufferID, texType); 127 t.MapCudaTexture(texbufferID, texType);
  128 +// #ifdef TIMING
  129 +// std::cout << " " << gpuStopTimer();
  130 +// #endif
124 131
125 //initialize the return arrays. 132 //initialize the return arrays.
  133 +// #ifdef TIMING
  134 +// gpuStartTimer();
  135 +// #endif
126 float* output; 136 float* output;
127 output = (float* ) malloc(DIM_Y*sizeof(float)); 137 output = (float* ) malloc(DIM_Y*sizeof(float));
128 138
@@ -133,14 +143,26 @@ namespace stim{ @@ -133,14 +143,26 @@ namespace stim{
133 //variables for finding the min. 143 //variables for finding the min.
134 float mini = 10000000000000000.0; 144 float mini = 10000000000000000.0;
135 int idx = 0; 145 int idx = 0;
  146 +// #ifdef TIMING
  147 +// std::cout << " " << gpuStopTimer();
  148 +// #endif
136 149
137 //cuda launch variables. 150 //cuda launch variables.
  151 +// #ifdef TIMING
  152 +// gpuStartTimer();
  153 +// #endif
138 dim3 numBlocks(1, DIM_Y); 154 dim3 numBlocks(1, DIM_Y);
139 dim3 threadsPerBlock(dx, dy); 155 dim3 threadsPerBlock(dx, dy);
140 156
141 -  
142 get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy); 157 get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy);
143 - 158 + cudaDeviceSynchronize();
  159 +// #ifdef TIMING
  160 +// std::cout << " " << gpuStopTimer();
  161 +// #endif
  162 +
  163 +// #ifdef TIMING
  164 +// gpuStartTimer();
  165 +// #endif
144 HANDLE_ERROR( 166 HANDLE_ERROR(
145 cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost) 167 cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
146 ); 168 );
@@ -151,7 +173,13 @@ namespace stim{ @@ -151,7 +173,13 @@ namespace stim{
151 idx = i; 173 idx = i;
152 } 174 }
153 } 175 }
  176 +// #ifdef TIMING
  177 +// std::cout << " " << gpuStopTimer();
  178 +// #endif
154 179
  180 +// #ifdef TIMING
  181 +// gpuStartTimer();
  182 +// #endif
155 // stringstream name; //for debugging 183 // stringstream name; //for debugging
156 // name << "Test.bmp"; 184 // name << "Test.bmp";
157 // stim::gpu2image<float>(print, name.str(),16,218,0,256); 185 // stim::gpu2image<float>(print, name.str(),16,218,0,256);
@@ -161,6 +189,9 @@ namespace stim{ @@ -161,6 +189,9 @@ namespace stim{
161 ret[0] = idx; ret[1] = (int) output[idx]; 189 ret[0] = idx; ret[1] = (int) output[idx];
162 // std::cout << "The cost is " << output[idx] << std::endl; 190 // std::cout << "The cost is " << output[idx] << std::endl;
163 free(output); 191 free(output);
  192 +// #ifdef TIMING
  193 +// std::cout << " " << gpuStopTimer() << std::endl;
  194 +// #endif
164 return ret; 195 return ret;
165 } 196 }
166 197