Commit ef5cebe5381eafb3c52998fe01c9a32c23597303
1 parent
efe7b7cc
stable version
Showing
1 changed file
with
33 additions
and
2 deletions
Show diff stats
stim/cuda/spider_cost.cuh
... | ... | @@ -8,6 +8,7 @@ |
8 | 8 | #include <stim/visualization/colormap.h> |
9 | 9 | #include <sstream> |
10 | 10 | #include <stim/math/vector.h> |
11 | +#include <stim/cuda/cudatools/timer.h> | |
11 | 12 | #include <stim/cuda/cudatools/devices.h> |
12 | 13 | #include <stim/cuda/cudatools/threads.h> |
13 | 14 | #include <stim/cuda/cuda_texture.cuh> |
... | ... | @@ -120,9 +121,18 @@ namespace stim{ |
120 | 121 | { |
121 | 122 | |
122 | 123 | //Bind the Texture in GL and allow access to cuda. |
124 | +// #ifdef TIMING | |
125 | +// gpuStartTimer(); | |
126 | +// #endif | |
123 | 127 | t.MapCudaTexture(texbufferID, texType); |
128 | +// #ifdef TIMING | |
129 | +// std::cout << " " << gpuStopTimer(); | |
130 | +// #endif | |
124 | 131 | |
125 | 132 | //initialize the return arrays. |
133 | +// #ifdef TIMING | |
134 | +// gpuStartTimer(); | |
135 | +// #endif | |
126 | 136 | float* output; |
127 | 137 | output = (float* ) malloc(DIM_Y*sizeof(float)); |
128 | 138 | |
... | ... | @@ -133,14 +143,26 @@ namespace stim{ |
133 | 143 | //variables for finding the min. |
134 | 144 | float mini = 10000000000000000.0; |
135 | 145 | int idx = 0; |
146 | +// #ifdef TIMING | |
147 | +// std::cout << " " << gpuStopTimer(); | |
148 | +// #endif | |
136 | 149 | |
137 | 150 | //cuda launch variables. |
151 | +// #ifdef TIMING | |
152 | +// gpuStartTimer(); | |
153 | +// #endif | |
138 | 154 | dim3 numBlocks(1, DIM_Y); |
139 | 155 | dim3 threadsPerBlock(dx, dy); |
140 | 156 | |
141 | - | |
142 | 157 | get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy); |
143 | - | |
158 | + cudaDeviceSynchronize(); | |
159 | +// #ifdef TIMING | |
160 | +// std::cout << " " << gpuStopTimer(); | |
161 | +// #endif | |
162 | + | |
163 | +// #ifdef TIMING | |
164 | +// gpuStartTimer(); | |
165 | +// #endif | |
144 | 166 | HANDLE_ERROR( |
145 | 167 | cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost) |
146 | 168 | ); |
... | ... | @@ -151,7 +173,13 @@ namespace stim{ |
151 | 173 | idx = i; |
152 | 174 | } |
153 | 175 | } |
176 | +// #ifdef TIMING | |
177 | +// std::cout << " " << gpuStopTimer(); | |
178 | +// #endif | |
154 | 179 | |
180 | +// #ifdef TIMING | |
181 | +// gpuStartTimer(); | |
182 | +// #endif | |
155 | 183 | // stringstream name; //for debugging |
156 | 184 | // name << "Test.bmp"; |
157 | 185 | // stim::gpu2image<float>(print, name.str(),16,218,0,256); |
... | ... | @@ -161,6 +189,9 @@ namespace stim{ |
161 | 189 | ret[0] = idx; ret[1] = (int) output[idx]; |
162 | 190 | // std::cout << "The cost is " << output[idx] << std::endl; |
163 | 191 | free(output); |
192 | +// #ifdef TIMING | |
193 | +// std::cout << " " << gpuStopTimer() << std::endl; | |
194 | +// #endif | |
164 | 195 | return ret; |
165 | 196 | } |
166 | 197 | ... | ... |