Commit 2eefb0355e676d2637d3451ae251b1cbcc0df3df
1 parent
efb0a2b5
added debugging code to the cuda code that is enabled with -DDebug in cmake list…
…s. Fixed minor errors in the grids and image stack classes
Showing
7 changed files
with
121 additions
and
28 deletions
Show diff stats
stim/cuda/branch_detection.cuh
... | ... | @@ -11,7 +11,7 @@ typedef unsigned int uint; |
11 | 11 | |
12 | 12 | |
13 | 13 | std::vector< stim::vec<float> > |
14 | -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) | |
14 | +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y, int iter = 0) | |
15 | 15 | { |
16 | 16 | float sigma = 2.0; |
17 | 17 | unsigned int conn = 7; |
... | ... | @@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) |
22 | 22 | stringstream name; |
23 | 23 | |
24 | 24 | |
25 | - cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold); | |
25 | + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold, iter); | |
26 | 26 | cudaDeviceSynchronize(); |
27 | 27 | |
28 | 28 | ... | ... |
stim/cuda/filter.cuh
... | ... | @@ -26,6 +26,11 @@ namespace stim |
26 | 26 | float* LoG; |
27 | 27 | float* res; |
28 | 28 | float* centers; |
29 | + | |
30 | +//#ifdef DEBUG | |
31 | + float* print; | |
32 | +//#endif | |
33 | + | |
29 | 34 | stim::cuda::cuda_texture tx; |
30 | 35 | |
31 | 36 | |
... | ... | @@ -44,6 +49,13 @@ namespace stim |
44 | 49 | HANDLE_ERROR( |
45 | 50 | cudaMalloc( (void**) ¢ers, DIM_Y*DIM_X*sizeof(float)) |
46 | 51 | ); |
52 | + | |
53 | +//#ifdef DEBUG | |
54 | + HANDLE_ERROR( | |
55 | + cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float)) | |
56 | + ); | |
57 | +//#endif | |
58 | + | |
47 | 59 | // checkCUDAerrors("Memory Allocation, Result"); |
48 | 60 | } |
49 | 61 | |
... | ... | @@ -58,6 +70,11 @@ namespace stim |
58 | 70 | HANDLE_ERROR( |
59 | 71 | cudaFree(centers) |
60 | 72 | ); |
73 | +//#ifdef DEBUG | |
74 | + HANDLE_ERROR( | |
75 | + cudaFree(print) | |
76 | + ); | |
77 | +//#endif | |
61 | 78 | free(LoG); |
62 | 79 | } |
63 | 80 | |
... | ... | @@ -89,7 +106,7 @@ namespace stim |
89 | 106 | //Shared memory would be better. |
90 | 107 | __global__ |
91 | 108 | void |
92 | - applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){ | |
109 | + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){ | |
93 | 110 | //R = floor(size/2) |
94 | 111 | //THIS IS A NAIVE WAY TO DO IT, and there is a better way) |
95 | 112 | |
... | ... | @@ -101,16 +118,15 @@ namespace stim |
101 | 118 | // float val = 0; |
102 | 119 | float tu = (x-kr+xi)/(float)DIM_X; |
103 | 120 | float tv = (y-kr+yi)/(float)DIM_Y; |
121 | + int idx = y*DIM_X+x; | |
104 | 122 | shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv)); |
105 | 123 | __syncthreads(); |
106 | - | |
107 | 124 | |
108 | 125 | //x = max(0,x); |
109 | 126 | //x = min(x, width-1); |
110 | 127 | //y = max(y, 0); |
111 | 128 | //y = min(y, height - 1); |
112 | 129 | |
113 | - int idx = y*DIM_X+x; | |
114 | 130 | // int k_idx; |
115 | 131 | for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1) |
116 | 132 | { |
... | ... | @@ -135,11 +151,12 @@ namespace stim |
135 | 151 | __syncthreads(); |
136 | 152 | if(xi == 0 && yi == 0) |
137 | 153 | res[idx] = shared[0][0]; |
154 | + | |
138 | 155 | } |
139 | 156 | |
140 | 157 | extern "C" |
141 | 158 | float * |
142 | - get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold) | |
159 | + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0) | |
143 | 160 | { |
144 | 161 | tx.SetTextureCoordinates(1); |
145 | 162 | tx.SetAddressMode(1, 3); |
... | ... | @@ -153,7 +170,14 @@ namespace stim |
153 | 170 | dim3 numBlocks(DIM_X, DIM_Y); |
154 | 171 | dim3 threadsPerBlock(sizeK, sizeK); |
155 | 172 | |
156 | - applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG); | |
173 | + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print); | |
174 | + | |
175 | + #ifdef DEBUG | |
176 | + stringstream name; | |
177 | + name.str(""); | |
178 | + name << "Fiber Cylinder " << iter << ".bmp"; | |
179 | + stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255); | |
180 | + #endif | |
157 | 181 | |
158 | 182 | |
159 | 183 | stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y); | ... | ... |
stim/cuda/testKernel.cuh
... | ... | @@ -53,11 +53,31 @@ |
53 | 53 | |
54 | 54 | float valIn = tex2D<unsigned char>(texIn, x, y); |
55 | 55 | float templa = templ(x, 32)*255.0; |
56 | - print[idx] = abs(valIn-templa); ///temporary | |
56 | + //print[idx] = abs(valIn-templa); ///temporary | |
57 | + print[idx] = abs(valIn); | |
57 | 58 | //print[idx] = abs(templa); ///temporary |
58 | 59 | |
59 | 60 | } |
60 | 61 | |
62 | + ///Find the difference of the given set of samples and the template | |
63 | + ///using cuda acceleration. | |
64 | + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references | |
65 | + /// to the data. | |
66 | + ///@param float* result --a pointer to the memory that stores the result. | |
67 | + __global__ | |
68 | + //void get_diff (float *result) | |
69 | + void get_diff2 (cudaTextureObject_t texIn, float *print, int dx) | |
70 | + { | |
71 | + int x = threadIdx.x + blockIdx.x * blockDim.x; | |
72 | + int y = threadIdx.y + blockIdx.y * blockDim.y; | |
73 | + int idx = y*dx+x; | |
74 | + // int idx = y*16+x; | |
75 | + | |
76 | + float valIn = tex2D<unsigned char>(texIn, x, y); | |
77 | + print[idx] = abs(valIn); ///temporary | |
78 | + | |
79 | + } | |
80 | + | |
61 | 81 | void test(cudaTextureObject_t tObj, int x, int y, std::string nam) |
62 | 82 | { |
63 | 83 | |
... | ... | @@ -86,3 +106,31 @@ |
86 | 106 | cleanUP(); |
87 | 107 | } |
88 | 108 | |
109 | + | |
110 | + void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter) | |
111 | + { | |
112 | + | |
113 | + //Bind the Texture in GL and allow access to cuda. | |
114 | + | |
115 | + //initialize the return arrays. | |
116 | + | |
117 | + initArray(x,y); | |
118 | + dim3 numBlocks(1, y); | |
119 | + dim3 threadsPerBlock(x, 1); | |
120 | + int max_threads = stim::maxThreadsPerBlock(); | |
121 | + //dim3 threads(max_threads, 1); | |
122 | + //dim3 blocks(x / threads.x + 1, y); | |
123 | + //dim3 numBlocks(2, 2); | |
124 | + //dim3 threadsPerBlock(8, 108); | |
125 | + | |
126 | + | |
127 | +// get_diff <<< blocks, threads >>> (tx.getTexture(), print); | |
128 | + get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x); | |
129 | + | |
130 | + cudaDeviceSynchronize(); | |
131 | + stringstream name; //for debugging | |
132 | + name << nam.c_str(); | |
133 | + stim::gpu2image<float>(print, name.str(),x,y,0,255); | |
134 | + | |
135 | + cleanUP(); | |
136 | + } | ... | ... |
stim/envi/envi_header.h
stim/gl/gl_spider.h
... | ... | @@ -27,7 +27,6 @@ |
27 | 27 | #include <stim/cuda/branch_detection.cuh> |
28 | 28 | #include "../../../volume-spider/glnetwork.h" |
29 | 29 | #include <stim/visualization/cylinder.h> |
30 | -#include <stim/cuda/testKernel.cuh> | |
31 | 30 | #include <iostream> |
32 | 31 | #include <fstream> |
33 | 32 | #ifdef TIMING |
... | ... | @@ -40,6 +39,9 @@ |
40 | 39 | #include <ctime> |
41 | 40 | #endif |
42 | 41 | |
42 | +#ifdef DEBUG | |
43 | + #include <stim/cuda/testKernel.cuh> | |
44 | +#endif | |
43 | 45 | |
44 | 46 | namespace stim |
45 | 47 | { |
... | ... | @@ -143,8 +145,8 @@ class gl_spider // : public virtual gl_texture<T> |
143 | 145 | |
144 | 146 | |
145 | 147 | #ifdef DEBUG |
146 | - stringstream name; | |
147 | 148 | int iter; |
149 | + stringstream name; | |
148 | 150 | int iter_pos; |
149 | 151 | int iter_dir; |
150 | 152 | int iter_siz; |
... | ... | @@ -294,6 +296,7 @@ class gl_spider // : public virtual gl_texture<T> |
294 | 296 | DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder. |
295 | 297 | stim::cylinder<float> cyl(cL, cM); |
296 | 298 | std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda |
299 | + | |
297 | 300 | stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture. |
298 | 301 | float pval; //pvalue associated with the points on the cylinder. |
299 | 302 | if(!result.empty()) ///if we have any points |
... | ... | @@ -957,7 +960,7 @@ class gl_spider // : public virtual gl_texture<T> |
957 | 960 | iter_dir = 0; |
958 | 961 | iter_siz = 0; |
959 | 962 | #endif |
960 | - stepsize = 3.0; | |
963 | + stepsize = 6.0; | |
961 | 964 | n_pixels = 16.0; |
962 | 965 | |
963 | 966 | srand(100); |
... | ... | @@ -1380,20 +1383,31 @@ class gl_spider // : public virtual gl_texture<T> |
1380 | 1383 | Step() |
1381 | 1384 | { |
1382 | 1385 | #ifdef DEBUG |
1383 | - std::cerr << "Took a step" << std::endl; | |
1386 | + std::cerr << "Took a step"; | |
1384 | 1387 | #endif |
1385 | 1388 | Bind(direction_texID, direction_buffID, numSamples, n_pixels); |
1386 | 1389 | CHECK_OPENGL_ERROR |
1387 | 1390 | findOptimalDirection(); |
1388 | 1391 | Unbind(); |
1392 | + #ifdef DEBUG | |
1393 | + std::cerr << " " << current_cost; | |
1394 | + #endif | |
1389 | 1395 | Bind(position_texID, position_buffID, numSamplesPos, n_pixels); |
1390 | 1396 | findOptimalPosition(); |
1391 | 1397 | Unbind(); |
1398 | + #ifdef DEBUG | |
1399 | + std::cerr << " " << current_cost; | |
1400 | + #endif | |
1392 | 1401 | Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels); |
1393 | 1402 | findOptimalRadius(); |
1394 | 1403 | Unbind(); |
1404 | + #ifdef DEBUG | |
1405 | + std::cerr << " " << current_cost; | |
1406 | + #endif | |
1395 | 1407 | CHECK_OPENGL_ERROR |
1396 | - | |
1408 | + #ifdef DEBUG | |
1409 | + std::cerr << std::endl; | |
1410 | + #endif | |
1397 | 1411 | return current_cost; |
1398 | 1412 | } |
1399 | 1413 | |
... | ... | @@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture<T> |
1539 | 1553 | // findOptimalDirection(); |
1540 | 1554 | // Unbind(); |
1541 | 1555 | //THIS IS EXPERIMENTAL |
1542 | - Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels); | |
1543 | - findOptimalRadius(); | |
1544 | - Unbind(); | |
1556 | + // Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels); | |
1557 | + // findOptimalRadius(); | |
1558 | + // Unbind(); | |
1545 | 1559 | //THIS IS EXPERIMENTAL |
1546 | 1560 | |
1547 | 1561 | // cL.push_back(curSeed); | ... | ... |
stim/gl/gl_texture.h
... | ... | @@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack<T, F> |
30 | 30 | GLenum cpu_type; |
31 | 31 | GLenum gpu_type; |
32 | 32 | GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.) |
33 | - using image_stack<T>::R; | |
33 | + using image_stack<T,F>::R; | |
34 | 34 | //using image_stack<T>::S; |
35 | - using image_stack<T>::ptr; | |
35 | + using image_stack<T,F>::ptr; | |
36 | 36 | |
37 | 37 | /// Sets the internal texture_type, based on the data dimensions |
38 | 38 | void setTextureType(){ |
... | ... | @@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack<T, F> |
247 | 247 | } |
248 | 248 | |
249 | 249 | ///returns the dimentions of the data in the x, y, z directions. |
250 | - vec<int> getSize(){ | |
250 | + stim::vec<int> getSize(){ | |
251 | 251 | stim::vec<int> size(R[1], R[2], R[3]); |
252 | 252 | return size; |
253 | 253 | } |
... | ... | @@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack<T, F> |
282 | 282 | ///@param file_mask specifies the file(s) to be loaded |
283 | 283 | /// Sets the path and calls the loader on that path. |
284 | 284 | void load_images(std::string file_mask){ |
285 | - image_stack<T>::load_images(file_mask); //load images | |
285 | + image_stack<T, F>::load_images(file_mask); //load images | |
286 | 286 | guess_parameters(); |
287 | 287 | } |
288 | 288 | |
... | ... | @@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack<T, F> |
292 | 292 | return texID; |
293 | 293 | } |
294 | 294 | |
295 | - | |
295 | + | |
296 | 296 | T* getData(){ |
297 | 297 | return ptr; |
298 | 298 | } |
299 | - | |
300 | - | |
299 | + | |
300 | + void setData(T* rts) | |
301 | + { | |
302 | + | |
303 | + } | |
304 | + | |
301 | 305 | }; |
306 | + | |
302 | 307 | } |
303 | 308 | |
304 | 309 | ... | ... |
stim/grids/image_stack.h
... | ... | @@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid<T, 4, F>{ |
17 | 17 | |
18 | 18 | protected: |
19 | 19 | //using stim::grid<T, 4>::S; |
20 | - using stim::grid<T, 4>::R; | |
21 | - using stim::grid<T, 4>::ptr; | |
22 | - using stim::grid<T, 4>::read; | |
20 | + using stim::grid<T, 4, F>::R; | |
21 | + using stim::grid<T, 4, F>::ptr; | |
22 | + using stim::grid<T, 4, F>::read; | |
23 | 23 | |
24 | 24 | public: |
25 | 25 | //default constructor |
26 | - image_stack() : grid<T, 4>() { | |
26 | + image_stack() : grid<T, 4, F>() { | |
27 | 27 | |
28 | 28 | } |
29 | 29 | |
... | ... | @@ -113,7 +113,8 @@ public: |
113 | 113 | /// @param i is the page to be saved |
114 | 114 | void save_image(std::string file_name, unsigned int i){ |
115 | 115 | stim::image<T> I; //create an image |
116 | - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid | |
116 | + I.set_interleaved(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid | |
117 | +// I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid | |
117 | 118 | I.save(file_name); |
118 | 119 | } |
119 | 120 | ... | ... |