Commit 2eefb0355e676d2637d3451ae251b1cbcc0df3df

Authored by Pavel Govyadinov
1 parent efb0a2b5

added debugging code to the cuda code that is enabled with -DDebug in cmake list…

…s. Fixed minor errors in the grids and image stack classes
stim/cuda/branch_detection.cuh
... ... @@ -11,7 +11,7 @@ typedef unsigned int uint;
11 11  
12 12  
13 13 std::vector< stim::vec<float> >
14   -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  14 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y, int iter = 0)
15 15 {
16 16 float sigma = 2.0;
17 17 unsigned int conn = 7;
... ... @@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
22 22 stringstream name;
23 23  
24 24  
25   - cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold);
  25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold, iter);
26 26 cudaDeviceSynchronize();
27 27  
28 28  
... ...
stim/cuda/filter.cuh
... ... @@ -26,6 +26,11 @@ namespace stim
26 26 float* LoG;
27 27 float* res;
28 28 float* centers;
  29 +
  30 +//#ifdef DEBUG
  31 + float* print;
  32 +//#endif
  33 +
29 34 stim::cuda::cuda_texture tx;
30 35  
31 36  
... ... @@ -44,6 +49,13 @@ namespace stim
44 49 HANDLE_ERROR(
45 50 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
46 51 );
  52 +
  53 +//#ifdef DEBUG
  54 + HANDLE_ERROR(
  55 + cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
  56 + );
  57 +//#endif
  58 +
47 59 // checkCUDAerrors("Memory Allocation, Result");
48 60 }
49 61  
... ... @@ -58,6 +70,11 @@ namespace stim
58 70 HANDLE_ERROR(
59 71 cudaFree(centers)
60 72 );
  73 +//#ifdef DEBUG
  74 + HANDLE_ERROR(
  75 + cudaFree(print)
  76 + );
  77 +//#endif
61 78 free(LoG);
62 79 }
63 80  
... ... @@ -89,7 +106,7 @@ namespace stim
89 106 //Shared memory would be better.
90 107 __global__
91 108 void
92   - applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){
  109 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
93 110 //R = floor(size/2)
94 111 //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
95 112  
... ... @@ -101,16 +118,15 @@ namespace stim
101 118 // float val = 0;
102 119 float tu = (x-kr+xi)/(float)DIM_X;
103 120 float tv = (y-kr+yi)/(float)DIM_Y;
  121 + int idx = y*DIM_X+x;
104 122 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
105 123 __syncthreads();
106   -
107 124  
108 125 //x = max(0,x);
109 126 //x = min(x, width-1);
110 127 //y = max(y, 0);
111 128 //y = min(y, height - 1);
112 129  
113   - int idx = y*DIM_X+x;
114 130 // int k_idx;
115 131 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
116 132 {
... ... @@ -135,11 +151,12 @@ namespace stim
135 151 __syncthreads();
136 152 if(xi == 0 && yi == 0)
137 153 res[idx] = shared[0][0];
  154 +
138 155 }
139 156  
140 157 extern "C"
141 158 float *
142   - get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold)
  159 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0)
143 160 {
144 161 tx.SetTextureCoordinates(1);
145 162 tx.SetAddressMode(1, 3);
... ... @@ -153,7 +170,14 @@ namespace stim
153 170 dim3 numBlocks(DIM_X, DIM_Y);
154 171 dim3 threadsPerBlock(sizeK, sizeK);
155 172  
156   - applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG);
  173 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
  174 +
  175 + #ifdef DEBUG
  176 + stringstream name;
  177 + name.str("");
  178 + name << "Fiber Cylinder " << iter << ".bmp";
  179 + stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
  180 + #endif
157 181  
158 182  
159 183 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
... ...
stim/cuda/testKernel.cuh
... ... @@ -53,11 +53,31 @@
53 53  
54 54 float valIn = tex2D<unsigned char>(texIn, x, y);
55 55 float templa = templ(x, 32)*255.0;
56   - print[idx] = abs(valIn-templa); ///temporary
  56 + //print[idx] = abs(valIn-templa); ///temporary
  57 + print[idx] = abs(valIn);
57 58 //print[idx] = abs(templa); ///temporary
58 59  
59 60 }
60 61  
  62 + ///Find the difference of the given set of samples and the template
  63 + ///using cuda acceleration.
  64 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  65 + /// to the data.
  66 + ///@param float* result --a pointer to the memory that stores the result.
  67 + __global__
  68 + //void get_diff (float *result)
  69 + void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  70 + {
  71 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  72 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  73 + int idx = y*dx+x;
  74 + // int idx = y*16+x;
  75 +
  76 + float valIn = tex2D<unsigned char>(texIn, x, y);
  77 + print[idx] = abs(valIn); ///temporary
  78 +
  79 + }
  80 +
61 81 void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
62 82 {
63 83  
... ... @@ -86,3 +106,31 @@
86 106 cleanUP();
87 107 }
88 108  
  109 +
  110 + void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  111 + {
  112 +
  113 + //Bind the Texture in GL and allow access to cuda.
  114 +
  115 + //initialize the return arrays.
  116 +
  117 + initArray(x,y);
  118 + dim3 numBlocks(1, y);
  119 + dim3 threadsPerBlock(x, 1);
  120 + int max_threads = stim::maxThreadsPerBlock();
  121 + //dim3 threads(max_threads, 1);
  122 + //dim3 blocks(x / threads.x + 1, y);
  123 + //dim3 numBlocks(2, 2);
  124 + //dim3 threadsPerBlock(8, 108);
  125 +
  126 +
  127 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  128 + get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
  129 +
  130 + cudaDeviceSynchronize();
  131 + stringstream name; //for debugging
  132 + name << nam.c_str();
  133 + stim::gpu2image<float>(print, name.str(),x,y,0,255);
  134 +
  135 + cleanUP();
  136 + }
... ...
stim/envi/envi_header.h
... ... @@ -8,6 +8,7 @@
8 8 #include <vector>
9 9 #include <algorithm>
10 10 #include <stdlib.h>
  11 +#include <cmath>
11 12  
12 13 //information from an ENVI header file
13 14 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html
... ...
stim/gl/gl_spider.h
... ... @@ -27,7 +27,6 @@
27 27 #include <stim/cuda/branch_detection.cuh>
28 28 #include "../../../volume-spider/glnetwork.h"
29 29 #include <stim/visualization/cylinder.h>
30   -#include <stim/cuda/testKernel.cuh>
31 30 #include <iostream>
32 31 #include <fstream>
33 32 #ifdef TIMING
... ... @@ -40,6 +39,9 @@
40 39 #include <ctime>
41 40 #endif
42 41  
  42 +#ifdef DEBUG
  43 + #include <stim/cuda/testKernel.cuh>
  44 +#endif
43 45  
44 46 namespace stim
45 47 {
... ... @@ -143,8 +145,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
143 145  
144 146  
145 147 #ifdef DEBUG
146   - stringstream name;
147 148 int iter;
  149 + stringstream name;
148 150 int iter_pos;
149 151 int iter_dir;
150 152 int iter_siz;
... ... @@ -294,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
294 296 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder.
295 297 stim::cylinder<float> cyl(cL, cM);
296 298 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda
  299 +
297 300 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture.
298 301 float pval; //pvalue associated with the points on the cylinder.
299 302 if(!result.empty()) ///if we have any points
... ... @@ -957,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
957 960 iter_dir = 0;
958 961 iter_siz = 0;
959 962 #endif
960   - stepsize = 3.0;
  963 + stepsize = 6.0;
961 964 n_pixels = 16.0;
962 965  
963 966 srand(100);
... ... @@ -1380,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1380 1383 Step()
1381 1384 {
1382 1385 #ifdef DEBUG
1383   - std::cerr << "Took a step" << std::endl;
  1386 + std::cerr << "Took a step";
1384 1387 #endif
1385 1388 Bind(direction_texID, direction_buffID, numSamples, n_pixels);
1386 1389 CHECK_OPENGL_ERROR
1387 1390 findOptimalDirection();
1388 1391 Unbind();
  1392 + #ifdef DEBUG
  1393 + std::cerr << " " << current_cost;
  1394 + #endif
1389 1395 Bind(position_texID, position_buffID, numSamplesPos, n_pixels);
1390 1396 findOptimalPosition();
1391 1397 Unbind();
  1398 + #ifdef DEBUG
  1399 + std::cerr << " " << current_cost;
  1400 + #endif
1392 1401 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1393 1402 findOptimalRadius();
1394 1403 Unbind();
  1404 + #ifdef DEBUG
  1405 + std::cerr << " " << current_cost;
  1406 + #endif
1395 1407 CHECK_OPENGL_ERROR
1396   -
  1408 + #ifdef DEBUG
  1409 + std::cerr << std::endl;
  1410 + #endif
1397 1411 return current_cost;
1398 1412 }
1399 1413  
... ... @@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1539 1553 // findOptimalDirection();
1540 1554 // Unbind();
1541 1555 //THIS IS EXPERIMENTAL
1542   - Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1543   - findOptimalRadius();
1544   - Unbind();
  1556 + // Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
  1557 + // findOptimalRadius();
  1558 + // Unbind();
1545 1559 //THIS IS EXPERIMENTAL
1546 1560  
1547 1561 // cL.push_back(curSeed);
... ...
stim/gl/gl_texture.h
... ... @@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
30 30 GLenum cpu_type;
31 31 GLenum gpu_type;
32 32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
33   - using image_stack<T>::R;
  33 + using image_stack<T,F>::R;
34 34 //using image_stack<T>::S;
35   - using image_stack<T>::ptr;
  35 + using image_stack<T,F>::ptr;
36 36  
37 37 /// Sets the internal texture_type, based on the data dimensions
38 38 void setTextureType(){
... ... @@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
247 247 }
248 248  
249 249 ///returns the dimentions of the data in the x, y, z directions.
250   - vec<int> getSize(){
  250 + stim::vec<int> getSize(){
251 251 stim::vec<int> size(R[1], R[2], R[3]);
252 252 return size;
253 253 }
... ... @@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
282 282 ///@param file_mask specifies the file(s) to be loaded
283 283 /// Sets the path and calls the loader on that path.
284 284 void load_images(std::string file_mask){
285   - image_stack<T>::load_images(file_mask); //load images
  285 + image_stack<T, F>::load_images(file_mask); //load images
286 286 guess_parameters();
287 287 }
288 288  
... ... @@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
292 292 return texID;
293 293 }
294 294  
295   -
  295 +
296 296 T* getData(){
297 297 return ptr;
298 298 }
299   -
300   -
  299 +
  300 + void setData(T* rts)
  301 + {
  302 +
  303 + }
  304 +
301 305 };
  306 +
302 307 }
303 308  
304 309  
... ...
stim/grids/image_stack.h
... ... @@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{
17 17  
18 18 protected:
19 19 //using stim::grid<T, 4>::S;
20   - using stim::grid<T, 4>::R;
21   - using stim::grid<T, 4>::ptr;
22   - using stim::grid<T, 4>::read;
  20 + using stim::grid<T, 4, F>::R;
  21 + using stim::grid<T, 4, F>::ptr;
  22 + using stim::grid<T, 4, F>::read;
23 23  
24 24 public:
25 25 //default constructor
26   - image_stack() : grid<T, 4>() {
  26 + image_stack() : grid<T, 4, F>() {
27 27  
28 28 }
29 29  
... ... @@ -113,7 +113,8 @@ public:
113 113 /// @param i is the page to be saved
114 114 void save_image(std::string file_name, unsigned int i){
115 115 stim::image<T> I; //create an image
116   - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  116 + I.set_interleaved(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  117 +// I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
117 118 I.save(file_name);
118 119 }
119 120  
... ...