Commit 2eefb0355e676d2637d3451ae251b1cbcc0df3df

Authored by Pavel Govyadinov
1 parent efb0a2b5

added debugging code to the cuda code that is enabled with -DDebug in cmake list…

…s. Fixed minor errors in the grids and image stack classes
stim/cuda/branch_detection.cuh
@@ -11,7 +11,7 @@ typedef unsigned int uint; @@ -11,7 +11,7 @@ typedef unsigned int uint;
11 11
12 12
13 std::vector< stim::vec<float> > 13 std::vector< stim::vec<float> >
14 -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) 14 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y, int iter = 0)
15 { 15 {
16 float sigma = 2.0; 16 float sigma = 2.0;
17 unsigned int conn = 7; 17 unsigned int conn = 7;
@@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) @@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
22 stringstream name; 22 stringstream name;
23 23
24 24
25 - cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold); 25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold, iter);
26 cudaDeviceSynchronize(); 26 cudaDeviceSynchronize();
27 27
28 28
stim/cuda/filter.cuh
@@ -26,6 +26,11 @@ namespace stim @@ -26,6 +26,11 @@ namespace stim
26 float* LoG; 26 float* LoG;
27 float* res; 27 float* res;
28 float* centers; 28 float* centers;
  29 +
  30 +//#ifdef DEBUG
  31 + float* print;
  32 +//#endif
  33 +
29 stim::cuda::cuda_texture tx; 34 stim::cuda::cuda_texture tx;
30 35
31 36
@@ -44,6 +49,13 @@ namespace stim @@ -44,6 +49,13 @@ namespace stim
44 HANDLE_ERROR( 49 HANDLE_ERROR(
45 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float)) 50 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
46 ); 51 );
  52 +
  53 +//#ifdef DEBUG
  54 + HANDLE_ERROR(
  55 + cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
  56 + );
  57 +//#endif
  58 +
47 // checkCUDAerrors("Memory Allocation, Result"); 59 // checkCUDAerrors("Memory Allocation, Result");
48 } 60 }
49 61
@@ -58,6 +70,11 @@ namespace stim @@ -58,6 +70,11 @@ namespace stim
58 HANDLE_ERROR( 70 HANDLE_ERROR(
59 cudaFree(centers) 71 cudaFree(centers)
60 ); 72 );
  73 +//#ifdef DEBUG
  74 + HANDLE_ERROR(
  75 + cudaFree(print)
  76 + );
  77 +//#endif
61 free(LoG); 78 free(LoG);
62 } 79 }
63 80
@@ -89,7 +106,7 @@ namespace stim @@ -89,7 +106,7 @@ namespace stim
89 //Shared memory would be better. 106 //Shared memory would be better.
90 __global__ 107 __global__
91 void 108 void
92 - applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){ 109 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
93 //R = floor(size/2) 110 //R = floor(size/2)
94 //THIS IS A NAIVE WAY TO DO IT, and there is a better way) 111 //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
95 112
@@ -101,16 +118,15 @@ namespace stim @@ -101,16 +118,15 @@ namespace stim
101 // float val = 0; 118 // float val = 0;
102 float tu = (x-kr+xi)/(float)DIM_X; 119 float tu = (x-kr+xi)/(float)DIM_X;
103 float tv = (y-kr+yi)/(float)DIM_Y; 120 float tv = (y-kr+yi)/(float)DIM_Y;
  121 + int idx = y*DIM_X+x;
104 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv)); 122 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
105 __syncthreads(); 123 __syncthreads();
106 -  
107 124
108 //x = max(0,x); 125 //x = max(0,x);
109 //x = min(x, width-1); 126 //x = min(x, width-1);
110 //y = max(y, 0); 127 //y = max(y, 0);
111 //y = min(y, height - 1); 128 //y = min(y, height - 1);
112 129
113 - int idx = y*DIM_X+x;  
114 // int k_idx; 130 // int k_idx;
115 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1) 131 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
116 { 132 {
@@ -135,11 +151,12 @@ namespace stim @@ -135,11 +151,12 @@ namespace stim
135 __syncthreads(); 151 __syncthreads();
136 if(xi == 0 && yi == 0) 152 if(xi == 0 && yi == 0)
137 res[idx] = shared[0][0]; 153 res[idx] = shared[0][0];
  154 +
138 } 155 }
139 156
140 extern "C" 157 extern "C"
141 float * 158 float *
142 - get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold) 159 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0)
143 { 160 {
144 tx.SetTextureCoordinates(1); 161 tx.SetTextureCoordinates(1);
145 tx.SetAddressMode(1, 3); 162 tx.SetAddressMode(1, 3);
@@ -153,7 +170,14 @@ namespace stim @@ -153,7 +170,14 @@ namespace stim
153 dim3 numBlocks(DIM_X, DIM_Y); 170 dim3 numBlocks(DIM_X, DIM_Y);
154 dim3 threadsPerBlock(sizeK, sizeK); 171 dim3 threadsPerBlock(sizeK, sizeK);
155 172
156 - applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG); 173 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
  174 +
  175 + #ifdef DEBUG
  176 + stringstream name;
  177 + name.str("");
  178 + name << "Fiber Cylinder " << iter << ".bmp";
  179 + stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
  180 + #endif
157 181
158 182
159 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y); 183 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
stim/cuda/testKernel.cuh
@@ -53,11 +53,31 @@ @@ -53,11 +53,31 @@
53 53
54 float valIn = tex2D<unsigned char>(texIn, x, y); 54 float valIn = tex2D<unsigned char>(texIn, x, y);
55 float templa = templ(x, 32)*255.0; 55 float templa = templ(x, 32)*255.0;
56 - print[idx] = abs(valIn-templa); ///temporary 56 + //print[idx] = abs(valIn-templa); ///temporary
  57 + print[idx] = abs(valIn);
57 //print[idx] = abs(templa); ///temporary 58 //print[idx] = abs(templa); ///temporary
58 59
59 } 60 }
60 61
  62 + ///Find the difference of the given set of samples and the template
  63 + ///using cuda acceleration.
  64 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  65 + /// to the data.
  66 + ///@param float* result --a pointer to the memory that stores the result.
  67 + __global__
  68 + //void get_diff (float *result)
  69 + void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  70 + {
  71 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  72 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  73 + int idx = y*dx+x;
  74 + // int idx = y*16+x;
  75 +
  76 + float valIn = tex2D<unsigned char>(texIn, x, y);
  77 + print[idx] = abs(valIn); ///temporary
  78 +
  79 + }
  80 +
61 void test(cudaTextureObject_t tObj, int x, int y, std::string nam) 81 void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
62 { 82 {
63 83
@@ -86,3 +106,31 @@ @@ -86,3 +106,31 @@
86 cleanUP(); 106 cleanUP();
87 } 107 }
88 108
  109 +
  110 + void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  111 + {
  112 +
  113 + //Bind the Texture in GL and allow access to cuda.
  114 +
  115 + //initialize the return arrays.
  116 +
  117 + initArray(x,y);
  118 + dim3 numBlocks(1, y);
  119 + dim3 threadsPerBlock(x, 1);
  120 + int max_threads = stim::maxThreadsPerBlock();
  121 + //dim3 threads(max_threads, 1);
  122 + //dim3 blocks(x / threads.x + 1, y);
  123 + //dim3 numBlocks(2, 2);
  124 + //dim3 threadsPerBlock(8, 108);
  125 +
  126 +
  127 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  128 + get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
  129 +
  130 + cudaDeviceSynchronize();
  131 + stringstream name; //for debugging
  132 + name << nam.c_str();
  133 + stim::gpu2image<float>(print, name.str(),x,y,0,255);
  134 +
  135 + cleanUP();
  136 + }
stim/envi/envi_header.h
@@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
8 #include <vector> 8 #include <vector>
9 #include <algorithm> 9 #include <algorithm>
10 #include <stdlib.h> 10 #include <stdlib.h>
  11 +#include <cmath>
11 12
12 //information from an ENVI header file 13 //information from an ENVI header file
13 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html 14 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html
stim/gl/gl_spider.h
@@ -27,7 +27,6 @@ @@ -27,7 +27,6 @@
27 #include <stim/cuda/branch_detection.cuh> 27 #include <stim/cuda/branch_detection.cuh>
28 #include "../../../volume-spider/glnetwork.h" 28 #include "../../../volume-spider/glnetwork.h"
29 #include <stim/visualization/cylinder.h> 29 #include <stim/visualization/cylinder.h>
30 -#include <stim/cuda/testKernel.cuh>  
31 #include <iostream> 30 #include <iostream>
32 #include <fstream> 31 #include <fstream>
33 #ifdef TIMING 32 #ifdef TIMING
@@ -40,6 +39,9 @@ @@ -40,6 +39,9 @@
40 #include <ctime> 39 #include <ctime>
41 #endif 40 #endif
42 41
  42 +#ifdef DEBUG
  43 + #include <stim/cuda/testKernel.cuh>
  44 +#endif
43 45
44 namespace stim 46 namespace stim
45 { 47 {
@@ -143,8 +145,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -143,8 +145,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
143 145
144 146
145 #ifdef DEBUG 147 #ifdef DEBUG
146 - stringstream name;  
147 int iter; 148 int iter;
  149 + stringstream name;
148 int iter_pos; 150 int iter_pos;
149 int iter_dir; 151 int iter_dir;
150 int iter_siz; 152 int iter_siz;
@@ -294,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -294,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
294 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder. 296 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder.
295 stim::cylinder<float> cyl(cL, cM); 297 stim::cylinder<float> cyl(cL, cM);
296 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda 298 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda
  299 +
297 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture. 300 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture.
298 float pval; //pvalue associated with the points on the cylinder. 301 float pval; //pvalue associated with the points on the cylinder.
299 if(!result.empty()) ///if we have any points 302 if(!result.empty()) ///if we have any points
@@ -957,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -957,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
957 iter_dir = 0; 960 iter_dir = 0;
958 iter_siz = 0; 961 iter_siz = 0;
959 #endif 962 #endif
960 - stepsize = 3.0; 963 + stepsize = 6.0;
961 n_pixels = 16.0; 964 n_pixels = 16.0;
962 965
963 srand(100); 966 srand(100);
@@ -1380,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1380,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1380 Step() 1383 Step()
1381 { 1384 {
1382 #ifdef DEBUG 1385 #ifdef DEBUG
1383 - std::cerr << "Took a step" << std::endl; 1386 + std::cerr << "Took a step";
1384 #endif 1387 #endif
1385 Bind(direction_texID, direction_buffID, numSamples, n_pixels); 1388 Bind(direction_texID, direction_buffID, numSamples, n_pixels);
1386 CHECK_OPENGL_ERROR 1389 CHECK_OPENGL_ERROR
1387 findOptimalDirection(); 1390 findOptimalDirection();
1388 Unbind(); 1391 Unbind();
  1392 + #ifdef DEBUG
  1393 + std::cerr << " " << current_cost;
  1394 + #endif
1389 Bind(position_texID, position_buffID, numSamplesPos, n_pixels); 1395 Bind(position_texID, position_buffID, numSamplesPos, n_pixels);
1390 findOptimalPosition(); 1396 findOptimalPosition();
1391 Unbind(); 1397 Unbind();
  1398 + #ifdef DEBUG
  1399 + std::cerr << " " << current_cost;
  1400 + #endif
1392 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels); 1401 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1393 findOptimalRadius(); 1402 findOptimalRadius();
1394 Unbind(); 1403 Unbind();
  1404 + #ifdef DEBUG
  1405 + std::cerr << " " << current_cost;
  1406 + #endif
1395 CHECK_OPENGL_ERROR 1407 CHECK_OPENGL_ERROR
1396 - 1408 + #ifdef DEBUG
  1409 + std::cerr << std::endl;
  1410 + #endif
1397 return current_cost; 1411 return current_cost;
1398 } 1412 }
1399 1413
@@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1539 // findOptimalDirection(); 1553 // findOptimalDirection();
1540 // Unbind(); 1554 // Unbind();
1541 //THIS IS EXPERIMENTAL 1555 //THIS IS EXPERIMENTAL
1542 - Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);  
1543 - findOptimalRadius();  
1544 - Unbind(); 1556 + // Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
  1557 + // findOptimalRadius();
  1558 + // Unbind();
1545 //THIS IS EXPERIMENTAL 1559 //THIS IS EXPERIMENTAL
1546 1560
1547 // cL.push_back(curSeed); 1561 // cL.push_back(curSeed);
stim/gl/gl_texture.h
@@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
30 GLenum cpu_type; 30 GLenum cpu_type;
31 GLenum gpu_type; 31 GLenum gpu_type;
32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.) 32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
33 - using image_stack<T>::R; 33 + using image_stack<T,F>::R;
34 //using image_stack<T>::S; 34 //using image_stack<T>::S;
35 - using image_stack<T>::ptr; 35 + using image_stack<T,F>::ptr;
36 36
37 /// Sets the internal texture_type, based on the data dimensions 37 /// Sets the internal texture_type, based on the data dimensions
38 void setTextureType(){ 38 void setTextureType(){
@@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
247 } 247 }
248 248
249 ///returns the dimentions of the data in the x, y, z directions. 249 ///returns the dimentions of the data in the x, y, z directions.
250 - vec<int> getSize(){ 250 + stim::vec<int> getSize(){
251 stim::vec<int> size(R[1], R[2], R[3]); 251 stim::vec<int> size(R[1], R[2], R[3]);
252 return size; 252 return size;
253 } 253 }
@@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
282 ///@param file_mask specifies the file(s) to be loaded 282 ///@param file_mask specifies the file(s) to be loaded
283 /// Sets the path and calls the loader on that path. 283 /// Sets the path and calls the loader on that path.
284 void load_images(std::string file_mask){ 284 void load_images(std::string file_mask){
285 - image_stack<T>::load_images(file_mask); //load images 285 + image_stack<T, F>::load_images(file_mask); //load images
286 guess_parameters(); 286 guess_parameters();
287 } 287 }
288 288
@@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
292 return texID; 292 return texID;
293 } 293 }
294 294
295 - 295 +
296 T* getData(){ 296 T* getData(){
297 return ptr; 297 return ptr;
298 } 298 }
299 -  
300 - 299 +
  300 + void setData(T* rts)
  301 + {
  302 +
  303 + }
  304 +
301 }; 305 };
  306 +
302 } 307 }
303 308
304 309
stim/grids/image_stack.h
@@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{ @@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{
17 17
18 protected: 18 protected:
19 //using stim::grid<T, 4>::S; 19 //using stim::grid<T, 4>::S;
20 - using stim::grid<T, 4>::R;  
21 - using stim::grid<T, 4>::ptr;  
22 - using stim::grid<T, 4>::read; 20 + using stim::grid<T, 4, F>::R;
  21 + using stim::grid<T, 4, F>::ptr;
  22 + using stim::grid<T, 4, F>::read;
23 23
24 public: 24 public:
25 //default constructor 25 //default constructor
26 - image_stack() : grid<T, 4>() { 26 + image_stack() : grid<T, 4, F>() {
27 27
28 } 28 }
29 29
@@ -113,7 +113,8 @@ public: @@ -113,7 +113,8 @@ public:
113 /// @param i is the page to be saved 113 /// @param i is the page to be saved
114 void save_image(std::string file_name, unsigned int i){ 114 void save_image(std::string file_name, unsigned int i){
115 stim::image<T> I; //create an image 115 stim::image<T> I; //create an image
116 - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid 116 + I.set_interleaved(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  117 +// I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
117 I.save(file_name); 118 I.save(file_name);
118 } 119 }
119 120