Commit 27194b5660d6d3a15b6deebfe219a9ee84ecc126

Authored by Pavel Govyadinov
1 parent 8761b649

major bug fixes, stopped using the ivote code for branch detection and started u…

…sing a general Laplassian of Gaussians algorithm (the algorithm invers the image atm), fixed a minor bug with the dynamic network creation
stim/cuda/branch_detection.cuh
@@ -5,157 +5,31 @@ @@ -5,157 +5,31 @@
5 //#include <math.h> 5 //#include <math.h>
6 #include <stim/visualization/colormap.h> 6 #include <stim/visualization/colormap.h>
7 #include <stim/cuda/cuda_texture.cuh> 7 #include <stim/cuda/cuda_texture.cuh>
8 -#include <stim/cuda/templates/gradient.cuh>  
9 -#include <stim/cuda/templates/gaussian_blur.cuh>  
10 -#include <stim/cuda/arraymath.cuh>  
11 -#include <stim/cuda/ivote.cuh>  
12 -#include <stim/cuda/testKernel.cuh> 8 +#include <stim/cuda/filter.cuh>
13 typedef unsigned int uint; 9 typedef unsigned int uint;
14 typedef unsigned int uchar; 10 typedef unsigned int uchar;
15 11
16 -stim::cuda::cuda_texture t;  
17 -float* gpuTable;  
18 -float* gpuGrad;  
19 -float* gpuVote;  
20 -float* gpuI;  
21 -float* gpuCenters;  
22 -  
23 -void atan_2d(float* cpuTable, unsigned int rmax)  
24 -{  
25 - //initialize the width and height of the window which atan2 are computed in.  
26 - int xsize = 2*rmax +1;  
27 - int ysize = 2*rmax +1;  
28 -  
29 - // assign the center coordinates of the atan2 window to yi and xi  
30 - int yi = rmax;  
31 - int xi = rmax;  
32 -  
33 -  
34 - for (int xt = 0; xt < xsize; xt++){  
35 -  
36 - for(int yt = 0; yt < ysize; yt++){  
37 -  
38 - //convert the current 2D coordinates to 1D  
39 - int id = yt * xsize + xt;  
40 - // calculate the distance between the pixel and the center of the atan2 window  
41 - float xd = xi - xt;  
42 - float yd = yi - yt;  
43 -  
44 - // calculate the angle between the pixel and the center of the atan2 window and store the result.  
45 - float atan_2d_vote = atan2(yd, xd);  
46 - cpuTable[id] = atan_2d_vote;  
47 - }  
48 - }  
49 -  
50 -}  
51 -  
52 -void initCuda(unsigned int bytes_table, unsigned int bytes_ds)  
53 -{  
54 - HANDLE_ERROR(  
55 - cudaMalloc((void**) &gpuTable, bytes_table)  
56 - );  
57 - HANDLE_ERROR(  
58 - cudaMalloc((void**) &gpuI, bytes_ds)  
59 - );  
60 - HANDLE_ERROR(  
61 - cudaMalloc((void**) &gpuGrad, bytes_ds*2)  
62 - );  
63 - HANDLE_ERROR(  
64 - cudaMalloc((void**) &gpuVote, bytes_ds)  
65 - );  
66 - HANDLE_ERROR(  
67 - cudaMalloc((void**) &gpuCenters, bytes_ds)  
68 - );  
69 -}  
70 -  
71 -void cleanCuda()  
72 -{  
73 - HANDLE_ERROR(  
74 - cudaFree(gpuTable)  
75 - );  
76 - HANDLE_ERROR(  
77 - cudaFree(gpuGrad)  
78 - );  
79 - HANDLE_ERROR(  
80 - cudaFree(gpuVote)  
81 - );  
82 - HANDLE_ERROR(  
83 - cudaFree(gpuCenters)  
84 - );  
85 - HANDLE_ERROR(  
86 - cudaFree(gpuI)  
87 - );  
88 -}  
89 12
90 std::vector< stim::vec<float> > 13 std::vector< stim::vec<float> >
91 find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) 14 find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
92 { 15 {
93 - float phi = 15.1*M_PI/180;  
94 - int iter = 5;  
95 - float dphi = phi/iter;  
96 - float rmax = 10;  
97 - float sigma = 4;  
98 - unsigned int pixels = x * y;  
99 - unsigned int bytes = sizeof(float) * pixels;  
100 - unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1);  
101 - unsigned int x_ds = (x + (x % 1 == 0 ? 0:1));  
102 - unsigned int y_ds = (y + (x % 1 == 0 ? 0:1));  
103 - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;  
104 - unsigned int conn = 10;  
105 - float final_t = 200.0;  
106 - float* cpuTable = (float*) malloc(bytes_table);  
107 - float* cpuCenters = (float*) malloc(bytes_ds); 16 + float sigma = 2.0;
  17 + unsigned int conn = 7;
  18 + float threshold = 40.0;
  19 + float* cpuCenters = (float*) malloc(x*y*sizeof(float));
  20 + int sizek = 7;
108 21
109 stringstream name; 22 stringstream name;
110 23
111 24
112 -  
113 -  
114 -// test(texbufferID, texType, x, y);  
115 - std::vector<stim::vec<float> > output;  
116 - initCuda(bytes_table, bytes_ds);  
117 -  
118 - atan_2d(cpuTable, rmax);  
119 - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);  
120 -  
121 -  
122 - t.MapCudaTexture(texbufferID, texType);  
123 - cudaDeviceSynchronize();  
124 - stim::cuda::tex_gaussian_blur2<float>(  
125 - gpuI, sigma, x, y, t.getTexture(), t.getArray()  
126 - );  
127 -// stim::gpu2image<float>(gpuI, "Blur.jpg", x,y , 0, 255);  
128 -// stim::gpu2image<float>(t.getArray(), "ORIGINAL.jpg", x,y , 0, 255); 25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold);
129 cudaDeviceSynchronize(); 26 cudaDeviceSynchronize();
130 27
131 28
132 - stim::cuda::gpu_gradient_2d<float>(  
133 - gpuGrad, gpuI, x, y  
134 - );  
135 - cudaDeviceSynchronize();  
136 -  
137 - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);  
138 - cudaDeviceSynchronize();  
139 29
140 - cudaDeviceSynchronize();  
141 - for (int i = 0; i < iter; i++)  
142 - {  
143 - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);  
144 - cudaDeviceSynchronize();  
145 - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);  
146 - cudaDeviceSynchronize();  
147 - phi = phi - dphi;  
148 - } 30 + std::vector<stim::vec<float> > output;
149 31
150 cudaDeviceSynchronize(); 32 cudaDeviceSynchronize();
151 - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);  
152 -// stim::gpu2image<float>(gpuCenters, "Centers.jpg", x, y, 0, 1);  
153 - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);  
154 -// stim::cpu2image<float>(cpuCenters, "CentersXPU.jpg", x, y, 0, 1);  
155 -// std::cerr << pixels << " " << x << " " << y << std::endl;  
156 -// std::cerr << "y is " << y << ", x is " << x << std::endl;  
157 -  
158 -// std::cout << "Before " << output.size() << std::endl;  
159 33
160 for(int i = 0; i < x; i++) 34 for(int i = 0; i < x; i++)
161 { 35 {
@@ -166,8 +40,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) @@ -166,8 +40,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
166 { 40 {
167 float x_v = (float) i; 41 float x_v = (float) i;
168 float y_v = (float) j; 42 float y_v = (float) j;
169 - std::cout << x_v/x*360.0 << std::endl;  
170 - std::cout << y_v/y << std::endl;  
171 output.push_back(stim::vec<float>((x_v/(float)x*360.0), 43 output.push_back(stim::vec<float>((x_v/(float)x*360.0),
172 (y_v), y_v/8)); 44 (y_v), y_v/8));
173 } 45 }
@@ -175,29 +47,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) @@ -175,29 +47,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
175 } 47 }
176 } 48 }
177 49
178 -/* for(int i = 0; i < pixels; i++)  
179 - {  
180 - int ix = (i % x);  
181 - int iy = (i / x);  
182 - if(cpuCenters[i] != 0)  
183 - {  
184 -  
185 - float x_v = (float) ix;  
186 - float y_v = (float) iy;  
187 - std::cout << x_v/x*360 << std::endl;  
188 - std::cout << y_v/y << std::endl;  
189 - output.push_back(stim::vec<float>((x_v/(float)x*360),  
190 - (y_v/(float)y), 0.0));  
191 -  
192 - }  
193 - } */  
194 -  
195 -// std::cout << "After " << output.size() << std::endl;  
196 -  
197 -  
198 - t.UnmapCudaTexture();  
199 - cleanCuda();  
200 - free(cpuTable);  
201 free(cpuCenters); 50 free(cpuCenters);
202 return output; 51 return output;
203 } 52 }
stim/cuda/branch_detection2.cuh deleted
1 -#include <stim/cuda/templates/gaussian_blur.cuh>  
2 -#include <stim/cuda/templates/gradient.cuh>  
3 -#include <stim/cuda/arraymath.cuh>  
4 -#include <stim/cuda/ivote.cuh>  
5 -  
6 -  
7 -  
8 -  
9 -  
10 -  
11 -  
12 -  
13 -  
14 -  
15 -void atan_2(float* cpuTable, unsigned int rmax){  
16 -  
17 - //initialize the width and height of the window which atan2 are computed in.  
18 - int xsize = 2*rmax +1;  
19 - int ysize = 2*rmax +1;  
20 -  
21 - // assign the center coordinates of the atan2 window to yi and xi  
22 - int yi = rmax;  
23 - int xi = rmax;  
24 -  
25 -  
26 - for (int xt = 0; xt < xsize; xt++){  
27 -  
28 - for(int yt = 0; yt < ysize; yt++){  
29 -  
30 - //convert the current 2D coordinates to 1D  
31 - int id = yt * xsize + xt;  
32 - // calculate the distance between the pixel and the center of the atan2 window  
33 - float xd = xi - xt;  
34 - float yd = yi - yt;  
35 -  
36 - // calculate the angle between the pixel and the center of the atan2 window and store the result.  
37 - float atan_2d_vote = atan2(yd, xd);  
38 - cpuTable[id] = atan_2d_vote;  
39 - }  
40 - }  
41 -  
42 -}  
43 -std::vector<stim::vec<float> >  
44 -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)  
45 -{  
46 -  
47 - float* cpuTable = (float  
48 -  
49 - unsigned int pixels = x * y;  
50 - unsigned int bytes = sizeof(float) * pixels;  
51 -  
52 - //calculate the number of bytes in the atan2 table  
53 -  
54 - unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);  
55 -  
56 -  
57 -  
58 - //allocate space on the GPU for the atan2 table  
59 -  
60 - float* gpuTable;  
61 -  
62 - cudaMalloc(&gpuTable, bytes_table);  
63 -  
64 -  
65 -  
66 - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);  
67 -  
68 - unsigned int sigma_ds = 1/resize;  
69 - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
70 - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
71 - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;  
72 -  
73 -  
74 - float* gpuI;  
75 - cudaMalloc(&gpuI, bytes_ds);  
76 -  
77 -  
78 - float* gpuGrad;  
79 - cudaMalloc(&gpuGrad, bytes_ds*2);  
80 -  
81 - float* gpuVote;  
82 - cudaMalloc(&gpuVote, bytes_ds);  
83 -  
84 - // allocate space on the GPU for the detected cell centes  
85 -  
86 - float* gpuCenters;  
87 -  
88 - cudaMalloc(&gpuCenters, bytes_ds);  
89 -  
90 -  
91 - stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);  
92 - cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);  
93 -  
94 -x = x_ds;  
95 - y = y_ds;  
96 - t = t * resize;  
97 - //sigma = sigma * resize;  
98 -  
99 - cudaDeviceSynchronize();  
100 - stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);  
101 - cudaDeviceSynchronize();  
102 - cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);  
103 - cudaDeviceSynchronize();  
104 -  
105 - stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);  
106 - cudaDeviceSynchronize();  
107 - cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);  
108 -  
109 - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);  
110 - cudaDeviceSynchronize();  
111 - cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);  
112 -  
113 -  
114 - //multiply the gradient by a constant and calculate the absolute value (to save an image)  
115 -  
116 - stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);  
117 -  
118 - cudaDeviceSynchronize();  
119 -  
120 - stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);  
121 -  
122 - cudaDeviceSynchronize();  
123 -  
124 -  
125 - for (int i =0; i<iter; i++){  
126 -  
127 - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);  
128 - cudaDeviceSynchronize();  
129 - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);  
130 - cudaDeviceSynchronize();  
131 - switch (i){  
132 - case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
133 - break;  
134 - case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
135 - break;  
136 - case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
137 - break;  
138 - case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
139 - break;  
140 - case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
141 - break;  
142 - default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);  
143 - break;  
144 - }  
145 - phi = phi - dphi;  
146 - }  
147 -  
148 - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);  
149 - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);  
150 -  
151 -}  
stim/cuda/cuda_texture.cuh
@@ -41,6 +41,46 @@ namespace stim @@ -41,6 +41,46 @@ namespace stim
41 texDesc.normalizedCoords = 0; 41 texDesc.normalizedCoords = 0;
42 } 42 }
43 43
  44 +
  45 + ///Enable the nromalized texture coordinates.
  46 + ///@param bool, 1 for on, 0 for off
  47 + void
  48 + SetTextureCoordinates(bool val)
  49 + {
  50 + if(val)
  51 + texDesc.normalizedCoords = 1;
  52 + else
  53 + texDesc.normalizedCoords = 0;
  54 + }
  55 +
  56 + ///sets the dimension dim to used the mode at the borders of the texture.
  57 + ///@param dim : 0-x, 1-y, 2-z
  58 + ///@param mode: cudaAddressModeWrap = 0,
  59 + /// cudaAddressModeClamp = 1,
  60 + /// cudaAddressMNodeMirror = 2,
  61 + /// cudaAddressModeBorder = 3,
  62 + void
  63 + SetAddressMode(int dim, int mode)
  64 + {
  65 + switch(mode)
  66 + {
  67 + case 0:
  68 + texDesc.addressMode[dim] = cudaAddressModeWrap;
  69 + break;
  70 + case 1:
  71 + texDesc.addressMode[dim] = cudaAddressModeClamp;
  72 + break;
  73 + case 2:
  74 + texDesc.addressMode[dim] = cudaAddressModeMirror;
  75 + break;
  76 + case 3:
  77 + texDesc.addressMode[dim] = cudaAddressModeBorder;
  78 + break;
  79 + default:
  80 + break;
  81 + }
  82 + }
  83 +
44 //-------------------------------------------------------------------------// 84 //-------------------------------------------------------------------------//
45 //-------------------------------CUDA_MAPPING------------------------------// 85 //-------------------------------CUDA_MAPPING------------------------------//
46 //-------------------------------------------------------------------------// 86 //-------------------------------------------------------------------------//
stim/cuda/filter.cuh 0 → 100644
  1 +#ifndef STIM_FILTER_H
  2 +#define STIM_FILTER_H
  3 +
  4 +#include <assert.h>
  5 +#include <cuda.h>
  6 +#include <cuda_runtime.h>
  7 +#include <stdio.h>
  8 +#include <stim/visualization/colormap.h>
  9 +#include <sstream>
  10 +#include <stim/cuda/cudatools/devices.h>
  11 +#include <stim/cuda/cudatools/threads.h>
  12 +#include <stim/cuda/cuda_texture.cuh>
  13 +#include <stim/cuda/ivote.cuh>
  14 +#include <stim/cuda/arraymath.cuh>
  15 +
  16 +#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
  17 +#define M_PI 3.141592654f
  18 +
  19 +
  20 +namespace stim
  21 +{
  22 + namespace cuda
  23 + {
  24 +
  25 + float* gpuLoG;
  26 + float* LoG;
  27 + float* res;
  28 + float* centers;
  29 + stim::cuda::cuda_texture tx;
  30 +
  31 +
  32 +
  33 + void initArray(int DIM_X, int DIM_Y, int kl)
  34 + {
  35 +
  36 + LoG = (float*) malloc(kl*kl*sizeof(float));
  37 + HANDLE_ERROR(
  38 + cudaMalloc( (void**) &gpuLoG, kl*kl*sizeof(float))
  39 + );
  40 + // checkCUDAerrors("Memory Allocation, LoG");
  41 + HANDLE_ERROR(
  42 + cudaMalloc( (void**) &res, DIM_Y*DIM_X*sizeof(float))
  43 + );
  44 + HANDLE_ERROR(
  45 + cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
  46 + );
  47 + // checkCUDAerrors("Memory Allocation, Result");
  48 + }
  49 +
  50 + void cleanUp(cudaGraphicsResource_t src)
  51 + {
  52 + HANDLE_ERROR(
  53 + cudaFree(gpuLoG)
  54 + );
  55 + HANDLE_ERROR(
  56 + cudaFree(res)
  57 + );
  58 + HANDLE_ERROR(
  59 + cudaFree(centers)
  60 + );
  61 + free(LoG);
  62 + }
  63 +
  64 + void
  65 + filterKernel (float kl, float sigma, float *LoG)
  66 + {
  67 + float t = 0.0;
  68 + float kr = kl/2;
  69 + float x, y;
  70 + int idx;
  71 + for(int i = 0; i < kl; i++){
  72 + for(int j = 0; j < kl; j++){
  73 + idx = j*kl+i;
  74 + x = i - kr - 0.5;
  75 + y = j - kr - 0.5;
  76 + LoG[idx] = (-1.0/M_PI/powf(sigma, 4))* (1 - (powf(x,2)+powf(y,2))/2.0/powf(sigma, 2))
  77 + *expf(-(powf(x,2)+powf(y,2))/2/powf(sigma,2));
  78 + t +=LoG[idx];
  79 + }
  80 + }
  81 +
  82 + for(int i = 0; i < kl*kl; i++)
  83 + {
  84 + LoG[i] = LoG[i]/t;
  85 + }
  86 +
  87 + }
  88 +
  89 + //Shared memory would be better.
  90 + __global__
  91 + void
  92 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){
  93 + //R = floor(size/2)
  94 + //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
  95 +
  96 + __shared__ float shared[7][7];
  97 + int x = blockIdx.x;
  98 + int y = blockIdx.y;
  99 + int xi = threadIdx.x;
  100 + int yi = threadIdx.y;
  101 + float val = 0;
  102 + float tu = (x-kr+xi)/(float)DIM_X;
  103 + float tv = (y-kr+yi)/(float)DIM_Y;
  104 + shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
  105 + __syncthreads();
  106 +
  107 +
  108 + //x = max(0,x);
  109 + //x = min(x, width-1);
  110 + //y = max(y, 0);
  111 + //y = min(y, height - 1);
  112 +
  113 + int idx = y*DIM_X+x;
  114 + int k_idx;
  115 + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  116 + {
  117 + __syncthreads();
  118 + if (xi < step)
  119 + {
  120 + shared[xi][yi] += shared[xi + step][yi];
  121 + }
  122 + __syncthreads();
  123 + }
  124 + __syncthreads();
  125 +
  126 + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  127 + {
  128 + __syncthreads();
  129 + if(yi < step)
  130 + {
  131 + shared[xi][yi] += shared[xi][yi + step];
  132 + }
  133 + __syncthreads();
  134 + }
  135 + __syncthreads();
  136 + if(xi == 0 && yi == 0)
  137 + res[idx] = shared[0][0];
  138 + }
  139 +
  140 + extern "C"
  141 + float *
  142 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold)
  143 + {
  144 + tx.SetTextureCoordinates(1);
  145 + tx.SetAddressMode(1, 3);
  146 + tx.MapCudaTexture(texbufferID, texType);
  147 + float* result = (float*) malloc(DIM_X*DIM_Y*sizeof(float));
  148 +
  149 + initArray(DIM_X, DIM_Y, sizeK);
  150 +
  151 + filterKernel(sizeK, sigma, LoG);
  152 + cudaMemcpy(gpuLoG, LoG, sizeK*sizeK*sizeof(float), cudaMemcpyHostToDevice);
  153 + dim3 numBlocks(DIM_X, DIM_Y);
  154 + dim3 threadsPerBlock(sizeK, sizeK);
  155 +
  156 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG);
  157 +
  158 +
  159 + stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
  160 +
  161 + cudaDeviceSynchronize();
  162 +
  163 +
  164 + cudaMemcpy(result, centers, DIM_X*DIM_Y*sizeof(float), cudaMemcpyDeviceToHost);
  165 +
  166 + tx.UnmapCudaTexture();
  167 + cleanUP();
  168 + return result;
  169 + }
  170 +
  171 + }
  172 +}
  173 +#endif
stim/cuda/filter.h deleted
1 -#include <assert.h>  
2 -#include <cuda.h>  
3 -#include <cuda_runtime.h>  
4 -#include <stdio.h>  
5 -#include <stim/visualization/colormap.h>  
6 -#include <sstream>  
7 -  
8 -#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )  
9 -  
10 -int kr;  
11 -int kl;  
12 -float sigma;  
13 -float* LoG;  
14 -float* result;  
15 -cudaArray* srcArray;  
16 -texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;  
17 -  
18 -  
19 -__device__ float filterKernel ()  
20 -{  
21 - float t = 0;  
22 - idx = j*kl+i;  
23 - for(int i = 0; i < kl; i++){  
24 - for(int j = 0; j < kl; j++){  
25 - x = i - floor(kl);  
26 - y = j - floor(kl);  
27 - LoG(idx) = (-1/M_PI/sigma^4)* (1 - (x^2+y^2)/2/sigma^2)  
28 - *exp(-(x^2+y^2)/2/sigma^2);  
29 - t +=LoG(idx);  
30 - }  
31 - }  
32 - LoG =/ t;  
33 -}  
34 -  
35 -void initArray(cudaGraphicsResource_t src, int DIM_X, int DIM_Y)  
36 -{  
37 - HANDLE_ERROR(  
38 - cudaGraphicsMapResources(1, &src)  
39 - );  
40 - HANDLE_ERROR(  
41 - cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0,0)  
42 - );  
43 - HANDLE_ERROR(  
44 - cudaBindTertureToArray(texIn, srcArray)  
45 - );  
46 - cudaMalloc( (void**) &LoG, kl*kl*sizeof(float));  
47 - checkCUDAerrors("Memory Allocation, LoG");  
48 - cudaMalloc( (void**) &result, DIM_Y*DIM_X*sizeof(float));  
49 - checkCUDAerrors("Memory Allocation, Result");  
50 -}  
51 -  
52 -void cleanUp(cudaGraphicsResource_t src);  
53 -{  
54 - HANDLE_ERROR(  
55 - cudaUnbindTexture(texIn)  
56 - );  
57 - HANDLE_ERROR(  
58 - cudaFree(LoG)  
59 - );  
60 - HANDLE_ERROR(  
61 - cudaFree(result)  
62 - );  
63 - HANDLE_ERROR(  
64 - cudaGraphicsUnmapResources(1, &src)  
65 - );  
66 -}  
67 -  
68 -//Shared memory would be better.  
69 -__global__  
70 -void  
71 -applyFilter(unsigned int DIM_X, unsigned int DIM_Y){  
72 -//R = floor(size/2)  
73 -//THIS IS A NAIVE WAY TO DO IT, and there is a better way)  
74 - //__shared__ float shared[(DIM_X+2*R), (DIM_Y+2*R)];  
75 -  
76 - const int x = IMAD(blockDim.x, blockIdx.x, threadIdx.x);  
77 - const int y = IMAD(blockDim.y, blockIdx.y, threadIdx.y);  
78 - float val = 0;  
79 - //x = max(0,x);  
80 - //x = min(x, width-1);  
81 - //y = max(y, 0);  
82 - //y = min(y, height - 1);  
83 -  
84 - int idx = y*DIM_X+x;  
85 - //unsigned int bindex = threadIdx.y * blockDim.y + threadIdx.x;  
86 -  
87 - //float valIn = tex2D(texIn, x, y);  
88 - for (int i = -kr; i <= kr; i++){ //rows  
89 - for (int j = -kr; i <= kr; j++){ //colls  
90 - k_idx = (j+kr)+(i+kr)*kl;  
91 - xi = max(0, x+i);  
92 - xi = min(x+i, DIM_X-1);  
93 - yj = max(y+j, 0);  
94 - yj = min(y+j, DIM_Y-1);  
95 - val += LoG(k_idx)*tex2D(texIn,x+i, y+j);  
96 - }  
97 - }  
98 -  
99 - result[idx] = val;  
100 -}  
stim/cuda/sharedmem.cuh
@@ -35,34 +35,6 @@ namespace stim{ @@ -35,34 +35,6 @@ namespace stim{
35 } 35 }
36 } 36 }
37 37
38 - template<typename T, typename D>  
39 - __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,  
40 - unsigned int x, unsigned int y, unsigned int X, unsigned int Y,  
41 - dim3 threadIdx, dim3 blockDim){  
42 -  
43 - //calculate the number of iterations required for the copy  
44 - unsigned int xI, yI;  
45 - xI = X/blockDim.x + 1; //number of iterations along X  
46 - yI = Y/blockDim.y + 1; //number of iterations along Y  
47 -  
48 - //for each iteration  
49 - for(unsigned int xi = 0; xi < xI; xi++){  
50 - for(unsigned int yi = 0; yi < yI; yi++){  
51 -  
52 - //calculate the index into shared memory  
53 - unsigned int sx = xi * blockDim.x + threadIdx.x;  
54 - unsigned int sy = yi * blockDim.y + threadIdx.y;  
55 -  
56 - //calculate the index into the texture  
57 - unsigned int tx = x + sx;  
58 - unsigned int ty = y + sy;  
59 -  
60 - //perform the copy  
61 - if(sx < X && sy < Y)  
62 - dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty));  
63 - }  
64 - }  
65 - }  
66 38
67 } 39 }
68 } 40 }
stim/gl/gl_spider.h
@@ -96,7 +96,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -96,7 +96,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
96 std::vector< stim::vec<float> > cD; //Direction of line currently being traced. 96 std::vector< stim::vec<float> > cD; //Direction of line currently being traced.
97 std::vector< stim::vec<float> > cM; //Magnitude of line currently being traced. 97 std::vector< stim::vec<float> > cM; //Magnitude of line currently being traced.
98 98
99 -// stim::glObj<float> sk; //object to store the skeleton.  
100 stim::glnetwork<float> nt; //object for storing the network. 99 stim::glnetwork<float> nt; //object for storing the network.
101 100
102 stim::vec<float> rev; //reverse vector; 101 stim::vec<float> rev; //reverse vector;
@@ -141,7 +140,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -141,7 +140,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
141 setMatrix(); //create the transformation matrix. 140 setMatrix(); //create the transformation matrix.
142 glCallList(dList+1); //move the templates to p, d, m. 141 glCallList(dList+1); //move the templates to p, d, m.
143 int best = getCost(ptexbufferID, numSamplesPos); //find min cost. 142 int best = getCost(ptexbufferID, numSamplesPos); //find min cost.
144 - std::cerr << best << std::endl;  
145 stim::vec<float> next( //find next position. 143 stim::vec<float> next( //find next position.
146 pV[best][0], 144 pV[best][0],
147 pV[best][1], 145 pV[best][1],
@@ -170,6 +168,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -170,6 +168,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
170 168
171 ///subject to change. 169 ///subject to change.
172 ///finds branches. 170 ///finds branches.
  171 + ///depreciated
173 void 172 void
174 branchDetection() 173 branchDetection()
175 { 174 {
@@ -197,14 +196,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -197,14 +196,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
197 -p[1] + cylp[1]*S[1]*R[1], 196 -p[1] + cylp[1]*S[1]*R[1],
198 -p[2] + cylp[2]*S[2]*R[2]); 197 -p[2] + cylp[2]*S[2]*R[2]);
199 seeddir = seeddir.norm(); 198 seeddir = seeddir.norm();
200 -// float seedm = m[0]/2.0;  
201 float seedm = m[0]; 199 float seedm = m[0];
202 // Uncomment for global run 200 // Uncomment for global run
203 -/* stim::vec<float> lSeed = getLastSeed();  
204 - if(sqrt(pow((lSeed[0] - vec[0]),2)  
205 - + pow((lSeed[1] - vec[1]),2) +  
206 - pow((lSeed[2] - vec[2]),2)) > m[0]/4.0  
207 - && */  
208 if( 201 if(
209 !(vec[0] > size[0] || vec[1] > size[1] 202 !(vec[0] > size[0] || vec[1] > size[1]
210 || vec[2] > size[2] || vec[0] < 0 203 || vec[2] > size[2] || vec[0] < 0
@@ -220,6 +213,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -220,6 +213,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
220 } 213 }
221 214
222 215
  216 + ///finds all the branches in the a given fiber.
  217 + ///using LoG method.
223 void 218 void
224 branchDetection2(int n = 8, int l_template = 8, int l_square = 8) 219 branchDetection2(int n = 8, int l_template = 8, int l_square = 8)
225 { 220 {
@@ -231,7 +226,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -231,7 +226,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
231 std::vector< stim::vec<float> > result = find_branch(btexbufferID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); 226 std::vector< stim::vec<float> > result = find_branch(btexbufferID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template);
232 stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); 227 stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
233 float pval; 228 float pval;
234 -// std::cerr << "the number of points is " << result.size() << std::endl;  
235 if(!result.empty()) 229 if(!result.empty())
236 { 230 {
237 for(int i = 0; i < result.size(); i++) 231 for(int i = 0; i < result.size(); i++)
@@ -242,7 +236,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -242,7 +236,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
242 236
243 pval = ((cyl.getl(id+1)-cyl.getl(id))* 237 pval = ((cyl.getl(id+1)-cyl.getl(id))*
244 (fmod(result[i][2], id))+cyl.getl(id))/cyl.getl(cL.size()-1); 238 (fmod(result[i][2], id))+cyl.getl(id))/cyl.getl(cL.size()-1);
245 -// std::cout << id << " " << cyl.getl(id) << " " << pval << " " << cyl.getl(cL.size()-1) << fmod(result[i][2], id) << std::endl;  
246 } 239 }
247 else if(id == 0) 240 else if(id == 0)
248 { 241 {
@@ -252,14 +245,9 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -252,14 +245,9 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
252 { 245 {
253 pval = (cyl.getl(id)/cyl.getl(cL.size()-1)); 246 pval = (cyl.getl(id)/cyl.getl(cL.size()-1));
254 } 247 }
255 -// std::cout << "Testing "<< i << ": " << result[i][0] << ", " << result[i][1] << ", " << result[i][2] << std::endl;  
256 -// std::cout << "Testing " << pval << std::endl;  
257 stim::vec<float> v = cyl.surf(pval, result[i][0]); 248 stim::vec<float> v = cyl.surf(pval, result[i][0]);
258 -// std::cout << v[0] << " ," << v[1] << " ," << v[2] << std::endl;  
259 stim::vec<float> di = cyl.p(pval); 249 stim::vec<float> di = cyl.p(pval);
260 -// std::cout << di[0] << " ," << di[1] << " ," << di[2] << std::endl;  
261 float rad = cyl.r(pval); 250 float rad = cyl.r(pval);
262 - std::cout << rad << std::endl;  
263 if( 251 if(
264 !(v[0] > size[0] || v[1] > size[1] 252 !(v[0] > size[0] || v[1] > size[1]
265 || v[2] > size[2] || v[0] < 0 253 || v[2] > size[2] || v[0] < 0
@@ -271,7 +259,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -271,7 +259,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
271 } 259 }
272 } 260 }
273 } 261 }
274 -// std::cout << "I ran the new branch detection" << std::endl;  
275 } 262 }
276 } 263 }
277 264
@@ -1506,10 +1493,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1506,10 +1493,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1506 spos[2] = spos[2]-sdir[2]*smag[0]/2.; 1493 spos[2] = spos[2]-sdir[2]*smag[0]/2.;
1507 int h = selectObject(spos, -sdir, smag[0]); 1494 int h = selectObject(spos, -sdir, smag[0]);
1508 //did start with a fiber? 1495 //did start with a fiber?
1509 - if(h != -1){ 1496 + if(h != -1 && h != nt.sizeE()){
1510 // std::cout << "got here double" << smag.str() << std::endl; 1497 // std::cout << "got here double" << smag.str() << std::endl;
1511 nt.addEdge(ce,cm, h, in.second); 1498 nt.addEdge(ce,cm, h, in.second);
1512 - } 1499 + } else { nt.addEdge(ce,cm, -1, -1);}
1513 } 1500 }
1514 } 1501 }
1515 } 1502 }
@@ -1551,7 +1538,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1551,7 +1538,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1551 { 1538 {
1552 int cost = Step(); 1539 int cost = Step();
1553 if (cost > min_cost){ 1540 if (cost > min_cost){
1554 - std::cout << "Cost Limit" << std::endl;  
1555 running = false; 1541 running = false;
1556 // sk.End(); 1542 // sk.End();
1557 branchDetection2(); 1543 branchDetection2();
@@ -1566,10 +1552,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1566,10 +1552,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1566 || pos[2] > size[2] || pos[0] < 0 1552 || pos[2] > size[2] || pos[0] < 0
1567 || pos[1] < 0 || pos[2] < 0) 1553 || pos[1] < 0 || pos[2] < 0)
1568 { 1554 {
1569 - std::cout << "Edge Limit" << std::endl;  
1570 -// std::cout << "Found Edge" << std::endl;  
1571 running = false; 1555 running = false;
1572 -// sk.End();  
1573 branchDetection2(); 1556 branchDetection2();
1574 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1); 1557 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1);
1575 addToNetwork(a, spos, smag, sdir); 1558 addToNetwork(a, spos, smag, sdir);
@@ -1583,13 +1566,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1583,13 +1566,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1583 rev = -getDirection(); 1566 rev = -getDirection();
1584 started = true; 1567 started = true;
1585 } 1568 }
1586 -// std::cout << i << p << std::endl;  
1587 //Has the template size gotten unreasonable? 1569 //Has the template size gotten unreasonable?
1588 mag = getMagnitude(); 1570 mag = getMagnitude();
1589 if(mag[0] > 75 || mag[0] < 1){ 1571 if(mag[0] > 75 || mag[0] < 1){
1590 - std::cout << "Magnitude Limit" << std::endl;  
1591 running = false; 1572 running = false;
1592 -// sk.End();  
1593 branchDetection2(); 1573 branchDetection2();
1594 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1); 1574 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1);
1595 addToNetwork(a, spos, smag, sdir); 1575 addToNetwork(a, spos, smag, sdir);
@@ -1601,9 +1581,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1601,9 +1581,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1601 h = selectObject(p, getDirection(), m[0]); 1581 h = selectObject(p, getDirection(), m[0]);
1602 //Have we hit something previously traced? 1582 //Have we hit something previously traced?
1603 if(h != -1){ 1583 if(h != -1){
1604 - std::cout << "Hit Limit" << std::endl;  
1605 running = false; 1584 running = false;
1606 -// sk.End();  
1607 branchDetection2(); 1585 branchDetection2();
1608 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), h); 1586 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), h);
1609 addToNetwork(a, spos, smag, sdir); 1587 addToNetwork(a, spos, smag, sdir);
@@ -1613,14 +1591,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt; @@ -1613,14 +1591,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1613 else { 1591 else {
1614 cL.push_back(stim::vec<float>(p[0], p[1],p[2])); 1592 cL.push_back(stim::vec<float>(p[0], p[1],p[2]));
1615 cM.push_back(stim::vec<float>(m[0], m[0])); 1593 cM.push_back(stim::vec<float>(m[0], m[0]));
1616 -// cM.push_back(m[0]);  
1617 -  
1618 -// sk.TexCoord(m[0]);  
1619 -// sk.Vertex(p[0], p[1], p[2]);  
1620 Bind(btexbufferID, bfboID, 27); 1594 Bind(btexbufferID, bfboID, 27);
1621 - CHECK_OPENGL_ERROR  
1622 -// branchDetection();  
1623 - CHECK_OPENGL_ERROR  
1624 Unbind(); 1595 Unbind();
1625 CHECK_OPENGL_ERROR 1596 CHECK_OPENGL_ERROR
1626 1597