Commit 27194b5660d6d3a15b6deebfe219a9ee84ecc126

Authored by Pavel Govyadinov
1 parent 8761b649

major bug fixes, stopped using the ivote code for branch detection and started u…

…sing a general Laplassian of Gaussians algorithm (the algorithm invers the image atm), fixed a minor bug with the dynamic network creation
stim/cuda/branch_detection.cuh
... ... @@ -5,157 +5,31 @@
5 5 //#include <math.h>
6 6 #include <stim/visualization/colormap.h>
7 7 #include <stim/cuda/cuda_texture.cuh>
8   -#include <stim/cuda/templates/gradient.cuh>
9   -#include <stim/cuda/templates/gaussian_blur.cuh>
10   -#include <stim/cuda/arraymath.cuh>
11   -#include <stim/cuda/ivote.cuh>
12   -#include <stim/cuda/testKernel.cuh>
  8 +#include <stim/cuda/filter.cuh>
13 9 typedef unsigned int uint;
14 10 typedef unsigned int uchar;
15 11  
16   -stim::cuda::cuda_texture t;
17   -float* gpuTable;
18   -float* gpuGrad;
19   -float* gpuVote;
20   -float* gpuI;
21   -float* gpuCenters;
22   -
23   -void atan_2d(float* cpuTable, unsigned int rmax)
24   -{
25   - //initialize the width and height of the window which atan2 are computed in.
26   - int xsize = 2*rmax +1;
27   - int ysize = 2*rmax +1;
28   -
29   - // assign the center coordinates of the atan2 window to yi and xi
30   - int yi = rmax;
31   - int xi = rmax;
32   -
33   -
34   - for (int xt = 0; xt < xsize; xt++){
35   -
36   - for(int yt = 0; yt < ysize; yt++){
37   -
38   - //convert the current 2D coordinates to 1D
39   - int id = yt * xsize + xt;
40   - // calculate the distance between the pixel and the center of the atan2 window
41   - float xd = xi - xt;
42   - float yd = yi - yt;
43   -
44   - // calculate the angle between the pixel and the center of the atan2 window and store the result.
45   - float atan_2d_vote = atan2(yd, xd);
46   - cpuTable[id] = atan_2d_vote;
47   - }
48   - }
49   -
50   -}
51   -
52   -void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
53   -{
54   - HANDLE_ERROR(
55   - cudaMalloc((void**) &gpuTable, bytes_table)
56   - );
57   - HANDLE_ERROR(
58   - cudaMalloc((void**) &gpuI, bytes_ds)
59   - );
60   - HANDLE_ERROR(
61   - cudaMalloc((void**) &gpuGrad, bytes_ds*2)
62   - );
63   - HANDLE_ERROR(
64   - cudaMalloc((void**) &gpuVote, bytes_ds)
65   - );
66   - HANDLE_ERROR(
67   - cudaMalloc((void**) &gpuCenters, bytes_ds)
68   - );
69   -}
70   -
71   -void cleanCuda()
72   -{
73   - HANDLE_ERROR(
74   - cudaFree(gpuTable)
75   - );
76   - HANDLE_ERROR(
77   - cudaFree(gpuGrad)
78   - );
79   - HANDLE_ERROR(
80   - cudaFree(gpuVote)
81   - );
82   - HANDLE_ERROR(
83   - cudaFree(gpuCenters)
84   - );
85   - HANDLE_ERROR(
86   - cudaFree(gpuI)
87   - );
88   -}
89 12  
90 13 std::vector< stim::vec<float> >
91 14 find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
92 15 {
93   - float phi = 15.1*M_PI/180;
94   - int iter = 5;
95   - float dphi = phi/iter;
96   - float rmax = 10;
97   - float sigma = 4;
98   - unsigned int pixels = x * y;
99   - unsigned int bytes = sizeof(float) * pixels;
100   - unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
101   - unsigned int x_ds = (x + (x % 1 == 0 ? 0:1));
102   - unsigned int y_ds = (y + (x % 1 == 0 ? 0:1));
103   - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
104   - unsigned int conn = 10;
105   - float final_t = 200.0;
106   - float* cpuTable = (float*) malloc(bytes_table);
107   - float* cpuCenters = (float*) malloc(bytes_ds);
  16 + float sigma = 2.0;
  17 + unsigned int conn = 7;
  18 + float threshold = 40.0;
  19 + float* cpuCenters = (float*) malloc(x*y*sizeof(float));
  20 + int sizek = 7;
108 21  
109 22 stringstream name;
110 23  
111 24  
112   -
113   -
114   -// test(texbufferID, texType, x, y);
115   - std::vector<stim::vec<float> > output;
116   - initCuda(bytes_table, bytes_ds);
117   -
118   - atan_2d(cpuTable, rmax);
119   - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
120   -
121   -
122   - t.MapCudaTexture(texbufferID, texType);
123   - cudaDeviceSynchronize();
124   - stim::cuda::tex_gaussian_blur2<float>(
125   - gpuI, sigma, x, y, t.getTexture(), t.getArray()
126   - );
127   -// stim::gpu2image<float>(gpuI, "Blur.jpg", x,y , 0, 255);
128   -// stim::gpu2image<float>(t.getArray(), "ORIGINAL.jpg", x,y , 0, 255);
  25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold);
129 26 cudaDeviceSynchronize();
130 27  
131 28  
132   - stim::cuda::gpu_gradient_2d<float>(
133   - gpuGrad, gpuI, x, y
134   - );
135   - cudaDeviceSynchronize();
136   -
137   - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
138   - cudaDeviceSynchronize();
139 29  
140   - cudaDeviceSynchronize();
141   - for (int i = 0; i < iter; i++)
142   - {
143   - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
144   - cudaDeviceSynchronize();
145   - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
146   - cudaDeviceSynchronize();
147   - phi = phi - dphi;
148   - }
  30 + std::vector<stim::vec<float> > output;
149 31  
150 32 cudaDeviceSynchronize();
151   - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
152   -// stim::gpu2image<float>(gpuCenters, "Centers.jpg", x, y, 0, 1);
153   - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
154   -// stim::cpu2image<float>(cpuCenters, "CentersXPU.jpg", x, y, 0, 1);
155   -// std::cerr << pixels << " " << x << " " << y << std::endl;
156   -// std::cerr << "y is " << y << ", x is " << x << std::endl;
157   -
158   -// std::cout << "Before " << output.size() << std::endl;
159 33  
160 34 for(int i = 0; i < x; i++)
161 35 {
... ... @@ -166,8 +40,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
166 40 {
167 41 float x_v = (float) i;
168 42 float y_v = (float) j;
169   - std::cout << x_v/x*360.0 << std::endl;
170   - std::cout << y_v/y << std::endl;
171 43 output.push_back(stim::vec<float>((x_v/(float)x*360.0),
172 44 (y_v), y_v/8));
173 45 }
... ... @@ -175,29 +47,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
175 47 }
176 48 }
177 49  
178   -/* for(int i = 0; i < pixels; i++)
179   - {
180   - int ix = (i % x);
181   - int iy = (i / x);
182   - if(cpuCenters[i] != 0)
183   - {
184   -
185   - float x_v = (float) ix;
186   - float y_v = (float) iy;
187   - std::cout << x_v/x*360 << std::endl;
188   - std::cout << y_v/y << std::endl;
189   - output.push_back(stim::vec<float>((x_v/(float)x*360),
190   - (y_v/(float)y), 0.0));
191   -
192   - }
193   - } */
194   -
195   -// std::cout << "After " << output.size() << std::endl;
196   -
197   -
198   - t.UnmapCudaTexture();
199   - cleanCuda();
200   - free(cpuTable);
201 50 free(cpuCenters);
202 51 return output;
203 52 }
... ...
stim/cuda/branch_detection2.cuh deleted
1   -#include <stim/cuda/templates/gaussian_blur.cuh>
2   -#include <stim/cuda/templates/gradient.cuh>
3   -#include <stim/cuda/arraymath.cuh>
4   -#include <stim/cuda/ivote.cuh>
5   -
6   -
7   -
8   -
9   -
10   -
11   -
12   -
13   -
14   -
15   -void atan_2(float* cpuTable, unsigned int rmax){
16   -
17   - //initialize the width and height of the window which atan2 are computed in.
18   - int xsize = 2*rmax +1;
19   - int ysize = 2*rmax +1;
20   -
21   - // assign the center coordinates of the atan2 window to yi and xi
22   - int yi = rmax;
23   - int xi = rmax;
24   -
25   -
26   - for (int xt = 0; xt < xsize; xt++){
27   -
28   - for(int yt = 0; yt < ysize; yt++){
29   -
30   - //convert the current 2D coordinates to 1D
31   - int id = yt * xsize + xt;
32   - // calculate the distance between the pixel and the center of the atan2 window
33   - float xd = xi - xt;
34   - float yd = yi - yt;
35   -
36   - // calculate the angle between the pixel and the center of the atan2 window and store the result.
37   - float atan_2d_vote = atan2(yd, xd);
38   - cpuTable[id] = atan_2d_vote;
39   - }
40   - }
41   -
42   -}
43   -std::vector<stim::vec<float> >
44   -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
45   -{
46   -
47   - float* cpuTable = (float
48   -
49   - unsigned int pixels = x * y;
50   - unsigned int bytes = sizeof(float) * pixels;
51   -
52   - //calculate the number of bytes in the atan2 table
53   -
54   - unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);
55   -
56   -
57   -
58   - //allocate space on the GPU for the atan2 table
59   -
60   - float* gpuTable;
61   -
62   - cudaMalloc(&gpuTable, bytes_table);
63   -
64   -
65   -
66   - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
67   -
68   - unsigned int sigma_ds = 1/resize;
69   - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
70   - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
71   - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
72   -
73   -
74   - float* gpuI;
75   - cudaMalloc(&gpuI, bytes_ds);
76   -
77   -
78   - float* gpuGrad;
79   - cudaMalloc(&gpuGrad, bytes_ds*2);
80   -
81   - float* gpuVote;
82   - cudaMalloc(&gpuVote, bytes_ds);
83   -
84   - // allocate space on the GPU for the detected cell centes
85   -
86   - float* gpuCenters;
87   -
88   - cudaMalloc(&gpuCenters, bytes_ds);
89   -
90   -
91   - stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);
92   - cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
93   -
94   -x = x_ds;
95   - y = y_ds;
96   - t = t * resize;
97   - //sigma = sigma * resize;
98   -
99   - cudaDeviceSynchronize();
100   - stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);
101   - cudaDeviceSynchronize();
102   - cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
103   - cudaDeviceSynchronize();
104   -
105   - stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);
106   - cudaDeviceSynchronize();
107   - cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
108   -
109   - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
110   - cudaDeviceSynchronize();
111   - cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
112   -
113   -
114   - //multiply the gradient by a constant and calculate the absolute value (to save an image)
115   -
116   - stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);
117   -
118   - cudaDeviceSynchronize();
119   -
120   - stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);
121   -
122   - cudaDeviceSynchronize();
123   -
124   -
125   - for (int i =0; i<iter; i++){
126   -
127   - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
128   - cudaDeviceSynchronize();
129   - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
130   - cudaDeviceSynchronize();
131   - switch (i){
132   - case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
133   - break;
134   - case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
135   - break;
136   - case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
137   - break;
138   - case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
139   - break;
140   - case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
141   - break;
142   - default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
143   - break;
144   - }
145   - phi = phi - dphi;
146   - }
147   -
148   - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);
149   - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
150   -
151   -}
stim/cuda/cuda_texture.cuh
... ... @@ -41,6 +41,46 @@ namespace stim
41 41 texDesc.normalizedCoords = 0;
42 42 }
43 43  
  44 +
  45 + ///Enable the nromalized texture coordinates.
  46 + ///@param bool, 1 for on, 0 for off
  47 + void
  48 + SetTextureCoordinates(bool val)
  49 + {
  50 + if(val)
  51 + texDesc.normalizedCoords = 1;
  52 + else
  53 + texDesc.normalizedCoords = 0;
  54 + }
  55 +
  56 + ///sets the dimension dim to used the mode at the borders of the texture.
  57 + ///@param dim : 0-x, 1-y, 2-z
  58 + ///@param mode: cudaAddressModeWrap = 0,
  59 + /// cudaAddressModeClamp = 1,
  60 + /// cudaAddressMNodeMirror = 2,
  61 + /// cudaAddressModeBorder = 3,
  62 + void
  63 + SetAddressMode(int dim, int mode)
  64 + {
  65 + switch(mode)
  66 + {
  67 + case 0:
  68 + texDesc.addressMode[dim] = cudaAddressModeWrap;
  69 + break;
  70 + case 1:
  71 + texDesc.addressMode[dim] = cudaAddressModeClamp;
  72 + break;
  73 + case 2:
  74 + texDesc.addressMode[dim] = cudaAddressModeMirror;
  75 + break;
  76 + case 3:
  77 + texDesc.addressMode[dim] = cudaAddressModeBorder;
  78 + break;
  79 + default:
  80 + break;
  81 + }
  82 + }
  83 +
44 84 //-------------------------------------------------------------------------//
45 85 //-------------------------------CUDA_MAPPING------------------------------//
46 86 //-------------------------------------------------------------------------//
... ...
stim/cuda/filter.cuh 0 → 100644
  1 +#ifndef STIM_FILTER_H
  2 +#define STIM_FILTER_H
  3 +
  4 +#include <assert.h>
  5 +#include <cuda.h>
  6 +#include <cuda_runtime.h>
  7 +#include <stdio.h>
  8 +#include <stim/visualization/colormap.h>
  9 +#include <sstream>
  10 +#include <stim/cuda/cudatools/devices.h>
  11 +#include <stim/cuda/cudatools/threads.h>
  12 +#include <stim/cuda/cuda_texture.cuh>
  13 +#include <stim/cuda/ivote.cuh>
  14 +#include <stim/cuda/arraymath.cuh>
  15 +
  16 +#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
  17 +#define M_PI 3.141592654f
  18 +
  19 +
  20 +namespace stim
  21 +{
  22 + namespace cuda
  23 + {
  24 +
  25 + float* gpuLoG;
  26 + float* LoG;
  27 + float* res;
  28 + float* centers;
  29 + stim::cuda::cuda_texture tx;
  30 +
  31 +
  32 +
  33 + void initArray(int DIM_X, int DIM_Y, int kl)
  34 + {
  35 +
  36 + LoG = (float*) malloc(kl*kl*sizeof(float));
  37 + HANDLE_ERROR(
  38 + cudaMalloc( (void**) &gpuLoG, kl*kl*sizeof(float))
  39 + );
  40 + // checkCUDAerrors("Memory Allocation, LoG");
  41 + HANDLE_ERROR(
  42 + cudaMalloc( (void**) &res, DIM_Y*DIM_X*sizeof(float))
  43 + );
  44 + HANDLE_ERROR(
  45 + cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
  46 + );
  47 + // checkCUDAerrors("Memory Allocation, Result");
  48 + }
  49 +
  50 + void cleanUp(cudaGraphicsResource_t src)
  51 + {
  52 + HANDLE_ERROR(
  53 + cudaFree(gpuLoG)
  54 + );
  55 + HANDLE_ERROR(
  56 + cudaFree(res)
  57 + );
  58 + HANDLE_ERROR(
  59 + cudaFree(centers)
  60 + );
  61 + free(LoG);
  62 + }
  63 +
  64 + void
  65 + filterKernel (float kl, float sigma, float *LoG)
  66 + {
  67 + float t = 0.0;
  68 + float kr = kl/2;
  69 + float x, y;
  70 + int idx;
  71 + for(int i = 0; i < kl; i++){
  72 + for(int j = 0; j < kl; j++){
  73 + idx = j*kl+i;
  74 + x = i - kr - 0.5;
  75 + y = j - kr - 0.5;
  76 + LoG[idx] = (-1.0/M_PI/powf(sigma, 4))* (1 - (powf(x,2)+powf(y,2))/2.0/powf(sigma, 2))
  77 + *expf(-(powf(x,2)+powf(y,2))/2/powf(sigma,2));
  78 + t +=LoG[idx];
  79 + }
  80 + }
  81 +
  82 + for(int i = 0; i < kl*kl; i++)
  83 + {
  84 + LoG[i] = LoG[i]/t;
  85 + }
  86 +
  87 + }
  88 +
  89 + //Shared memory would be better.
  90 + __global__
  91 + void
  92 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){
  93 + //R = floor(size/2)
  94 + //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
  95 +
  96 + __shared__ float shared[7][7];
  97 + int x = blockIdx.x;
  98 + int y = blockIdx.y;
  99 + int xi = threadIdx.x;
  100 + int yi = threadIdx.y;
  101 + float val = 0;
  102 + float tu = (x-kr+xi)/(float)DIM_X;
  103 + float tv = (y-kr+yi)/(float)DIM_Y;
  104 + shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
  105 + __syncthreads();
  106 +
  107 +
  108 + //x = max(0,x);
  109 + //x = min(x, width-1);
  110 + //y = max(y, 0);
  111 + //y = min(y, height - 1);
  112 +
  113 + int idx = y*DIM_X+x;
  114 + int k_idx;
  115 + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  116 + {
  117 + __syncthreads();
  118 + if (xi < step)
  119 + {
  120 + shared[xi][yi] += shared[xi + step][yi];
  121 + }
  122 + __syncthreads();
  123 + }
  124 + __syncthreads();
  125 +
  126 + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  127 + {
  128 + __syncthreads();
  129 + if(yi < step)
  130 + {
  131 + shared[xi][yi] += shared[xi][yi + step];
  132 + }
  133 + __syncthreads();
  134 + }
  135 + __syncthreads();
  136 + if(xi == 0 && yi == 0)
  137 + res[idx] = shared[0][0];
  138 + }
  139 +
  140 + extern "C"
  141 + float *
  142 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold)
  143 + {
  144 + tx.SetTextureCoordinates(1);
  145 + tx.SetAddressMode(1, 3);
  146 + tx.MapCudaTexture(texbufferID, texType);
  147 + float* result = (float*) malloc(DIM_X*DIM_Y*sizeof(float));
  148 +
  149 + initArray(DIM_X, DIM_Y, sizeK);
  150 +
  151 + filterKernel(sizeK, sigma, LoG);
  152 + cudaMemcpy(gpuLoG, LoG, sizeK*sizeK*sizeof(float), cudaMemcpyHostToDevice);
  153 + dim3 numBlocks(DIM_X, DIM_Y);
  154 + dim3 threadsPerBlock(sizeK, sizeK);
  155 +
  156 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG);
  157 +
  158 +
  159 + stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
  160 +
  161 + cudaDeviceSynchronize();
  162 +
  163 +
  164 + cudaMemcpy(result, centers, DIM_X*DIM_Y*sizeof(float), cudaMemcpyDeviceToHost);
  165 +
  166 + tx.UnmapCudaTexture();
  167 + cleanUP();
  168 + return result;
  169 + }
  170 +
  171 + }
  172 +}
  173 +#endif
... ...
stim/cuda/filter.h deleted
1   -#include <assert.h>
2   -#include <cuda.h>
3   -#include <cuda_runtime.h>
4   -#include <stdio.h>
5   -#include <stim/visualization/colormap.h>
6   -#include <sstream>
7   -
8   -#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
9   -
10   -int kr;
11   -int kl;
12   -float sigma;
13   -float* LoG;
14   -float* result;
15   -cudaArray* srcArray;
16   -texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
17   -
18   -
19   -__device__ float filterKernel ()
20   -{
21   - float t = 0;
22   - idx = j*kl+i;
23   - for(int i = 0; i < kl; i++){
24   - for(int j = 0; j < kl; j++){
25   - x = i - floor(kl);
26   - y = j - floor(kl);
27   - LoG(idx) = (-1/M_PI/sigma^4)* (1 - (x^2+y^2)/2/sigma^2)
28   - *exp(-(x^2+y^2)/2/sigma^2);
29   - t +=LoG(idx);
30   - }
31   - }
32   - LoG =/ t;
33   -}
34   -
35   -void initArray(cudaGraphicsResource_t src, int DIM_X, int DIM_Y)
36   -{
37   - HANDLE_ERROR(
38   - cudaGraphicsMapResources(1, &src)
39   - );
40   - HANDLE_ERROR(
41   - cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0,0)
42   - );
43   - HANDLE_ERROR(
44   - cudaBindTertureToArray(texIn, srcArray)
45   - );
46   - cudaMalloc( (void**) &LoG, kl*kl*sizeof(float));
47   - checkCUDAerrors("Memory Allocation, LoG");
48   - cudaMalloc( (void**) &result, DIM_Y*DIM_X*sizeof(float));
49   - checkCUDAerrors("Memory Allocation, Result");
50   -}
51   -
52   -void cleanUp(cudaGraphicsResource_t src);
53   -{
54   - HANDLE_ERROR(
55   - cudaUnbindTexture(texIn)
56   - );
57   - HANDLE_ERROR(
58   - cudaFree(LoG)
59   - );
60   - HANDLE_ERROR(
61   - cudaFree(result)
62   - );
63   - HANDLE_ERROR(
64   - cudaGraphicsUnmapResources(1, &src)
65   - );
66   -}
67   -
68   -//Shared memory would be better.
69   -__global__
70   -void
71   -applyFilter(unsigned int DIM_X, unsigned int DIM_Y){
72   -//R = floor(size/2)
73   -//THIS IS A NAIVE WAY TO DO IT, and there is a better way)
74   - //__shared__ float shared[(DIM_X+2*R), (DIM_Y+2*R)];
75   -
76   - const int x = IMAD(blockDim.x, blockIdx.x, threadIdx.x);
77   - const int y = IMAD(blockDim.y, blockIdx.y, threadIdx.y);
78   - float val = 0;
79   - //x = max(0,x);
80   - //x = min(x, width-1);
81   - //y = max(y, 0);
82   - //y = min(y, height - 1);
83   -
84   - int idx = y*DIM_X+x;
85   - //unsigned int bindex = threadIdx.y * blockDim.y + threadIdx.x;
86   -
87   - //float valIn = tex2D(texIn, x, y);
88   - for (int i = -kr; i <= kr; i++){ //rows
89   - for (int j = -kr; i <= kr; j++){ //colls
90   - k_idx = (j+kr)+(i+kr)*kl;
91   - xi = max(0, x+i);
92   - xi = min(x+i, DIM_X-1);
93   - yj = max(y+j, 0);
94   - yj = min(y+j, DIM_Y-1);
95   - val += LoG(k_idx)*tex2D(texIn,x+i, y+j);
96   - }
97   - }
98   -
99   - result[idx] = val;
100   -}
stim/cuda/sharedmem.cuh
... ... @@ -35,34 +35,6 @@ namespace stim{
35 35 }
36 36 }
37 37  
38   - template<typename T, typename D>
39   - __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
40   - unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
41   - dim3 threadIdx, dim3 blockDim){
42   -
43   - //calculate the number of iterations required for the copy
44   - unsigned int xI, yI;
45   - xI = X/blockDim.x + 1; //number of iterations along X
46   - yI = Y/blockDim.y + 1; //number of iterations along Y
47   -
48   - //for each iteration
49   - for(unsigned int xi = 0; xi < xI; xi++){
50   - for(unsigned int yi = 0; yi < yI; yi++){
51   -
52   - //calculate the index into shared memory
53   - unsigned int sx = xi * blockDim.x + threadIdx.x;
54   - unsigned int sy = yi * blockDim.y + threadIdx.y;
55   -
56   - //calculate the index into the texture
57   - unsigned int tx = x + sx;
58   - unsigned int ty = y + sy;
59   -
60   - //perform the copy
61   - if(sx < X && sy < Y)
62   - dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty));
63   - }
64   - }
65   - }
66 38  
67 39 }
68 40 }
... ...
stim/gl/gl_spider.h
... ... @@ -96,7 +96,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
96 96 std::vector< stim::vec<float> > cD; //Direction of line currently being traced.
97 97 std::vector< stim::vec<float> > cM; //Magnitude of line currently being traced.
98 98  
99   -// stim::glObj<float> sk; //object to store the skeleton.
100 99 stim::glnetwork<float> nt; //object for storing the network.
101 100  
102 101 stim::vec<float> rev; //reverse vector;
... ... @@ -141,7 +140,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
141 140 setMatrix(); //create the transformation matrix.
142 141 glCallList(dList+1); //move the templates to p, d, m.
143 142 int best = getCost(ptexbufferID, numSamplesPos); //find min cost.
144   - std::cerr << best << std::endl;
145 143 stim::vec<float> next( //find next position.
146 144 pV[best][0],
147 145 pV[best][1],
... ... @@ -170,6 +168,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
170 168  
171 169 ///subject to change.
172 170 ///finds branches.
  171 + ///depreciated
173 172 void
174 173 branchDetection()
175 174 {
... ... @@ -197,14 +196,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
197 196 -p[1] + cylp[1]*S[1]*R[1],
198 197 -p[2] + cylp[2]*S[2]*R[2]);
199 198 seeddir = seeddir.norm();
200   -// float seedm = m[0]/2.0;
201 199 float seedm = m[0];
202 200 // Uncomment for global run
203   -/* stim::vec<float> lSeed = getLastSeed();
204   - if(sqrt(pow((lSeed[0] - vec[0]),2)
205   - + pow((lSeed[1] - vec[1]),2) +
206   - pow((lSeed[2] - vec[2]),2)) > m[0]/4.0
207   - && */
208 201 if(
209 202 !(vec[0] > size[0] || vec[1] > size[1]
210 203 || vec[2] > size[2] || vec[0] < 0
... ... @@ -220,6 +213,8 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
220 213 }
221 214  
222 215  
  216 + ///finds all the branches in the a given fiber.
  217 + ///using LoG method.
223 218 void
224 219 branchDetection2(int n = 8, int l_template = 8, int l_square = 8)
225 220 {
... ... @@ -231,7 +226,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
231 226 std::vector< stim::vec<float> > result = find_branch(btexbufferID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template);
232 227 stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
233 228 float pval;
234   -// std::cerr << "the number of points is " << result.size() << std::endl;
235 229 if(!result.empty())
236 230 {
237 231 for(int i = 0; i < result.size(); i++)
... ... @@ -242,7 +236,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
242 236  
243 237 pval = ((cyl.getl(id+1)-cyl.getl(id))*
244 238 (fmod(result[i][2], id))+cyl.getl(id))/cyl.getl(cL.size()-1);
245   -// std::cout << id << " " << cyl.getl(id) << " " << pval << " " << cyl.getl(cL.size()-1) << fmod(result[i][2], id) << std::endl;
246 239 }
247 240 else if(id == 0)
248 241 {
... ... @@ -252,14 +245,9 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
252 245 {
253 246 pval = (cyl.getl(id)/cyl.getl(cL.size()-1));
254 247 }
255   -// std::cout << "Testing "<< i << ": " << result[i][0] << ", " << result[i][1] << ", " << result[i][2] << std::endl;
256   -// std::cout << "Testing " << pval << std::endl;
257 248 stim::vec<float> v = cyl.surf(pval, result[i][0]);
258   -// std::cout << v[0] << " ," << v[1] << " ," << v[2] << std::endl;
259 249 stim::vec<float> di = cyl.p(pval);
260   -// std::cout << di[0] << " ," << di[1] << " ," << di[2] << std::endl;
261 250 float rad = cyl.r(pval);
262   - std::cout << rad << std::endl;
263 251 if(
264 252 !(v[0] > size[0] || v[1] > size[1]
265 253 || v[2] > size[2] || v[0] < 0
... ... @@ -271,7 +259,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
271 259 }
272 260 }
273 261 }
274   -// std::cout << "I ran the new branch detection" << std::endl;
275 262 }
276 263 }
277 264  
... ... @@ -1506,10 +1493,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1506 1493 spos[2] = spos[2]-sdir[2]*smag[0]/2.;
1507 1494 int h = selectObject(spos, -sdir, smag[0]);
1508 1495 //did start with a fiber?
1509   - if(h != -1){
  1496 + if(h != -1 && h != nt.sizeE()){
1510 1497 // std::cout << "got here double" << smag.str() << std::endl;
1511 1498 nt.addEdge(ce,cm, h, in.second);
1512   - }
  1499 + } else { nt.addEdge(ce,cm, -1, -1);}
1513 1500 }
1514 1501 }
1515 1502 }
... ... @@ -1551,7 +1538,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1551 1538 {
1552 1539 int cost = Step();
1553 1540 if (cost > min_cost){
1554   - std::cout << "Cost Limit" << std::endl;
1555 1541 running = false;
1556 1542 // sk.End();
1557 1543 branchDetection2();
... ... @@ -1566,10 +1552,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1566 1552 || pos[2] > size[2] || pos[0] < 0
1567 1553 || pos[1] < 0 || pos[2] < 0)
1568 1554 {
1569   - std::cout << "Edge Limit" << std::endl;
1570   -// std::cout << "Found Edge" << std::endl;
1571 1555 running = false;
1572   -// sk.End();
1573 1556 branchDetection2();
1574 1557 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1);
1575 1558 addToNetwork(a, spos, smag, sdir);
... ... @@ -1583,13 +1566,10 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1583 1566 rev = -getDirection();
1584 1567 started = true;
1585 1568 }
1586   -// std::cout << i << p << std::endl;
1587 1569 //Has the template size gotten unreasonable?
1588 1570 mag = getMagnitude();
1589 1571 if(mag[0] > 75 || mag[0] < 1){
1590   - std::cout << "Magnitude Limit" << std::endl;
1591 1572 running = false;
1592   -// sk.End();
1593 1573 branchDetection2();
1594 1574 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1);
1595 1575 addToNetwork(a, spos, smag, sdir);
... ... @@ -1601,9 +1581,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1601 1581 h = selectObject(p, getDirection(), m[0]);
1602 1582 //Have we hit something previously traced?
1603 1583 if(h != -1){
1604   - std::cout << "Hit Limit" << std::endl;
1605 1584 running = false;
1606   -// sk.End();
1607 1585 branchDetection2();
1608 1586 pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), h);
1609 1587 addToNetwork(a, spos, smag, sdir);
... ... @@ -1613,14 +1591,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1613 1591 else {
1614 1592 cL.push_back(stim::vec<float>(p[0], p[1],p[2]));
1615 1593 cM.push_back(stim::vec<float>(m[0], m[0]));
1616   -// cM.push_back(m[0]);
1617   -
1618   -// sk.TexCoord(m[0]);
1619   -// sk.Vertex(p[0], p[1], p[2]);
1620 1594 Bind(btexbufferID, bfboID, 27);
1621   - CHECK_OPENGL_ERROR
1622   -// branchDetection();
1623   - CHECK_OPENGL_ERROR
1624 1595 Unbind();
1625 1596 CHECK_OPENGL_ERROR
1626 1597  
... ...