Commit 27194b5660d6d3a15b6deebfe219a9ee84ecc126
1 parent
8761b649
major bug fixes, stopped using the ivote code for branch detection and started u…
…sing a general Laplassian of Gaussians algorithm (the algorithm invers the image atm), fixed a minor bug with the dynamic network creation
Showing
7 changed files
with
226 additions
and
472 deletions
Show diff stats
stim/cuda/branch_detection.cuh
... | ... | @@ -5,157 +5,31 @@ |
5 | 5 | //#include <math.h> |
6 | 6 | #include <stim/visualization/colormap.h> |
7 | 7 | #include <stim/cuda/cuda_texture.cuh> |
8 | -#include <stim/cuda/templates/gradient.cuh> | |
9 | -#include <stim/cuda/templates/gaussian_blur.cuh> | |
10 | -#include <stim/cuda/arraymath.cuh> | |
11 | -#include <stim/cuda/ivote.cuh> | |
12 | -#include <stim/cuda/testKernel.cuh> | |
8 | +#include <stim/cuda/filter.cuh> | |
13 | 9 | typedef unsigned int uint; |
14 | 10 | typedef unsigned int uchar; |
15 | 11 | |
16 | -stim::cuda::cuda_texture t; | |
17 | -float* gpuTable; | |
18 | -float* gpuGrad; | |
19 | -float* gpuVote; | |
20 | -float* gpuI; | |
21 | -float* gpuCenters; | |
22 | - | |
23 | -void atan_2d(float* cpuTable, unsigned int rmax) | |
24 | -{ | |
25 | - //initialize the width and height of the window which atan2 are computed in. | |
26 | - int xsize = 2*rmax +1; | |
27 | - int ysize = 2*rmax +1; | |
28 | - | |
29 | - // assign the center coordinates of the atan2 window to yi and xi | |
30 | - int yi = rmax; | |
31 | - int xi = rmax; | |
32 | - | |
33 | - | |
34 | - for (int xt = 0; xt < xsize; xt++){ | |
35 | - | |
36 | - for(int yt = 0; yt < ysize; yt++){ | |
37 | - | |
38 | - //convert the current 2D coordinates to 1D | |
39 | - int id = yt * xsize + xt; | |
40 | - // calculate the distance between the pixel and the center of the atan2 window | |
41 | - float xd = xi - xt; | |
42 | - float yd = yi - yt; | |
43 | - | |
44 | - // calculate the angle between the pixel and the center of the atan2 window and store the result. | |
45 | - float atan_2d_vote = atan2(yd, xd); | |
46 | - cpuTable[id] = atan_2d_vote; | |
47 | - } | |
48 | - } | |
49 | - | |
50 | -} | |
51 | - | |
52 | -void initCuda(unsigned int bytes_table, unsigned int bytes_ds) | |
53 | -{ | |
54 | - HANDLE_ERROR( | |
55 | - cudaMalloc((void**) &gpuTable, bytes_table) | |
56 | - ); | |
57 | - HANDLE_ERROR( | |
58 | - cudaMalloc((void**) &gpuI, bytes_ds) | |
59 | - ); | |
60 | - HANDLE_ERROR( | |
61 | - cudaMalloc((void**) &gpuGrad, bytes_ds*2) | |
62 | - ); | |
63 | - HANDLE_ERROR( | |
64 | - cudaMalloc((void**) &gpuVote, bytes_ds) | |
65 | - ); | |
66 | - HANDLE_ERROR( | |
67 | - cudaMalloc((void**) &gpuCenters, bytes_ds) | |
68 | - ); | |
69 | -} | |
70 | - | |
71 | -void cleanCuda() | |
72 | -{ | |
73 | - HANDLE_ERROR( | |
74 | - cudaFree(gpuTable) | |
75 | - ); | |
76 | - HANDLE_ERROR( | |
77 | - cudaFree(gpuGrad) | |
78 | - ); | |
79 | - HANDLE_ERROR( | |
80 | - cudaFree(gpuVote) | |
81 | - ); | |
82 | - HANDLE_ERROR( | |
83 | - cudaFree(gpuCenters) | |
84 | - ); | |
85 | - HANDLE_ERROR( | |
86 | - cudaFree(gpuI) | |
87 | - ); | |
88 | -} | |
89 | 12 | |
90 | 13 | std::vector< stim::vec<float> > |
91 | 14 | find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) |
92 | 15 | { |
93 | - float phi = 15.1*M_PI/180; | |
94 | - int iter = 5; | |
95 | - float dphi = phi/iter; | |
96 | - float rmax = 10; | |
97 | - float sigma = 4; | |
98 | - unsigned int pixels = x * y; | |
99 | - unsigned int bytes = sizeof(float) * pixels; | |
100 | - unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1); | |
101 | - unsigned int x_ds = (x + (x % 1 == 0 ? 0:1)); | |
102 | - unsigned int y_ds = (y + (x % 1 == 0 ? 0:1)); | |
103 | - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds; | |
104 | - unsigned int conn = 10; | |
105 | - float final_t = 200.0; | |
106 | - float* cpuTable = (float*) malloc(bytes_table); | |
107 | - float* cpuCenters = (float*) malloc(bytes_ds); | |
16 | + float sigma = 2.0; | |
17 | + unsigned int conn = 7; | |
18 | + float threshold = 40.0; | |
19 | + float* cpuCenters = (float*) malloc(x*y*sizeof(float)); | |
20 | + int sizek = 7; | |
108 | 21 | |
109 | 22 | stringstream name; |
110 | 23 | |
111 | 24 | |
112 | - | |
113 | - | |
114 | -// test(texbufferID, texType, x, y); | |
115 | - std::vector<stim::vec<float> > output; | |
116 | - initCuda(bytes_table, bytes_ds); | |
117 | - | |
118 | - atan_2d(cpuTable, rmax); | |
119 | - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice); | |
120 | - | |
121 | - | |
122 | - t.MapCudaTexture(texbufferID, texType); | |
123 | - cudaDeviceSynchronize(); | |
124 | - stim::cuda::tex_gaussian_blur2<float>( | |
125 | - gpuI, sigma, x, y, t.getTexture(), t.getArray() | |
126 | - ); | |
127 | -// stim::gpu2image<float>(gpuI, "Blur.jpg", x,y , 0, 255); | |
128 | -// stim::gpu2image<float>(t.getArray(), "ORIGINAL.jpg", x,y , 0, 255); | |
25 | + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold); | |
129 | 26 | cudaDeviceSynchronize(); |
130 | 27 | |
131 | 28 | |
132 | - stim::cuda::gpu_gradient_2d<float>( | |
133 | - gpuGrad, gpuI, x, y | |
134 | - ); | |
135 | - cudaDeviceSynchronize(); | |
136 | - | |
137 | - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | |
138 | - cudaDeviceSynchronize(); | |
139 | 29 | |
140 | - cudaDeviceSynchronize(); | |
141 | - for (int i = 0; i < iter; i++) | |
142 | - { | |
143 | - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); | |
144 | - cudaDeviceSynchronize(); | |
145 | - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); | |
146 | - cudaDeviceSynchronize(); | |
147 | - phi = phi - dphi; | |
148 | - } | |
30 | + std::vector<stim::vec<float> > output; | |
149 | 31 | |
150 | 32 | cudaDeviceSynchronize(); |
151 | - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y); | |
152 | -// stim::gpu2image<float>(gpuCenters, "Centers.jpg", x, y, 0, 1); | |
153 | - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost); | |
154 | -// stim::cpu2image<float>(cpuCenters, "CentersXPU.jpg", x, y, 0, 1); | |
155 | -// std::cerr << pixels << " " << x << " " << y << std::endl; | |
156 | -// std::cerr << "y is " << y << ", x is " << x << std::endl; | |
157 | - | |
158 | -// std::cout << "Before " << output.size() << std::endl; | |
159 | 33 | |
160 | 34 | for(int i = 0; i < x; i++) |
161 | 35 | { |
... | ... | @@ -166,8 +40,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) |
166 | 40 | { |
167 | 41 | float x_v = (float) i; |
168 | 42 | float y_v = (float) j; |
169 | - std::cout << x_v/x*360.0 << std::endl; | |
170 | - std::cout << y_v/y << std::endl; | |
171 | 43 | output.push_back(stim::vec<float>((x_v/(float)x*360.0), |
172 | 44 | (y_v), y_v/8)); |
173 | 45 | } |
... | ... | @@ -175,29 +47,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) |
175 | 47 | } |
176 | 48 | } |
177 | 49 | |
178 | -/* for(int i = 0; i < pixels; i++) | |
179 | - { | |
180 | - int ix = (i % x); | |
181 | - int iy = (i / x); | |
182 | - if(cpuCenters[i] != 0) | |
183 | - { | |
184 | - | |
185 | - float x_v = (float) ix; | |
186 | - float y_v = (float) iy; | |
187 | - std::cout << x_v/x*360 << std::endl; | |
188 | - std::cout << y_v/y << std::endl; | |
189 | - output.push_back(stim::vec<float>((x_v/(float)x*360), | |
190 | - (y_v/(float)y), 0.0)); | |
191 | - | |
192 | - } | |
193 | - } */ | |
194 | - | |
195 | -// std::cout << "After " << output.size() << std::endl; | |
196 | - | |
197 | - | |
198 | - t.UnmapCudaTexture(); | |
199 | - cleanCuda(); | |
200 | - free(cpuTable); | |
201 | 50 | free(cpuCenters); |
202 | 51 | return output; |
203 | 52 | } | ... | ... |
stim/cuda/branch_detection2.cuh deleted
1 | -#include <stim/cuda/templates/gaussian_blur.cuh> | |
2 | -#include <stim/cuda/templates/gradient.cuh> | |
3 | -#include <stim/cuda/arraymath.cuh> | |
4 | -#include <stim/cuda/ivote.cuh> | |
5 | - | |
6 | - | |
7 | - | |
8 | - | |
9 | - | |
10 | - | |
11 | - | |
12 | - | |
13 | - | |
14 | - | |
15 | -void atan_2(float* cpuTable, unsigned int rmax){ | |
16 | - | |
17 | - //initialize the width and height of the window which atan2 are computed in. | |
18 | - int xsize = 2*rmax +1; | |
19 | - int ysize = 2*rmax +1; | |
20 | - | |
21 | - // assign the center coordinates of the atan2 window to yi and xi | |
22 | - int yi = rmax; | |
23 | - int xi = rmax; | |
24 | - | |
25 | - | |
26 | - for (int xt = 0; xt < xsize; xt++){ | |
27 | - | |
28 | - for(int yt = 0; yt < ysize; yt++){ | |
29 | - | |
30 | - //convert the current 2D coordinates to 1D | |
31 | - int id = yt * xsize + xt; | |
32 | - // calculate the distance between the pixel and the center of the atan2 window | |
33 | - float xd = xi - xt; | |
34 | - float yd = yi - yt; | |
35 | - | |
36 | - // calculate the angle between the pixel and the center of the atan2 window and store the result. | |
37 | - float atan_2d_vote = atan2(yd, xd); | |
38 | - cpuTable[id] = atan_2d_vote; | |
39 | - } | |
40 | - } | |
41 | - | |
42 | -} | |
43 | -std::vector<stim::vec<float> > | |
44 | -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) | |
45 | -{ | |
46 | - | |
47 | - float* cpuTable = (float | |
48 | - | |
49 | - unsigned int pixels = x * y; | |
50 | - unsigned int bytes = sizeof(float) * pixels; | |
51 | - | |
52 | - //calculate the number of bytes in the atan2 table | |
53 | - | |
54 | - unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float); | |
55 | - | |
56 | - | |
57 | - | |
58 | - //allocate space on the GPU for the atan2 table | |
59 | - | |
60 | - float* gpuTable; | |
61 | - | |
62 | - cudaMalloc(&gpuTable, bytes_table); | |
63 | - | |
64 | - | |
65 | - | |
66 | - cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice); | |
67 | - | |
68 | - unsigned int sigma_ds = 1/resize; | |
69 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
70 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
71 | - unsigned int bytes_ds = sizeof(float) * x_ds * y_ds; | |
72 | - | |
73 | - | |
74 | - float* gpuI; | |
75 | - cudaMalloc(&gpuI, bytes_ds); | |
76 | - | |
77 | - | |
78 | - float* gpuGrad; | |
79 | - cudaMalloc(&gpuGrad, bytes_ds*2); | |
80 | - | |
81 | - float* gpuVote; | |
82 | - cudaMalloc(&gpuVote, bytes_ds); | |
83 | - | |
84 | - // allocate space on the GPU for the detected cell centes | |
85 | - | |
86 | - float* gpuCenters; | |
87 | - | |
88 | - cudaMalloc(&gpuCenters, bytes_ds); | |
89 | - | |
90 | - | |
91 | - stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y); | |
92 | - cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost); | |
93 | - | |
94 | -x = x_ds; | |
95 | - y = y_ds; | |
96 | - t = t * resize; | |
97 | - //sigma = sigma * resize; | |
98 | - | |
99 | - cudaDeviceSynchronize(); | |
100 | - stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y); | |
101 | - cudaDeviceSynchronize(); | |
102 | - cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost); | |
103 | - cudaDeviceSynchronize(); | |
104 | - | |
105 | - stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); | |
106 | - cudaDeviceSynchronize(); | |
107 | - cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost); | |
108 | - | |
109 | - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | |
110 | - cudaDeviceSynchronize(); | |
111 | - cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost); | |
112 | - | |
113 | - | |
114 | - //multiply the gradient by a constant and calculate the absolute value (to save an image) | |
115 | - | |
116 | - stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2); | |
117 | - | |
118 | - cudaDeviceSynchronize(); | |
119 | - | |
120 | - stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2); | |
121 | - | |
122 | - cudaDeviceSynchronize(); | |
123 | - | |
124 | - | |
125 | - for (int i =0; i<iter; i++){ | |
126 | - | |
127 | - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); | |
128 | - cudaDeviceSynchronize(); | |
129 | - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); | |
130 | - cudaDeviceSynchronize(); | |
131 | - switch (i){ | |
132 | - case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
133 | - break; | |
134 | - case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
135 | - break; | |
136 | - case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
137 | - break; | |
138 | - case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
139 | - break; | |
140 | - case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
141 | - break; | |
142 | - default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); | |
143 | - break; | |
144 | - } | |
145 | - phi = phi - dphi; | |
146 | - } | |
147 | - | |
148 | - stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y); | |
149 | - cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost); | |
150 | - | |
151 | -} |
stim/cuda/cuda_texture.cuh
... | ... | @@ -41,6 +41,46 @@ namespace stim |
41 | 41 | texDesc.normalizedCoords = 0; |
42 | 42 | } |
43 | 43 | |
44 | + | |
45 | + ///Enable the nromalized texture coordinates. | |
46 | + ///@param bool, 1 for on, 0 for off | |
47 | + void | |
48 | + SetTextureCoordinates(bool val) | |
49 | + { | |
50 | + if(val) | |
51 | + texDesc.normalizedCoords = 1; | |
52 | + else | |
53 | + texDesc.normalizedCoords = 0; | |
54 | + } | |
55 | + | |
56 | + ///sets the dimension dim to used the mode at the borders of the texture. | |
57 | + ///@param dim : 0-x, 1-y, 2-z | |
58 | + ///@param mode: cudaAddressModeWrap = 0, | |
59 | + /// cudaAddressModeClamp = 1, | |
60 | + /// cudaAddressMNodeMirror = 2, | |
61 | + /// cudaAddressModeBorder = 3, | |
62 | + void | |
63 | + SetAddressMode(int dim, int mode) | |
64 | + { | |
65 | + switch(mode) | |
66 | + { | |
67 | + case 0: | |
68 | + texDesc.addressMode[dim] = cudaAddressModeWrap; | |
69 | + break; | |
70 | + case 1: | |
71 | + texDesc.addressMode[dim] = cudaAddressModeClamp; | |
72 | + break; | |
73 | + case 2: | |
74 | + texDesc.addressMode[dim] = cudaAddressModeMirror; | |
75 | + break; | |
76 | + case 3: | |
77 | + texDesc.addressMode[dim] = cudaAddressModeBorder; | |
78 | + break; | |
79 | + default: | |
80 | + break; | |
81 | + } | |
82 | + } | |
83 | + | |
44 | 84 | //-------------------------------------------------------------------------// |
45 | 85 | //-------------------------------CUDA_MAPPING------------------------------// |
46 | 86 | //-------------------------------------------------------------------------// | ... | ... |
1 | +#ifndef STIM_FILTER_H | |
2 | +#define STIM_FILTER_H | |
3 | + | |
4 | +#include <assert.h> | |
5 | +#include <cuda.h> | |
6 | +#include <cuda_runtime.h> | |
7 | +#include <stdio.h> | |
8 | +#include <stim/visualization/colormap.h> | |
9 | +#include <sstream> | |
10 | +#include <stim/cuda/cudatools/devices.h> | |
11 | +#include <stim/cuda/cudatools/threads.h> | |
12 | +#include <stim/cuda/cuda_texture.cuh> | |
13 | +#include <stim/cuda/ivote.cuh> | |
14 | +#include <stim/cuda/arraymath.cuh> | |
15 | + | |
16 | +#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) ) | |
17 | +#define M_PI 3.141592654f | |
18 | + | |
19 | + | |
20 | +namespace stim | |
21 | +{ | |
22 | + namespace cuda | |
23 | + { | |
24 | + | |
25 | + float* gpuLoG; | |
26 | + float* LoG; | |
27 | + float* res; | |
28 | + float* centers; | |
29 | + stim::cuda::cuda_texture tx; | |
30 | + | |
31 | + | |
32 | + | |
33 | + void initArray(int DIM_X, int DIM_Y, int kl) | |
34 | + { | |
35 | + | |
36 | + LoG = (float*) malloc(kl*kl*sizeof(float)); | |
37 | + HANDLE_ERROR( | |
38 | + cudaMalloc( (void**) &gpuLoG, kl*kl*sizeof(float)) | |
39 | + ); | |
40 | + // checkCUDAerrors("Memory Allocation, LoG"); | |
41 | + HANDLE_ERROR( | |
42 | + cudaMalloc( (void**) &res, DIM_Y*DIM_X*sizeof(float)) | |
43 | + ); | |
44 | + HANDLE_ERROR( | |
45 | + cudaMalloc( (void**) ¢ers, DIM_Y*DIM_X*sizeof(float)) | |
46 | + ); | |
47 | + // checkCUDAerrors("Memory Allocation, Result"); | |
48 | + } | |
49 | + | |
50 | + void cleanUp(cudaGraphicsResource_t src) | |
51 | + { | |
52 | + HANDLE_ERROR( | |
53 | + cudaFree(gpuLoG) | |
54 | + ); | |
55 | + HANDLE_ERROR( | |
56 | + cudaFree(res) | |
57 | + ); | |
58 | + HANDLE_ERROR( | |
59 | + cudaFree(centers) | |
60 | + ); | |
61 | + free(LoG); | |
62 | + } | |
63 | + | |
64 | + void | |
65 | + filterKernel (float kl, float sigma, float *LoG) | |
66 | + { | |
67 | + float t = 0.0; | |
68 | + float kr = kl/2; | |
69 | + float x, y; | |
70 | + int idx; | |
71 | + for(int i = 0; i < kl; i++){ | |
72 | + for(int j = 0; j < kl; j++){ | |
73 | + idx = j*kl+i; | |
74 | + x = i - kr - 0.5; | |
75 | + y = j - kr - 0.5; | |
76 | + LoG[idx] = (-1.0/M_PI/powf(sigma, 4))* (1 - (powf(x,2)+powf(y,2))/2.0/powf(sigma, 2)) | |
77 | + *expf(-(powf(x,2)+powf(y,2))/2/powf(sigma,2)); | |
78 | + t +=LoG[idx]; | |
79 | + } | |
80 | + } | |
81 | + | |
82 | + for(int i = 0; i < kl*kl; i++) | |
83 | + { | |
84 | + LoG[i] = LoG[i]/t; | |
85 | + } | |
86 | + | |
87 | + } | |
88 | + | |
89 | + //Shared memory would be better. | |
90 | + __global__ | |
91 | + void | |
92 | + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){ | |
93 | + //R = floor(size/2) | |
94 | + //THIS IS A NAIVE WAY TO DO IT, and there is a better way) | |
95 | + | |
96 | + __shared__ float shared[7][7]; | |
97 | + int x = blockIdx.x; | |
98 | + int y = blockIdx.y; | |
99 | + int xi = threadIdx.x; | |
100 | + int yi = threadIdx.y; | |
101 | + float val = 0; | |
102 | + float tu = (x-kr+xi)/(float)DIM_X; | |
103 | + float tv = (y-kr+yi)/(float)DIM_Y; | |
104 | + shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv)); | |
105 | + __syncthreads(); | |
106 | + | |
107 | + | |
108 | + //x = max(0,x); | |
109 | + //x = min(x, width-1); | |
110 | + //y = max(y, 0); | |
111 | + //y = min(y, height - 1); | |
112 | + | |
113 | + int idx = y*DIM_X+x; | |
114 | + int k_idx; | |
115 | + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1) | |
116 | + { | |
117 | + __syncthreads(); | |
118 | + if (xi < step) | |
119 | + { | |
120 | + shared[xi][yi] += shared[xi + step][yi]; | |
121 | + } | |
122 | + __syncthreads(); | |
123 | + } | |
124 | + __syncthreads(); | |
125 | + | |
126 | + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1) | |
127 | + { | |
128 | + __syncthreads(); | |
129 | + if(yi < step) | |
130 | + { | |
131 | + shared[xi][yi] += shared[xi][yi + step]; | |
132 | + } | |
133 | + __syncthreads(); | |
134 | + } | |
135 | + __syncthreads(); | |
136 | + if(xi == 0 && yi == 0) | |
137 | + res[idx] = shared[0][0]; | |
138 | + } | |
139 | + | |
140 | + extern "C" | |
141 | + float * | |
142 | + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold) | |
143 | + { | |
144 | + tx.SetTextureCoordinates(1); | |
145 | + tx.SetAddressMode(1, 3); | |
146 | + tx.MapCudaTexture(texbufferID, texType); | |
147 | + float* result = (float*) malloc(DIM_X*DIM_Y*sizeof(float)); | |
148 | + | |
149 | + initArray(DIM_X, DIM_Y, sizeK); | |
150 | + | |
151 | + filterKernel(sizeK, sigma, LoG); | |
152 | + cudaMemcpy(gpuLoG, LoG, sizeK*sizeK*sizeof(float), cudaMemcpyHostToDevice); | |
153 | + dim3 numBlocks(DIM_X, DIM_Y); | |
154 | + dim3 threadsPerBlock(sizeK, sizeK); | |
155 | + | |
156 | + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG); | |
157 | + | |
158 | + | |
159 | + stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y); | |
160 | + | |
161 | + cudaDeviceSynchronize(); | |
162 | + | |
163 | + | |
164 | + cudaMemcpy(result, centers, DIM_X*DIM_Y*sizeof(float), cudaMemcpyDeviceToHost); | |
165 | + | |
166 | + tx.UnmapCudaTexture(); | |
167 | + cleanUP(); | |
168 | + return result; | |
169 | + } | |
170 | + | |
171 | + } | |
172 | +} | |
173 | +#endif | ... | ... |
stim/cuda/filter.h deleted
1 | -#include <assert.h> | |
2 | -#include <cuda.h> | |
3 | -#include <cuda_runtime.h> | |
4 | -#include <stdio.h> | |
5 | -#include <stim/visualization/colormap.h> | |
6 | -#include <sstream> | |
7 | - | |
8 | -#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) ) | |
9 | - | |
10 | -int kr; | |
11 | -int kl; | |
12 | -float sigma; | |
13 | -float* LoG; | |
14 | -float* result; | |
15 | -cudaArray* srcArray; | |
16 | -texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn; | |
17 | - | |
18 | - | |
19 | -__device__ float filterKernel () | |
20 | -{ | |
21 | - float t = 0; | |
22 | - idx = j*kl+i; | |
23 | - for(int i = 0; i < kl; i++){ | |
24 | - for(int j = 0; j < kl; j++){ | |
25 | - x = i - floor(kl); | |
26 | - y = j - floor(kl); | |
27 | - LoG(idx) = (-1/M_PI/sigma^4)* (1 - (x^2+y^2)/2/sigma^2) | |
28 | - *exp(-(x^2+y^2)/2/sigma^2); | |
29 | - t +=LoG(idx); | |
30 | - } | |
31 | - } | |
32 | - LoG =/ t; | |
33 | -} | |
34 | - | |
35 | -void initArray(cudaGraphicsResource_t src, int DIM_X, int DIM_Y) | |
36 | -{ | |
37 | - HANDLE_ERROR( | |
38 | - cudaGraphicsMapResources(1, &src) | |
39 | - ); | |
40 | - HANDLE_ERROR( | |
41 | - cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0,0) | |
42 | - ); | |
43 | - HANDLE_ERROR( | |
44 | - cudaBindTertureToArray(texIn, srcArray) | |
45 | - ); | |
46 | - cudaMalloc( (void**) &LoG, kl*kl*sizeof(float)); | |
47 | - checkCUDAerrors("Memory Allocation, LoG"); | |
48 | - cudaMalloc( (void**) &result, DIM_Y*DIM_X*sizeof(float)); | |
49 | - checkCUDAerrors("Memory Allocation, Result"); | |
50 | -} | |
51 | - | |
52 | -void cleanUp(cudaGraphicsResource_t src); | |
53 | -{ | |
54 | - HANDLE_ERROR( | |
55 | - cudaUnbindTexture(texIn) | |
56 | - ); | |
57 | - HANDLE_ERROR( | |
58 | - cudaFree(LoG) | |
59 | - ); | |
60 | - HANDLE_ERROR( | |
61 | - cudaFree(result) | |
62 | - ); | |
63 | - HANDLE_ERROR( | |
64 | - cudaGraphicsUnmapResources(1, &src) | |
65 | - ); | |
66 | -} | |
67 | - | |
68 | -//Shared memory would be better. | |
69 | -__global__ | |
70 | -void | |
71 | -applyFilter(unsigned int DIM_X, unsigned int DIM_Y){ | |
72 | -//R = floor(size/2) | |
73 | -//THIS IS A NAIVE WAY TO DO IT, and there is a better way) | |
74 | - //__shared__ float shared[(DIM_X+2*R), (DIM_Y+2*R)]; | |
75 | - | |
76 | - const int x = IMAD(blockDim.x, blockIdx.x, threadIdx.x); | |
77 | - const int y = IMAD(blockDim.y, blockIdx.y, threadIdx.y); | |
78 | - float val = 0; | |
79 | - //x = max(0,x); | |
80 | - //x = min(x, width-1); | |
81 | - //y = max(y, 0); | |
82 | - //y = min(y, height - 1); | |
83 | - | |
84 | - int idx = y*DIM_X+x; | |
85 | - //unsigned int bindex = threadIdx.y * blockDim.y + threadIdx.x; | |
86 | - | |
87 | - //float valIn = tex2D(texIn, x, y); | |
88 | - for (int i = -kr; i <= kr; i++){ //rows | |
89 | - for (int j = -kr; i <= kr; j++){ //colls | |
90 | - k_idx = (j+kr)+(i+kr)*kl; | |
91 | - xi = max(0, x+i); | |
92 | - xi = min(x+i, DIM_X-1); | |
93 | - yj = max(y+j, 0); | |
94 | - yj = min(y+j, DIM_Y-1); | |
95 | - val += LoG(k_idx)*tex2D(texIn,x+i, y+j); | |
96 | - } | |
97 | - } | |
98 | - | |
99 | - result[idx] = val; | |
100 | -} |
stim/cuda/sharedmem.cuh
... | ... | @@ -35,34 +35,6 @@ namespace stim{ |
35 | 35 | } |
36 | 36 | } |
37 | 37 | |
38 | - template<typename T, typename D> | |
39 | - __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src, | |
40 | - unsigned int x, unsigned int y, unsigned int X, unsigned int Y, | |
41 | - dim3 threadIdx, dim3 blockDim){ | |
42 | - | |
43 | - //calculate the number of iterations required for the copy | |
44 | - unsigned int xI, yI; | |
45 | - xI = X/blockDim.x + 1; //number of iterations along X | |
46 | - yI = Y/blockDim.y + 1; //number of iterations along Y | |
47 | - | |
48 | - //for each iteration | |
49 | - for(unsigned int xi = 0; xi < xI; xi++){ | |
50 | - for(unsigned int yi = 0; yi < yI; yi++){ | |
51 | - | |
52 | - //calculate the index into shared memory | |
53 | - unsigned int sx = xi * blockDim.x + threadIdx.x; | |
54 | - unsigned int sy = yi * blockDim.y + threadIdx.y; | |
55 | - | |
56 | - //calculate the index into the texture | |
57 | - unsigned int tx = x + sx; | |
58 | - unsigned int ty = y + sy; | |
59 | - | |
60 | - //perform the copy | |
61 | - if(sx < X && sy < Y) | |
62 | - dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty)); | |
63 | - } | |
64 | - } | |
65 | - } | |
66 | 38 | |
67 | 39 | } |
68 | 40 | } | ... | ... |
stim/gl/gl_spider.h
... | ... | @@ -96,7 +96,6 @@ class gl_spider : public virtual gl_texture<T> |
96 | 96 | std::vector< stim::vec<float> > cD; //Direction of line currently being traced. |
97 | 97 | std::vector< stim::vec<float> > cM; //Magnitude of line currently being traced. |
98 | 98 | |
99 | -// stim::glObj<float> sk; //object to store the skeleton. | |
100 | 99 | stim::glnetwork<float> nt; //object for storing the network. |
101 | 100 | |
102 | 101 | stim::vec<float> rev; //reverse vector; |
... | ... | @@ -141,7 +140,6 @@ class gl_spider : public virtual gl_texture<T> |
141 | 140 | setMatrix(); //create the transformation matrix. |
142 | 141 | glCallList(dList+1); //move the templates to p, d, m. |
143 | 142 | int best = getCost(ptexbufferID, numSamplesPos); //find min cost. |
144 | - std::cerr << best << std::endl; | |
145 | 143 | stim::vec<float> next( //find next position. |
146 | 144 | pV[best][0], |
147 | 145 | pV[best][1], |
... | ... | @@ -170,6 +168,7 @@ class gl_spider : public virtual gl_texture<T> |
170 | 168 | |
171 | 169 | ///subject to change. |
172 | 170 | ///finds branches. |
171 | + ///depreciated | |
173 | 172 | void |
174 | 173 | branchDetection() |
175 | 174 | { |
... | ... | @@ -197,14 +196,8 @@ class gl_spider : public virtual gl_texture<T> |
197 | 196 | -p[1] + cylp[1]*S[1]*R[1], |
198 | 197 | -p[2] + cylp[2]*S[2]*R[2]); |
199 | 198 | seeddir = seeddir.norm(); |
200 | -// float seedm = m[0]/2.0; | |
201 | 199 | float seedm = m[0]; |
202 | 200 | // Uncomment for global run |
203 | -/* stim::vec<float> lSeed = getLastSeed(); | |
204 | - if(sqrt(pow((lSeed[0] - vec[0]),2) | |
205 | - + pow((lSeed[1] - vec[1]),2) + | |
206 | - pow((lSeed[2] - vec[2]),2)) > m[0]/4.0 | |
207 | - && */ | |
208 | 201 | if( |
209 | 202 | !(vec[0] > size[0] || vec[1] > size[1] |
210 | 203 | || vec[2] > size[2] || vec[0] < 0 |
... | ... | @@ -220,6 +213,8 @@ class gl_spider : public virtual gl_texture<T> |
220 | 213 | } |
221 | 214 | |
222 | 215 | |
216 | + ///finds all the branches in the a given fiber. | |
217 | + ///using LoG method. | |
223 | 218 | void |
224 | 219 | branchDetection2(int n = 8, int l_template = 8, int l_square = 8) |
225 | 220 | { |
... | ... | @@ -231,7 +226,6 @@ class gl_spider : public virtual gl_texture<T> |
231 | 226 | std::vector< stim::vec<float> > result = find_branch(btexbufferID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); |
232 | 227 | stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); |
233 | 228 | float pval; |
234 | -// std::cerr << "the number of points is " << result.size() << std::endl; | |
235 | 229 | if(!result.empty()) |
236 | 230 | { |
237 | 231 | for(int i = 0; i < result.size(); i++) |
... | ... | @@ -242,7 +236,6 @@ class gl_spider : public virtual gl_texture<T> |
242 | 236 | |
243 | 237 | pval = ((cyl.getl(id+1)-cyl.getl(id))* |
244 | 238 | (fmod(result[i][2], id))+cyl.getl(id))/cyl.getl(cL.size()-1); |
245 | -// std::cout << id << " " << cyl.getl(id) << " " << pval << " " << cyl.getl(cL.size()-1) << fmod(result[i][2], id) << std::endl; | |
246 | 239 | } |
247 | 240 | else if(id == 0) |
248 | 241 | { |
... | ... | @@ -252,14 +245,9 @@ class gl_spider : public virtual gl_texture<T> |
252 | 245 | { |
253 | 246 | pval = (cyl.getl(id)/cyl.getl(cL.size()-1)); |
254 | 247 | } |
255 | -// std::cout << "Testing "<< i << ": " << result[i][0] << ", " << result[i][1] << ", " << result[i][2] << std::endl; | |
256 | -// std::cout << "Testing " << pval << std::endl; | |
257 | 248 | stim::vec<float> v = cyl.surf(pval, result[i][0]); |
258 | -// std::cout << v[0] << " ," << v[1] << " ," << v[2] << std::endl; | |
259 | 249 | stim::vec<float> di = cyl.p(pval); |
260 | -// std::cout << di[0] << " ," << di[1] << " ," << di[2] << std::endl; | |
261 | 250 | float rad = cyl.r(pval); |
262 | - std::cout << rad << std::endl; | |
263 | 251 | if( |
264 | 252 | !(v[0] > size[0] || v[1] > size[1] |
265 | 253 | || v[2] > size[2] || v[0] < 0 |
... | ... | @@ -271,7 +259,6 @@ class gl_spider : public virtual gl_texture<T> |
271 | 259 | } |
272 | 260 | } |
273 | 261 | } |
274 | -// std::cout << "I ran the new branch detection" << std::endl; | |
275 | 262 | } |
276 | 263 | } |
277 | 264 | |
... | ... | @@ -1506,10 +1493,10 @@ class gl_spider : public virtual gl_texture<T> |
1506 | 1493 | spos[2] = spos[2]-sdir[2]*smag[0]/2.; |
1507 | 1494 | int h = selectObject(spos, -sdir, smag[0]); |
1508 | 1495 | //did start with a fiber? |
1509 | - if(h != -1){ | |
1496 | + if(h != -1 && h != nt.sizeE()){ | |
1510 | 1497 | // std::cout << "got here double" << smag.str() << std::endl; |
1511 | 1498 | nt.addEdge(ce,cm, h, in.second); |
1512 | - } | |
1499 | + } else { nt.addEdge(ce,cm, -1, -1);} | |
1513 | 1500 | } |
1514 | 1501 | } |
1515 | 1502 | } |
... | ... | @@ -1551,7 +1538,6 @@ class gl_spider : public virtual gl_texture<T> |
1551 | 1538 | { |
1552 | 1539 | int cost = Step(); |
1553 | 1540 | if (cost > min_cost){ |
1554 | - std::cout << "Cost Limit" << std::endl; | |
1555 | 1541 | running = false; |
1556 | 1542 | // sk.End(); |
1557 | 1543 | branchDetection2(); |
... | ... | @@ -1566,10 +1552,7 @@ class gl_spider : public virtual gl_texture<T> |
1566 | 1552 | || pos[2] > size[2] || pos[0] < 0 |
1567 | 1553 | || pos[1] < 0 || pos[2] < 0) |
1568 | 1554 | { |
1569 | - std::cout << "Edge Limit" << std::endl; | |
1570 | -// std::cout << "Found Edge" << std::endl; | |
1571 | 1555 | running = false; |
1572 | -// sk.End(); | |
1573 | 1556 | branchDetection2(); |
1574 | 1557 | pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1); |
1575 | 1558 | addToNetwork(a, spos, smag, sdir); |
... | ... | @@ -1583,13 +1566,10 @@ class gl_spider : public virtual gl_texture<T> |
1583 | 1566 | rev = -getDirection(); |
1584 | 1567 | started = true; |
1585 | 1568 | } |
1586 | -// std::cout << i << p << std::endl; | |
1587 | 1569 | //Has the template size gotten unreasonable? |
1588 | 1570 | mag = getMagnitude(); |
1589 | 1571 | if(mag[0] > 75 || mag[0] < 1){ |
1590 | - std::cout << "Magnitude Limit" << std::endl; | |
1591 | 1572 | running = false; |
1592 | -// sk.End(); | |
1593 | 1573 | branchDetection2(); |
1594 | 1574 | pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), -1); |
1595 | 1575 | addToNetwork(a, spos, smag, sdir); |
... | ... | @@ -1601,9 +1581,7 @@ class gl_spider : public virtual gl_texture<T> |
1601 | 1581 | h = selectObject(p, getDirection(), m[0]); |
1602 | 1582 | //Have we hit something previously traced? |
1603 | 1583 | if(h != -1){ |
1604 | - std::cout << "Hit Limit" << std::endl; | |
1605 | 1584 | running = false; |
1606 | -// sk.End(); | |
1607 | 1585 | branchDetection2(); |
1608 | 1586 | pair<stim::fiber<float>, int> a(stim::fiber<float> (cL, cM), h); |
1609 | 1587 | addToNetwork(a, spos, smag, sdir); |
... | ... | @@ -1613,14 +1591,7 @@ class gl_spider : public virtual gl_texture<T> |
1613 | 1591 | else { |
1614 | 1592 | cL.push_back(stim::vec<float>(p[0], p[1],p[2])); |
1615 | 1593 | cM.push_back(stim::vec<float>(m[0], m[0])); |
1616 | -// cM.push_back(m[0]); | |
1617 | - | |
1618 | -// sk.TexCoord(m[0]); | |
1619 | -// sk.Vertex(p[0], p[1], p[2]); | |
1620 | 1594 | Bind(btexbufferID, bfboID, 27); |
1621 | - CHECK_OPENGL_ERROR | |
1622 | -// branchDetection(); | |
1623 | - CHECK_OPENGL_ERROR | |
1624 | 1595 | Unbind(); |
1625 | 1596 | CHECK_OPENGL_ERROR |
1626 | 1597 | ... | ... |