Commit a744d027c4d9d6583db594dc75811c6ff90eaef6
1 parent
46820b25
upload the ivote3 functions using the new method for finding voting area (bounda…
…ries of voting area)'
Showing
9 changed files
with
657 additions
and
122 deletions
Show diff stats
No preview for this file type
Matlab_3D/validation.m
1 | 1 | clear all; |
2 | -disp('***************** NEW RUN *********************'); | |
2 | +%for t=100:100:5000 | |
3 | +t=2350; | |
3 | 4 | X = 128; |
4 | 5 | Y = 128; |
5 | 6 | Z = 128; |
... | ... | @@ -7,15 +8,15 @@ D = 10; |
7 | 8 | t0=1; |
8 | 9 | r1=12; |
9 | 10 | r2=10; |
10 | -t=2300; | |
11 | -itr=8; | |
12 | -vote=10; | |
11 | +% t=2300; | |
12 | +itr=5; | |
13 | +vote=7; | |
13 | 14 | std = [5 5]; |
14 | 15 | gt_filename = 'gt2.vol'; |
15 | 16 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); |
16 | -out_filename = sprintf('D:/build/ivote3-bld/shared2D-v8/centers.%d.vol',t); | |
17 | +out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); | |
17 | 18 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); |
18 | -txt_filename = sprintf('0-t%d--1.txt',t); | |
19 | +txt_filename = sprintf('D:/build/ivote3-bld/0-t%d-atomic-aabb.txt',t); | |
19 | 20 | spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote); |
20 | 21 | fid0 = fopen(gt_filename); |
21 | 22 | gt = fread(fid0,[X Y*Z], 'single'); |
... | ... | @@ -79,73 +80,73 @@ for k=1:size(id_ref,1) |
79 | 80 | oseg = oseg+1; |
80 | 81 | end |
81 | 82 | end |
82 | -%**** | |
83 | -% check if the oversegmented detected cells could be assigned to the gt's element with no detected cell! | |
84 | -%find the indices of gt's element with no detected cell | |
85 | -u_idx1 = find(b==0); | |
86 | -%find the coordinates of gt's | |
87 | -ref1 = ref(u_idx1,:); | |
88 | -% find the indices of detected cells which are assigned to a gt's element | |
89 | -ub = unique(b(b>0)); | |
90 | -% set the number of detected cell which are not assigned to a gt's element to zero | |
91 | -mm=0; | |
92 | -% find the indices of detected cell which are not assigned to a gt's element | |
93 | -for j=1:size(cent,1) | |
94 | - z = sum(ub(:)==j); | |
95 | - if z==0 | |
96 | - mm = mm+1; | |
97 | - cent_id1(mm) = j; | |
98 | - end | |
99 | -end | |
100 | -% find the coordinated of of detected cells | |
101 | -cent1 = cent(cent_id1,:); | |
102 | - | |
103 | -% check if there are osme low enough distances | |
104 | -[idx1, dist1] = knnsearch(cent1,ref1); | |
105 | -ind_eq_idx_rc = zeros(numel(idx1), 10); | |
106 | -%set number of cells with the same detected cell center to zero. | |
107 | -noc_rc=0; | |
108 | -for i_rc=1:numel(idx1) | |
109 | - % check the current element hasn't considered yet | |
110 | - s_ind_rc = sum(ind_eq_idx_rc==i_rc); | |
111 | - if s_ind_rc==0 | |
112 | - % check how many of the elemnts assigned to the same detected cell | |
113 | - s_rc = sum(idx1==idx1(i_rc)); | |
114 | - if s_rc>1 | |
115 | - noc_rc = noc_rc+1; | |
116 | - %save the index and number of gt's elements with the same assigned detected cells. | |
117 | - id_ref_rc(noc_rc,1) = i_rc; | |
118 | - id_ref_rc(noc_rc,2) = s_rc; | |
119 | - ind1_rc = find(idx1==idx1(i_rc)); | |
120 | - ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc; | |
121 | - end | |
122 | - end | |
123 | -end | |
124 | -% determine those indices which hs assigned to only one detected cell | |
125 | -b_rc = idx1; | |
126 | -u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0)); | |
127 | -b_rc(u_eq_idx_rc)=0; | |
128 | -% set the number of over sefgmented cells to zero. | |
129 | -oseg_rc=0; | |
130 | -for k_rc=1:size(id_ref_rc,1) | |
131 | - k1_rc = id_ref_rc(k_rc,1); | |
132 | - k2_rc = id_ref_rc(k_rc,2); | |
133 | - %find the minimum distance to the detected cell happened for which of the gt's element | |
134 | - l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc); | |
135 | - [~, local_id_min_rc] = min(dist1(l_rc)); | |
136 | - % set the element with minimum distance to the corresponding detected cell | |
137 | - b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc); | |
138 | - %remove the proper element from the list of indices with same designated detected cells | |
139 | - u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0; | |
140 | - % check if the indices remained in the list has distance less than the value is set for validation | |
141 | - ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc)); | |
142 | - distl_rc = dist1(ly_rc); | |
143 | - sl_rc = sum(distl_rc<D); | |
144 | - % if the distance is low enought, consider the corresponding cell as oversegmented | |
145 | - if sl_rc>0 | |
146 | - oseg_rc = oseg_rc+1; | |
147 | - end | |
148 | -end | |
83 | +% %**** | |
84 | +% % check if the oversegmented detected cells could be assigned to the gt's element with no detected cell! | |
85 | +% %find the indices of gt's element with no detected cell | |
86 | +% u_idx1 = find(b==0); | |
87 | +% %find the coordinates of gt's | |
88 | +% ref1 = ref(u_idx1,:); | |
89 | +% % find the indices of detected cells which are assigned to a gt's element | |
90 | +% ub = unique(b(b>0)); | |
91 | +% % set the number of detected cell which are not assigned to a gt's element to zero | |
92 | +% mm=0; | |
93 | +% % find the indices of detected cell which are not assigned to a gt's element | |
94 | +% for j=1:size(cent,1) | |
95 | +% z = sum(ub(:)==j); | |
96 | +% if z==0 | |
97 | +% mm = mm+1; | |
98 | +% cent_id1(mm) = j; | |
99 | +% end | |
100 | +% end | |
101 | +% % find the coordinated of of detected cells | |
102 | +% cent1 = cent(cent_id1,:); | |
103 | +% | |
104 | +% % check if there are osme low enough distances | |
105 | +% [idx1, dist1] = knnsearch(cent1,ref1); | |
106 | +% ind_eq_idx_rc = zeros(numel(idx1), 10); | |
107 | +% %set number of cells with the same detected cell center to zero. | |
108 | +% noc_rc=0; | |
109 | +% for i_rc=1:numel(idx1) | |
110 | +% % check the current element hasn't considered yet | |
111 | +% s_ind_rc = sum(ind_eq_idx_rc==i_rc); | |
112 | +% if s_ind_rc==0 | |
113 | +% % check how many of the elemnts assigned to the same detected cell | |
114 | +% s_rc = sum(idx1==idx1(i_rc)); | |
115 | +% if s_rc>1 | |
116 | +% noc_rc = noc_rc+1; | |
117 | +% %save the index and number of gt's elements with the same assigned detected cells. | |
118 | +% id_ref_rc(noc_rc,1) = i_rc; | |
119 | +% id_ref_rc(noc_rc,2) = s_rc; | |
120 | +% ind1_rc = find(idx1==idx1(i_rc)); | |
121 | +% ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc; | |
122 | +% end | |
123 | +% end | |
124 | +% end | |
125 | +% % determine those indices which hs assigned to only one detected cell | |
126 | +% b_rc = idx1; | |
127 | +% u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0)); | |
128 | +% b_rc(u_eq_idx_rc)=0; | |
129 | +% % set the number of over sefgmented cells to zero. | |
130 | +% oseg_rc=0; | |
131 | +% for k_rc=1:size(id_ref_rc,1) | |
132 | +% k1_rc = id_ref_rc(k_rc,1); | |
133 | +% k2_rc = id_ref_rc(k_rc,2); | |
134 | +% %find the minimum distance to the detected cell happened for which of the gt's element | |
135 | +% l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc); | |
136 | +% [~, local_id_min_rc] = min(dist1(l_rc)); | |
137 | +% % set the element with minimum distance to the corresponding detected cell | |
138 | +% b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc); | |
139 | +% %remove the proper element from the list of indices with same designated detected cells | |
140 | +% u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0; | |
141 | +% % check if the indices remained in the list has distance less than the value is set for validation | |
142 | +% ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc)); | |
143 | +% distl_rc = dist1(ly_rc); | |
144 | +% sl_rc = sum(distl_rc<D); | |
145 | +% % if the distance is low enought, consider the corresponding cell as oversegmented | |
146 | +% if sl_rc>0 | |
147 | +% oseg_rc = oseg_rc+1; | |
148 | +% end | |
149 | +% end | |
149 | 150 | |
150 | 151 | %****** |
151 | 152 | % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value |
... | ... | @@ -154,11 +155,11 @@ b_dist = dist; |
154 | 155 | % remove the disatances for those elements with no detected cells from distance array |
155 | 156 | b_dist(b_ind)=-1; |
156 | 157 | |
157 | -b_ind_rc = find(b_rc==0); | |
158 | -b_dist_rc = dist1; | |
159 | -b_dist_rc(b_ind_rc)=-1; | |
158 | +% b_ind_rc = find(b_rc==0); | |
159 | +% b_dist_rc = dist1; | |
160 | +% b_dist_rc(b_ind_rc)=-1; | |
160 | 161 | % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements. |
161 | -TP = sum(b_dist>=0) - sum(b_dist>D) + (sum(b_dist_rc>=0) - sum(b_dist_rc>D)); | |
162 | +TP = sum(b_dist>=0) - sum(b_dist>D); % + (sum(b_dist_rc>=0) - sum(b_dist_rc>D)); | |
162 | 163 | % calculate False Negative, number of gt's elements with no detected cells. |
163 | 164 | FN = size(ref, 1) - TP; |
164 | 165 | % calculate False Positive, number of detected cells, which their distance to the gt's element is long |
... | ... | @@ -179,3 +180,5 @@ fprintf(fid_text, 'Precision = %f\n', Precision); |
179 | 180 | fprintf(fid_text, 'Accuracy = %f\n', Accuracy); |
180 | 181 | fprintf(fid_text, 'Recall = %f\n', Recall); |
181 | 182 | fclose(fid_text); |
183 | +clear all; | |
184 | +%end | |
182 | 185 | \ No newline at end of file | ... | ... |
cpp/CMakeLists.txt
cpp/cudafunc.cu
1 | +/*#include "circle_check.cuh" | |
2 | + | |
3 | +void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z){ | |
4 | +gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | |
5 | +} | |
6 | +*/ | |
7 | + | |
8 | + | |
1 | 9 | #include "gaussian_blur3.cuh" |
2 | 10 | #include "gradient3.cuh" |
3 | 11 | #include "mag3.cuh" |
4 | -#include "vote3.cuh" | |
12 | +#include "vote3_atomic_aabb.cuh" | |
5 | 13 | #include "update_dir3.cuh" |
6 | 14 | #include "local_max3.cuh" |
7 | 15 | |
... | ... | @@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, |
10 | 18 | int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ |
11 | 19 | |
12 | 20 | |
13 | - cudaSetDevice(0); | |
21 | + cudaSetDevice(1); | |
14 | 22 | // compute the number of bytes in the input data |
15 | 23 | unsigned int bytes = x * y * z * sizeof(float); |
16 | 24 | |
17 | - //assign memory on gpu for the input data. | |
25 | + //assign memory on gpu for the input data.z | |
18 | 26 | float* gpuI0; |
19 | 27 | cudaMalloc(&gpuI0, bytes); |
20 | 28 | |
... | ... | @@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, |
41 | 49 | |
42 | 50 | //call the vote function. |
43 | 51 | for (int i = 0; i < iter; i++){ |
44 | - | |
52 | + | |
53 | + cudaMemset(gpu_vote, 0, bytes); | |
45 | 54 | gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); |
46 | 55 | cudaDeviceSynchronize(); |
47 | - /*if (i==7) | |
48 | - cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);*/ | |
49 | - | |
50 | - if (phi >= d_phi){ | |
56 | + | |
57 | + //if (phi >= d_phi){ | |
51 | 58 | gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); |
52 | 59 | cudaDeviceSynchronize(); |
53 | 60 | phi = phi - d_phi; |
54 | 61 | cos_phi = cos(phi); |
55 | - } | |
62 | + //} | |
56 | 63 | |
57 | 64 | } |
58 | 65 | |
... | ... | @@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, |
78 | 85 | void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ |
79 | 86 | unsigned int bytes = x * y * z * sizeof(float); |
80 | 87 | |
88 | + cudaSetDevice(1); | |
89 | + | |
81 | 90 | //assign memory on gpu for the input data. |
82 | 91 | float* gpuV; |
83 | 92 | cudaMalloc(&gpuV, bytes); |
... | ... | @@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u |
96 | 105 | |
97 | 106 | cudaFree(gpuV); |
98 | 107 | cudaFree(gpuOut); |
99 | -} | |
100 | 108 | \ No newline at end of file |
109 | +} | |
110 | + | ... | ... |
cpp/main.cpp
... | ... | @@ -9,6 +9,84 @@ |
9 | 9 | #include <stim/image/image.h> |
10 | 10 | #define pi 3.14159 |
11 | 11 | |
12 | +#define M_PI 3.14159 | |
13 | +#include <stim/math/circle.h> | |
14 | +#include <stim/math/vec3.h> | |
15 | +#include <stim/math/plane.h> | |
16 | +#include <stim/math/vector.h> | |
17 | +//#include <cuda.h> | |
18 | +//#include <stim/cuda/cudatools.h> | |
19 | +//#include <stim/cuda/cudatools/error.h> | |
20 | + | |
21 | + | |
22 | +/*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); | |
23 | + | |
24 | +int main(){ | |
25 | + | |
26 | + int n=20; | |
27 | + float rmax; | |
28 | + float phi_deg; | |
29 | + float phi; | |
30 | + rmax=4; | |
31 | + phi_deg = 15; | |
32 | + phi = phi_deg * pi/180; | |
33 | + int x,y,z; | |
34 | + x=y=z=1; | |
35 | + unsigned int size = x*y*z*sizeof (float); | |
36 | + float* cpu_grad = (float*) malloc(3*size); | |
37 | + float* gpu_grad; | |
38 | + cudaMalloc(&gpu_grad, 3*size); | |
39 | + cpu_grad[0]=1; | |
40 | + cpu_grad[1]=0; | |
41 | + cpu_grad[2]=-0.5; | |
42 | + cudaMemcpy(gpu_grad, cpu_grad, 3*size, cudaMemcpyHostToDevice); | |
43 | + float* cpu_out = (float*) malloc(3*size*(n+1)); | |
44 | + float* gpu_out; | |
45 | + cudaMalloc(&gpu_out, 3*size*(n+1)); | |
46 | + test_3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | |
47 | + cudaMemcpy(cpu_out, gpu_out, 3*size*(n+1), cudaMemcpyDeviceToHost); | |
48 | + std::ofstream list("circle_check_cuda1.txt"); | |
49 | + if (list.is_open()){ | |
50 | + for (int j=0; j<=n; ++j) | |
51 | + list << cpu_out[3*j] << '\t' << cpu_out[3*j +1] << '\t' << cpu_out[3*j + 2] << '\n'; | |
52 | + } | |
53 | + list.close(); | |
54 | + */ | |
55 | + /* | |
56 | + int n=10; | |
57 | + stim::circle<float> cir; | |
58 | + float* c0= (float*) malloc(3*sizeof(float)); | |
59 | + c0[0] =-4; | |
60 | + c0[1]=0; | |
61 | + c0[2] = 3; | |
62 | + stim::vec3<float> c(c0[0],c0[1],c0[2]); | |
63 | + float len = c.len(); | |
64 | + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | |
65 | + std::cout<< len << '\n'; | |
66 | + std::cout<< norm << '\n'; | |
67 | + cir.center(c); | |
68 | + cir.normal(norm); | |
69 | + cir.scale(2); | |
70 | + stim::vec3<float> out = cir.p(45); | |
71 | + std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | |
72 | + | |
73 | + std::cout<< out << '\n'; | |
74 | + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | |
75 | + std::cout<< c << '\n'; | |
76 | + | |
77 | + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) | |
78 | + std::cout << *i << '\n'; | |
79 | + std::ofstream list("circle_check.txt"); | |
80 | + if (list.is_open()){ | |
81 | + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | |
82 | + list << *j << '\n'; | |
83 | + } | |
84 | + list.close(); | |
85 | + std::cin >> n; | |
86 | + | |
87 | +} | |
88 | + | |
89 | +*/ | |
12 | 90 | |
13 | 91 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], |
14 | 92 | unsigned int x, unsigned int y, unsigned int z); |
... | ... | @@ -98,7 +176,7 @@ int main(int argc, char** argv){ |
98 | 176 | float t = args["t"].as_float(); |
99 | 177 | //set the anisotropy |
100 | 178 | float anisotropy = args["anisotropy"].as_float(); |
101 | - unsigned int rmax = 10 ; | |
179 | + unsigned int rmax = 10; | |
102 | 180 | unsigned int r[3] = { 12, rmax, rmax}; |
103 | 181 | float std = 5; |
104 | 182 | float sigma[3] = { std, std, std}; |
... | ... | @@ -137,50 +215,51 @@ int main(int argc, char** argv){ |
137 | 215 | std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary); |
138 | 216 | fblur.write((char*)cpuI, bytes); |
139 | 217 | fblur.close(); |
140 | - /* | |
141 | - stim::image<float>imgrad3; | |
142 | - imgrad3.set_interleaved3(cpu_out, 128,128,128,3); | |
143 | - std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary); | |
144 | - fgx.write((char*)imgrad3.channel(0).data(), bytes); | |
145 | - fgx.close(); | |
146 | - */ | |
218 | + | |
219 | + //stim::image<float>imgrad3; | |
220 | + //imgrad3.set_interleaved3(cpu_out, 128,128,128,3); | |
221 | + //std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary); | |
222 | + //fgx.write((char*)imgrad3.channel(0).data(), bytes); | |
223 | + //fgx.close(); | |
224 | + | |
147 | 225 | //write the output file. |
148 | - //for (int t0=2000; t0<=2500; t0+=100){ | |
226 | + //for (int t0=0; t0<=5000; t0+=100){ | |
227 | + // float t1 = t0; | |
149 | 228 | int t0 = t; |
150 | 229 | lmax(cpu_out, cpuI, t, conn, x, y, z); |
151 | 230 | //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary); |
152 | - std::ofstream fo("shared2D-v8/" + OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary); | |
231 | + std::ofstream fo( OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary); | |
153 | 232 | fo.write((char*)cpu_out, bytes); |
154 | 233 | fo.close(); |
155 | 234 | |
156 | 235 | // creat a file for saving the list centers |
157 | 236 | |
158 | - std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); | |
159 | - // set the number of detected cells to zero. | |
160 | - int nod = 0; | |
161 | - if (list.is_open()){ | |
162 | - | |
163 | - for (int iz=0; iz<z; iz++){ | |
164 | - for (int iy=0; iy<y; iy++){ | |
165 | - for (int ix=0; ix<x; ix++){ | |
166 | - | |
167 | - int idx = iz * x * y + iy * x + ix; | |
168 | - if (cpu_out[idx]==1){ | |
169 | - nod++; | |
170 | - list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ; | |
171 | - | |
172 | - } | |
173 | - } | |
174 | - } | |
175 | - } | |
176 | - list << "p" << "\t"; | |
177 | - for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | |
178 | - list << i_nod << "\t"; | |
179 | - } | |
237 | + //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); | |
238 | + //// set the number of detected cells to zero. | |
239 | + //int nod = 0; | |
240 | + //if (list.is_open()){ | |
180 | 241 | |
181 | - list.close(); | |
182 | - } | |
183 | - | |
242 | + // for (int iz=0; iz<z; iz++){ | |
243 | + // for (int iy=0; iy<y; iy++){ | |
244 | + // for (int ix=0; ix<x; ix++){ | |
245 | + | |
246 | + // int idx = iz * x * y + iy * x + ix; | |
247 | + // if (cpu_out[idx]==1){ | |
248 | + // nod++; | |
249 | + // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ; | |
250 | + // | |
251 | + // } | |
252 | + // } | |
253 | + // } | |
254 | + // } | |
255 | + // list << "p" << "\t"; | |
256 | + // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | |
257 | + // list << i_nod << "\t"; | |
258 | + // } | |
259 | + | |
260 | + //list.close(); | |
261 | + //} | |
262 | + //} | |
184 | 263 | cudaDeviceReset(); |
185 | 264 | |
186 | 265 | } | ... | ... |
1 | +#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H | |
2 | +#define STIM_CUDA_UPDATE_DIR3_AABB_H | |
3 | + | |
4 | +# include <iostream> | |
5 | +# include <cuda.h> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#include "cpyToshare.cuh" | |
8 | +#define M_PI 3.14159 | |
9 | +#include <stim/math/circle.h> | |
10 | +#include <stim/math/vec3.h> | |
11 | +#include <stim/math/plane.h> | |
12 | +#include <stim/math/vector.h> | |
13 | +#include <stim/visualization/aabb3.h> | |
14 | + | |
15 | + // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. | |
16 | + template<typename T> | |
17 | + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
18 | + //extern __shared__ float s_vote[]; | |
19 | + | |
20 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | |
21 | + | |
22 | + int grid_y = y / blockDim.y; //find the grid size along y | |
23 | + int blockidx_y = blockIdx.y % grid_y; | |
24 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
25 | + int zi = blockIdx.y / grid_y; | |
26 | + if(xi >= x|| yi >= y || zi>= z) return; | |
27 | + int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel | |
28 | + | |
29 | + | |
30 | + // find the starting points for this block along the x and y directions | |
31 | + //int bxi = blockIdx.x * blockDim.x; | |
32 | + //int byi = blockidx_y * blockDim.y; | |
33 | + //find the starting points and the size of the window, which will be copied to the 2D-shared memory | |
34 | + //int bxs = bxi - rx; | |
35 | + //int bys = byi - ry; | |
36 | + //int xwidth = 2 * rx + blockDim.x; | |
37 | + //int ywidth = 2 * ry + blockDim.y; | |
38 | + //compute the coordinations of this pixel in the 2D-shared memory. | |
39 | + //int sx_rx = threadIdx.x + rx; | |
40 | + //int sy_ry = threadIdx.y + ry; | |
41 | + | |
42 | + float rx_sq = rx * rx; // compute the square for rmax | |
43 | + float ry_sq = ry * ry; | |
44 | + float rz_sq = rz * rz; | |
45 | + | |
46 | + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | |
47 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | |
48 | + int n =4; //set the number of points to find the boundaries of the conical voting area | |
49 | + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter | |
50 | + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | |
51 | + float zc = rz * cos(g_sph[2]) ; | |
52 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | |
53 | + xc+=xi; | |
54 | + yc+=yi; | |
55 | + zc+=zi; | |
56 | + stim::vec3<float> center(xc,yc,zc); | |
57 | + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | |
58 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | |
59 | + float step = 360.0/(float) n; | |
60 | + stim::circle<float> cir(center, d, norm); | |
61 | + stim::aabb3<int> bb(xi,yi,zi); | |
62 | + bb.insert(xc,yc,zc); | |
63 | + for(float j = 0; j <360.0; j += step){ | |
64 | + stim::vec3<float> out = cir.p(j); | |
65 | + bb.insert(out[0], out[1], out[2]); | |
66 | + } | |
67 | + | |
68 | + bb.trim_low(0,0,0); | |
69 | + bb.trim_high(x-1, y-1, z-1); | |
70 | + int bx,by,bz; | |
71 | + int dx, dy, dz; | |
72 | + float dx_sq, dy_sq, dz_sq; | |
73 | + | |
74 | + float dist, cos_diff; | |
75 | + int idx_c; | |
76 | + | |
77 | + float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter | |
78 | + float l_vote = 0; | |
79 | + | |
80 | + float id_x = g[0]; // define local variables for the x, y, and z coordinations point to the vote direction | |
81 | + float id_y = g[1]; | |
82 | + float id_z = g[2]; | |
83 | + | |
84 | + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ | |
85 | + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis | |
86 | + dz_sq = dz * dz; | |
87 | + for (by=bb.low[1]; by<=bb.high[1]; by++){ | |
88 | + dy = by - yi; //compute the distance bw the voter and the current counter along y axis | |
89 | + dy_sq = dy * dy; | |
90 | + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){ | |
91 | + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis | |
92 | + dx_sq = dx * dx; | |
93 | + | |
94 | + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter | |
95 | + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter | |
96 | + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter | |
97 | + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter | |
98 | + l_vote = gpu_vote[idx_c]; | |
99 | + if (l_vote>max) { | |
100 | + max = l_vote; | |
101 | + id_x = dx; | |
102 | + id_y = dy; | |
103 | + id_z = dz; | |
104 | + } | |
105 | + } | |
106 | + } | |
107 | + } | |
108 | + } | |
109 | + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z); | |
110 | + gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id); | |
111 | + gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id); | |
112 | + gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id); | |
113 | + } | |
114 | + | |
115 | + | |
116 | + | |
117 | + // this kernel updates the gradient direction by the calculated voting direction. | |
118 | + template<typename T> | |
119 | + __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ | |
120 | + | |
121 | + //calculate x,y,z coordinates for this thread | |
122 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; | |
123 | + //find the grid size along y | |
124 | + int grid_y = y / blockDim.y; | |
125 | + int blockidx_y = blockIdx.y % grid_y; | |
126 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
127 | + int zi = blockIdx.y / grid_y; | |
128 | + int i = zi * x * y + yi * x + xi; | |
129 | + | |
130 | + if(xi >= x || yi >= y || zi >= z) return; | |
131 | + //update the gradient image with the new direction direction | |
132 | + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; | |
133 | + gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; | |
134 | + gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; | |
135 | + } | |
136 | + | |
137 | + template<typename T> | |
138 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
139 | + | |
140 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | |
141 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | |
142 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | |
143 | + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T); | |
144 | + // allocate space on the GPU for the updated vote direction | |
145 | + T* gpu_dir; | |
146 | + cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); | |
147 | + | |
148 | + //call the kernel to calculate the new voting direction | |
149 | + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z); | |
150 | + | |
151 | + | |
152 | + //call the kernel to update the gradient direction | |
153 | + update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); | |
154 | + | |
155 | + //free allocated memory | |
156 | + cudaFree(gpu_dir); | |
157 | + | |
158 | + } | |
159 | + | |
160 | + template<typename T> | |
161 | + void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
162 | + | |
163 | + //calculate the number of bytes in the array | |
164 | + unsigned int bytes = x * y * z * sizeof(T); | |
165 | + | |
166 | + //allocate space on the GPU for the Vote data | |
167 | + T* gpu_vote; | |
168 | + cudaMalloc(&gpu_vote, bytes); | |
169 | + | |
170 | + //copy the input vote data to the GPU | |
171 | + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice); | |
172 | + | |
173 | + //allocate space on the GPU for the Gradient data | |
174 | + T* gpu_grad; | |
175 | + cudaMalloc(&gpu_grad, bytes*3); | |
176 | + | |
177 | + //copy the Gradient data to the GPU | |
178 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | |
179 | + | |
180 | + //call the GPU version of the update direction function | |
181 | + gpu_update_dir3<T>(gpu_grad, gpu_vote, cos_phi, r, x , y, z); | |
182 | + | |
183 | + //copy the new gradient image back to the CPU | |
184 | + cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ; | |
185 | + | |
186 | + //free allocated memory | |
187 | + cudaFree(gpu_vote); | |
188 | + cudaFree(gpu_grad); | |
189 | + } | |
190 | + | |
191 | + | |
192 | + | |
193 | +#endif | |
0 | 194 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_CUDA_VOTE3_ATOMIC_H | |
2 | +#define STIM_CUDA_VOTE3_ATOMIC_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <cuda.h> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#include <stim/cuda/cudatools/error.h> | |
8 | +#include "cpyToshare.cuh" | |
9 | + | |
10 | + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area | |
11 | + template<typename T> | |
12 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
13 | + | |
14 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | |
15 | + | |
16 | + int grid_y = y / blockDim.y; //find the grid size along y | |
17 | + int blockidx_y = blockIdx.y % grid_y; | |
18 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
19 | + int zi = blockIdx.y / grid_y; | |
20 | + | |
21 | + if(xi>=x || yi>=y || zi>=z) return; | |
22 | + | |
23 | + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter | |
24 | + float gx_v = gpu_grad[3*i]; // find the gradient information in cartesian coordinate for the voter - global memory fetch | |
25 | + float gy_v = gpu_grad[3*i+1]; | |
26 | + float gz_v = gpu_grad[3*i+2]; | |
27 | + | |
28 | + float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter | |
29 | + | |
30 | + float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter | |
31 | + float gy_v_n = gy_v/mag_v; | |
32 | + float gz_v_n = gz_v/mag_v; | |
33 | + | |
34 | + float rx_sq = rx * rx; // compute the square for rmax | |
35 | + float ry_sq = ry * ry; | |
36 | + float rz_sq = rz * rz; | |
37 | + float x_sq, y_sq, z_sq, d_c, cos_diff; | |
38 | + int xi_c, yi_c, zi_c, idx_c; | |
39 | + | |
40 | + for (int z_c=-rz; z_c<=rz; z_c++){ | |
41 | + zi_c = zi + z_c; // calculate the z position for the current counter | |
42 | + if (zi_c <z && zi_c>=0){ // make sure the current counter is inside the image | |
43 | + z_sq = z_c * z_c; | |
44 | + for (int y_c=-ry; y_c<=ry; y_c++){ | |
45 | + yi_c = yi + y_c; | |
46 | + if (yi_c < y && yi_c>=0){ | |
47 | + y_sq = y_c * y_c; | |
48 | + for (int x_c=-rx; x_c<=rx; x_c++){ | |
49 | + xi_c = xi + x_c; | |
50 | + if (xi_c < x && xi_c>=0){ | |
51 | + x_sq = x_c * x_c; | |
52 | + d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter | |
53 | + cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter | |
54 | + if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ | |
55 | + idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter | |
56 | + atomicAdd (&gpu_vote[idx_c] , mag_v); | |
57 | + } | |
58 | + } | |
59 | + } | |
60 | + } | |
61 | + } | |
62 | + } | |
63 | + } | |
64 | + } | |
65 | + | |
66 | + template<typename T> | |
67 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
68 | + | |
69 | + | |
70 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | |
71 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | |
72 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | |
73 | + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*4*sizeof(T); | |
74 | + //call the kernel to do the voting | |
75 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); | |
76 | + | |
77 | + } | |
78 | + | |
79 | + | |
80 | + template<typename T> | |
81 | + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
82 | + | |
83 | + //calculate the number of bytes in the array | |
84 | + unsigned int bytes = x * y * z * sizeof(T); | |
85 | + | |
86 | + | |
87 | + //allocate space on the GPU for the Vote Image | |
88 | + T* gpu_vote; | |
89 | + cudaMalloc(&gpu_vote, bytes); | |
90 | + | |
91 | + //allocate space on the GPU for the input Gradient image | |
92 | + T* gpu_grad; | |
93 | + cudaMalloc(&gpu_grad, bytes*3); | |
94 | + | |
95 | + | |
96 | + //copy the Gradient data to the GPU | |
97 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | |
98 | + | |
99 | + | |
100 | + //call the GPU version of the vote calculation function | |
101 | + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z); | |
102 | + | |
103 | + //copy the Vote Data back to the CPU | |
104 | + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; | |
105 | + | |
106 | + //free allocated memory | |
107 | + cudaFree(gpu_vote); | |
108 | + cudaFree(gpu_grad); | |
109 | + | |
110 | + } | |
111 | + | |
112 | + | |
113 | +#endif | |
0 | 114 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_CUDA_VOTE3_ATOMIC_AABB_H | |
2 | +#define STIM_CUDA_VOTE3_ATOMIC_AABB_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <cuda.h> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#include <stim/cuda/cudatools/error.h> | |
8 | +#include "cpyToshare.cuh" | |
9 | +#define M_PI 3.14159 | |
10 | +#include <stim/math/circle.h> | |
11 | +#include <stim/math/vec3.h> | |
12 | +#include <stim/math/plane.h> | |
13 | +#include <stim/math/vector.h> | |
14 | +#include <stim/visualization/aabb3.h> | |
15 | + | |
16 | + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area | |
17 | + template<typename T> | |
18 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
19 | + | |
20 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | |
21 | + | |
22 | + int grid_y = y / blockDim.y; //find the grid size along y | |
23 | + int blockidx_y = blockIdx.y % grid_y; | |
24 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
25 | + int zi = blockIdx.y / grid_y; | |
26 | + | |
27 | + if(xi>=x || yi>=y || zi>=z) return; | |
28 | + | |
29 | + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter | |
30 | + | |
31 | + | |
32 | + float rx_sq = rx * rx; // compute the square for rmax | |
33 | + float ry_sq = ry * ry; | |
34 | + float rz_sq = rz * rz; | |
35 | + float dist, cos_diff; | |
36 | + int idx_c; | |
37 | + | |
38 | + //float rmax = sqrt(rx_sq + ry_sq + rz_sq); | |
39 | + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | |
40 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | |
41 | + int n =4; //set the number of points to find the boundaries of the conical voting area | |
42 | + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter | |
43 | + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | |
44 | + float zc = rz * cos(g_sph[2]) ; | |
45 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | |
46 | + xc+=xi; | |
47 | + yc+=yi; | |
48 | + zc+=zi; | |
49 | + stim::vec3<float> center(xc,yc,zc); | |
50 | + | |
51 | + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | |
52 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | |
53 | + float step = 360.0/(float) n; | |
54 | + stim::circle<float> cir(center, d, norm); | |
55 | + stim::aabb3<int> bb(xi,yi,zi); | |
56 | + bb.insert(xc,yc,zc); | |
57 | + for(float j = 0; j <360.0; j += step){ | |
58 | + stim::vec3<float> out = cir.p(j); | |
59 | + bb.insert(out[0], out[1], out[2]); | |
60 | + } | |
61 | + | |
62 | + bb.trim_low(0,0,0); | |
63 | + bb.trim_high(x-1, y-1, z-1); | |
64 | + int bx,by,bz; | |
65 | + int dx, dy, dz; | |
66 | + float dx_sq, dy_sq, dz_sq; | |
67 | + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ | |
68 | + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis | |
69 | + dz_sq = dz * dz; | |
70 | + for (by=bb.low[1]; by<=bb.high[1]; by++){ | |
71 | + dy = by - yi; //compute the distance bw the voter and the current counter along y axis | |
72 | + dy_sq = dy * dy; | |
73 | + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){ | |
74 | + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis | |
75 | + dx_sq = dx * dx; | |
76 | + | |
77 | + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter | |
78 | + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter | |
79 | + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter | |
80 | + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter | |
81 | + atomicAdd (&gpu_vote[idx_c] , g_sph[0]); | |
82 | + } | |
83 | + } | |
84 | + } | |
85 | + } | |
86 | + } | |
87 | + | |
88 | + template<typename T> | |
89 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
90 | + | |
91 | + | |
92 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | |
93 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | |
94 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | |
95 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting | |
96 | + | |
97 | + } | |
98 | + | |
99 | + | |
100 | + template<typename T> | |
101 | + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
102 | + | |
103 | + //calculate the number of bytes in the array | |
104 | + unsigned int bytes = x * y * z * sizeof(T); | |
105 | + | |
106 | + | |
107 | + //allocate space on the GPU for the Vote Image | |
108 | + T* gpu_vote; | |
109 | + cudaMalloc(&gpu_vote, bytes); | |
110 | + | |
111 | + //allocate space on the GPU for the input Gradient image | |
112 | + T* gpu_grad; | |
113 | + cudaMalloc(&gpu_grad, bytes*3); | |
114 | + | |
115 | + | |
116 | + //copy the Gradient data to the GPU | |
117 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | |
118 | + | |
119 | + | |
120 | + //call the GPU version of the vote calculation function | |
121 | + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z); | |
122 | + | |
123 | + //copy the Vote Data back to the CPU | |
124 | + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; | |
125 | + | |
126 | + //free allocated memory | |
127 | + cudaFree(gpu_vote); | |
128 | + cudaFree(gpu_grad); | |
129 | + | |
130 | + } | |
131 | + | |
132 | + | |
133 | +#endif | |
0 | 134 | \ No newline at end of file | ... | ... |