Commit a744d027c4d9d6583db594dc75811c6ff90eaef6
1 parent
46820b25
upload the ivote3 functions using the new method for finding voting area (bounda…
…ries of voting area)'
Showing
9 changed files
with
657 additions
and
122 deletions
Show diff stats
No preview for this file type
Matlab_3D/validation.m
1 | clear all; | 1 | clear all; |
2 | -disp('***************** NEW RUN *********************'); | 2 | +%for t=100:100:5000 |
3 | +t=2350; | ||
3 | X = 128; | 4 | X = 128; |
4 | Y = 128; | 5 | Y = 128; |
5 | Z = 128; | 6 | Z = 128; |
@@ -7,15 +8,15 @@ D = 10; | @@ -7,15 +8,15 @@ D = 10; | ||
7 | t0=1; | 8 | t0=1; |
8 | r1=12; | 9 | r1=12; |
9 | r2=10; | 10 | r2=10; |
10 | -t=2300; | ||
11 | -itr=8; | ||
12 | -vote=10; | 11 | +% t=2300; |
12 | +itr=5; | ||
13 | +vote=7; | ||
13 | std = [5 5]; | 14 | std = [5 5]; |
14 | gt_filename = 'gt2.vol'; | 15 | gt_filename = 'gt2.vol'; |
15 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); | 16 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); |
16 | -out_filename = sprintf('D:/build/ivote3-bld/shared2D-v8/centers.%d.vol',t); | 17 | +out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); |
17 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); | 18 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); |
18 | -txt_filename = sprintf('0-t%d--1.txt',t); | 19 | +txt_filename = sprintf('D:/build/ivote3-bld/0-t%d-atomic-aabb.txt',t); |
19 | spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote); | 20 | spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote); |
20 | fid0 = fopen(gt_filename); | 21 | fid0 = fopen(gt_filename); |
21 | gt = fread(fid0,[X Y*Z], 'single'); | 22 | gt = fread(fid0,[X Y*Z], 'single'); |
@@ -79,73 +80,73 @@ for k=1:size(id_ref,1) | @@ -79,73 +80,73 @@ for k=1:size(id_ref,1) | ||
79 | oseg = oseg+1; | 80 | oseg = oseg+1; |
80 | end | 81 | end |
81 | end | 82 | end |
82 | -%**** | ||
83 | -% check if the oversegmented detected cells could be assigned to the gt's element with no detected cell! | ||
84 | -%find the indices of gt's element with no detected cell | ||
85 | -u_idx1 = find(b==0); | ||
86 | -%find the coordinates of gt's | ||
87 | -ref1 = ref(u_idx1,:); | ||
88 | -% find the indices of detected cells which are assigned to a gt's element | ||
89 | -ub = unique(b(b>0)); | ||
90 | -% set the number of detected cell which are not assigned to a gt's element to zero | ||
91 | -mm=0; | ||
92 | -% find the indices of detected cell which are not assigned to a gt's element | ||
93 | -for j=1:size(cent,1) | ||
94 | - z = sum(ub(:)==j); | ||
95 | - if z==0 | ||
96 | - mm = mm+1; | ||
97 | - cent_id1(mm) = j; | ||
98 | - end | ||
99 | -end | ||
100 | -% find the coordinated of of detected cells | ||
101 | -cent1 = cent(cent_id1,:); | ||
102 | - | ||
103 | -% check if there are osme low enough distances | ||
104 | -[idx1, dist1] = knnsearch(cent1,ref1); | ||
105 | -ind_eq_idx_rc = zeros(numel(idx1), 10); | ||
106 | -%set number of cells with the same detected cell center to zero. | ||
107 | -noc_rc=0; | ||
108 | -for i_rc=1:numel(idx1) | ||
109 | - % check the current element hasn't considered yet | ||
110 | - s_ind_rc = sum(ind_eq_idx_rc==i_rc); | ||
111 | - if s_ind_rc==0 | ||
112 | - % check how many of the elemnts assigned to the same detected cell | ||
113 | - s_rc = sum(idx1==idx1(i_rc)); | ||
114 | - if s_rc>1 | ||
115 | - noc_rc = noc_rc+1; | ||
116 | - %save the index and number of gt's elements with the same assigned detected cells. | ||
117 | - id_ref_rc(noc_rc,1) = i_rc; | ||
118 | - id_ref_rc(noc_rc,2) = s_rc; | ||
119 | - ind1_rc = find(idx1==idx1(i_rc)); | ||
120 | - ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc; | ||
121 | - end | ||
122 | - end | ||
123 | -end | ||
124 | -% determine those indices which hs assigned to only one detected cell | ||
125 | -b_rc = idx1; | ||
126 | -u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0)); | ||
127 | -b_rc(u_eq_idx_rc)=0; | ||
128 | -% set the number of over sefgmented cells to zero. | ||
129 | -oseg_rc=0; | ||
130 | -for k_rc=1:size(id_ref_rc,1) | ||
131 | - k1_rc = id_ref_rc(k_rc,1); | ||
132 | - k2_rc = id_ref_rc(k_rc,2); | ||
133 | - %find the minimum distance to the detected cell happened for which of the gt's element | ||
134 | - l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc); | ||
135 | - [~, local_id_min_rc] = min(dist1(l_rc)); | ||
136 | - % set the element with minimum distance to the corresponding detected cell | ||
137 | - b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc); | ||
138 | - %remove the proper element from the list of indices with same designated detected cells | ||
139 | - u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0; | ||
140 | - % check if the indices remained in the list has distance less than the value is set for validation | ||
141 | - ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc)); | ||
142 | - distl_rc = dist1(ly_rc); | ||
143 | - sl_rc = sum(distl_rc<D); | ||
144 | - % if the distance is low enought, consider the corresponding cell as oversegmented | ||
145 | - if sl_rc>0 | ||
146 | - oseg_rc = oseg_rc+1; | ||
147 | - end | ||
148 | -end | 83 | +% %**** |
84 | +% % check if the oversegmented detected cells could be assigned to the gt's element with no detected cell! | ||
85 | +% %find the indices of gt's element with no detected cell | ||
86 | +% u_idx1 = find(b==0); | ||
87 | +% %find the coordinates of gt's | ||
88 | +% ref1 = ref(u_idx1,:); | ||
89 | +% % find the indices of detected cells which are assigned to a gt's element | ||
90 | +% ub = unique(b(b>0)); | ||
91 | +% % set the number of detected cell which are not assigned to a gt's element to zero | ||
92 | +% mm=0; | ||
93 | +% % find the indices of detected cell which are not assigned to a gt's element | ||
94 | +% for j=1:size(cent,1) | ||
95 | +% z = sum(ub(:)==j); | ||
96 | +% if z==0 | ||
97 | +% mm = mm+1; | ||
98 | +% cent_id1(mm) = j; | ||
99 | +% end | ||
100 | +% end | ||
101 | +% % find the coordinated of of detected cells | ||
102 | +% cent1 = cent(cent_id1,:); | ||
103 | +% | ||
104 | +% % check if there are osme low enough distances | ||
105 | +% [idx1, dist1] = knnsearch(cent1,ref1); | ||
106 | +% ind_eq_idx_rc = zeros(numel(idx1), 10); | ||
107 | +% %set number of cells with the same detected cell center to zero. | ||
108 | +% noc_rc=0; | ||
109 | +% for i_rc=1:numel(idx1) | ||
110 | +% % check the current element hasn't considered yet | ||
111 | +% s_ind_rc = sum(ind_eq_idx_rc==i_rc); | ||
112 | +% if s_ind_rc==0 | ||
113 | +% % check how many of the elemnts assigned to the same detected cell | ||
114 | +% s_rc = sum(idx1==idx1(i_rc)); | ||
115 | +% if s_rc>1 | ||
116 | +% noc_rc = noc_rc+1; | ||
117 | +% %save the index and number of gt's elements with the same assigned detected cells. | ||
118 | +% id_ref_rc(noc_rc,1) = i_rc; | ||
119 | +% id_ref_rc(noc_rc,2) = s_rc; | ||
120 | +% ind1_rc = find(idx1==idx1(i_rc)); | ||
121 | +% ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc; | ||
122 | +% end | ||
123 | +% end | ||
124 | +% end | ||
125 | +% % determine those indices which hs assigned to only one detected cell | ||
126 | +% b_rc = idx1; | ||
127 | +% u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0)); | ||
128 | +% b_rc(u_eq_idx_rc)=0; | ||
129 | +% % set the number of over sefgmented cells to zero. | ||
130 | +% oseg_rc=0; | ||
131 | +% for k_rc=1:size(id_ref_rc,1) | ||
132 | +% k1_rc = id_ref_rc(k_rc,1); | ||
133 | +% k2_rc = id_ref_rc(k_rc,2); | ||
134 | +% %find the minimum distance to the detected cell happened for which of the gt's element | ||
135 | +% l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc); | ||
136 | +% [~, local_id_min_rc] = min(dist1(l_rc)); | ||
137 | +% % set the element with minimum distance to the corresponding detected cell | ||
138 | +% b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc); | ||
139 | +% %remove the proper element from the list of indices with same designated detected cells | ||
140 | +% u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0; | ||
141 | +% % check if the indices remained in the list has distance less than the value is set for validation | ||
142 | +% ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc)); | ||
143 | +% distl_rc = dist1(ly_rc); | ||
144 | +% sl_rc = sum(distl_rc<D); | ||
145 | +% % if the distance is low enought, consider the corresponding cell as oversegmented | ||
146 | +% if sl_rc>0 | ||
147 | +% oseg_rc = oseg_rc+1; | ||
148 | +% end | ||
149 | +% end | ||
149 | 150 | ||
150 | %****** | 151 | %****** |
151 | % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value | 152 | % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value |
@@ -154,11 +155,11 @@ b_dist = dist; | @@ -154,11 +155,11 @@ b_dist = dist; | ||
154 | % remove the disatances for those elements with no detected cells from distance array | 155 | % remove the disatances for those elements with no detected cells from distance array |
155 | b_dist(b_ind)=-1; | 156 | b_dist(b_ind)=-1; |
156 | 157 | ||
157 | -b_ind_rc = find(b_rc==0); | ||
158 | -b_dist_rc = dist1; | ||
159 | -b_dist_rc(b_ind_rc)=-1; | 158 | +% b_ind_rc = find(b_rc==0); |
159 | +% b_dist_rc = dist1; | ||
160 | +% b_dist_rc(b_ind_rc)=-1; | ||
160 | % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements. | 161 | % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements. |
161 | -TP = sum(b_dist>=0) - sum(b_dist>D) + (sum(b_dist_rc>=0) - sum(b_dist_rc>D)); | 162 | +TP = sum(b_dist>=0) - sum(b_dist>D); % + (sum(b_dist_rc>=0) - sum(b_dist_rc>D)); |
162 | % calculate False Negative, number of gt's elements with no detected cells. | 163 | % calculate False Negative, number of gt's elements with no detected cells. |
163 | FN = size(ref, 1) - TP; | 164 | FN = size(ref, 1) - TP; |
164 | % calculate False Positive, number of detected cells, which their distance to the gt's element is long | 165 | % calculate False Positive, number of detected cells, which their distance to the gt's element is long |
@@ -179,3 +180,5 @@ fprintf(fid_text, 'Precision = %f\n', Precision); | @@ -179,3 +180,5 @@ fprintf(fid_text, 'Precision = %f\n', Precision); | ||
179 | fprintf(fid_text, 'Accuracy = %f\n', Accuracy); | 180 | fprintf(fid_text, 'Accuracy = %f\n', Accuracy); |
180 | fprintf(fid_text, 'Recall = %f\n', Recall); | 181 | fprintf(fid_text, 'Recall = %f\n', Recall); |
181 | fclose(fid_text); | 182 | fclose(fid_text); |
183 | +clear all; | ||
184 | +%end | ||
182 | \ No newline at end of file | 185 | \ No newline at end of file |
cpp/CMakeLists.txt
@@ -10,6 +10,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") | @@ -10,6 +10,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") | ||
10 | #set up CUDA | 10 | #set up CUDA |
11 | find_package(CUDA REQUIRED) | 11 | find_package(CUDA REQUIRED) |
12 | 12 | ||
13 | +#set up opencv | ||
14 | +find_package(opencv) | ||
15 | + | ||
13 | #find the STIM library | 16 | #find the STIM library |
14 | find_package(STIM REQUIRED) | 17 | find_package(STIM REQUIRED) |
15 | 18 |
cpp/cudafunc.cu
1 | +/*#include "circle_check.cuh" | ||
2 | + | ||
3 | +void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z){ | ||
4 | +gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | ||
5 | +} | ||
6 | +*/ | ||
7 | + | ||
8 | + | ||
1 | #include "gaussian_blur3.cuh" | 9 | #include "gaussian_blur3.cuh" |
2 | #include "gradient3.cuh" | 10 | #include "gradient3.cuh" |
3 | #include "mag3.cuh" | 11 | #include "mag3.cuh" |
4 | -#include "vote3.cuh" | 12 | +#include "vote3_atomic_aabb.cuh" |
5 | #include "update_dir3.cuh" | 13 | #include "update_dir3.cuh" |
6 | #include "local_max3.cuh" | 14 | #include "local_max3.cuh" |
7 | 15 | ||
@@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | @@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | ||
10 | int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | 18 | int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ |
11 | 19 | ||
12 | 20 | ||
13 | - cudaSetDevice(0); | 21 | + cudaSetDevice(1); |
14 | // compute the number of bytes in the input data | 22 | // compute the number of bytes in the input data |
15 | unsigned int bytes = x * y * z * sizeof(float); | 23 | unsigned int bytes = x * y * z * sizeof(float); |
16 | 24 | ||
17 | - //assign memory on gpu for the input data. | 25 | + //assign memory on gpu for the input data.z |
18 | float* gpuI0; | 26 | float* gpuI0; |
19 | cudaMalloc(&gpuI0, bytes); | 27 | cudaMalloc(&gpuI0, bytes); |
20 | 28 | ||
@@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | @@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | ||
41 | 49 | ||
42 | //call the vote function. | 50 | //call the vote function. |
43 | for (int i = 0; i < iter; i++){ | 51 | for (int i = 0; i < iter; i++){ |
44 | - | 52 | + |
53 | + cudaMemset(gpu_vote, 0, bytes); | ||
45 | gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); | 54 | gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); |
46 | cudaDeviceSynchronize(); | 55 | cudaDeviceSynchronize(); |
47 | - /*if (i==7) | ||
48 | - cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);*/ | ||
49 | - | ||
50 | - if (phi >= d_phi){ | 56 | + |
57 | + //if (phi >= d_phi){ | ||
51 | gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); | 58 | gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); |
52 | cudaDeviceSynchronize(); | 59 | cudaDeviceSynchronize(); |
53 | phi = phi - d_phi; | 60 | phi = phi - d_phi; |
54 | cos_phi = cos(phi); | 61 | cos_phi = cos(phi); |
55 | - } | 62 | + //} |
56 | 63 | ||
57 | } | 64 | } |
58 | 65 | ||
@@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | @@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | ||
78 | void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | 85 | void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ |
79 | unsigned int bytes = x * y * z * sizeof(float); | 86 | unsigned int bytes = x * y * z * sizeof(float); |
80 | 87 | ||
88 | + cudaSetDevice(1); | ||
89 | + | ||
81 | //assign memory on gpu for the input data. | 90 | //assign memory on gpu for the input data. |
82 | float* gpuV; | 91 | float* gpuV; |
83 | cudaMalloc(&gpuV, bytes); | 92 | cudaMalloc(&gpuV, bytes); |
@@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u | @@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u | ||
96 | 105 | ||
97 | cudaFree(gpuV); | 106 | cudaFree(gpuV); |
98 | cudaFree(gpuOut); | 107 | cudaFree(gpuOut); |
99 | -} | ||
100 | \ No newline at end of file | 108 | \ No newline at end of file |
109 | +} | ||
110 | + |
cpp/main.cpp
@@ -9,6 +9,84 @@ | @@ -9,6 +9,84 @@ | ||
9 | #include <stim/image/image.h> | 9 | #include <stim/image/image.h> |
10 | #define pi 3.14159 | 10 | #define pi 3.14159 |
11 | 11 | ||
12 | +#define M_PI 3.14159 | ||
13 | +#include <stim/math/circle.h> | ||
14 | +#include <stim/math/vec3.h> | ||
15 | +#include <stim/math/plane.h> | ||
16 | +#include <stim/math/vector.h> | ||
17 | +//#include <cuda.h> | ||
18 | +//#include <stim/cuda/cudatools.h> | ||
19 | +//#include <stim/cuda/cudatools/error.h> | ||
20 | + | ||
21 | + | ||
22 | +/*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); | ||
23 | + | ||
24 | +int main(){ | ||
25 | + | ||
26 | + int n=20; | ||
27 | + float rmax; | ||
28 | + float phi_deg; | ||
29 | + float phi; | ||
30 | + rmax=4; | ||
31 | + phi_deg = 15; | ||
32 | + phi = phi_deg * pi/180; | ||
33 | + int x,y,z; | ||
34 | + x=y=z=1; | ||
35 | + unsigned int size = x*y*z*sizeof (float); | ||
36 | + float* cpu_grad = (float*) malloc(3*size); | ||
37 | + float* gpu_grad; | ||
38 | + cudaMalloc(&gpu_grad, 3*size); | ||
39 | + cpu_grad[0]=1; | ||
40 | + cpu_grad[1]=0; | ||
41 | + cpu_grad[2]=-0.5; | ||
42 | + cudaMemcpy(gpu_grad, cpu_grad, 3*size, cudaMemcpyHostToDevice); | ||
43 | + float* cpu_out = (float*) malloc(3*size*(n+1)); | ||
44 | + float* gpu_out; | ||
45 | + cudaMalloc(&gpu_out, 3*size*(n+1)); | ||
46 | + test_3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | ||
47 | + cudaMemcpy(cpu_out, gpu_out, 3*size*(n+1), cudaMemcpyDeviceToHost); | ||
48 | + std::ofstream list("circle_check_cuda1.txt"); | ||
49 | + if (list.is_open()){ | ||
50 | + for (int j=0; j<=n; ++j) | ||
51 | + list << cpu_out[3*j] << '\t' << cpu_out[3*j +1] << '\t' << cpu_out[3*j + 2] << '\n'; | ||
52 | + } | ||
53 | + list.close(); | ||
54 | + */ | ||
55 | + /* | ||
56 | + int n=10; | ||
57 | + stim::circle<float> cir; | ||
58 | + float* c0= (float*) malloc(3*sizeof(float)); | ||
59 | + c0[0] =-4; | ||
60 | + c0[1]=0; | ||
61 | + c0[2] = 3; | ||
62 | + stim::vec3<float> c(c0[0],c0[1],c0[2]); | ||
63 | + float len = c.len(); | ||
64 | + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | ||
65 | + std::cout<< len << '\n'; | ||
66 | + std::cout<< norm << '\n'; | ||
67 | + cir.center(c); | ||
68 | + cir.normal(norm); | ||
69 | + cir.scale(2); | ||
70 | + stim::vec3<float> out = cir.p(45); | ||
71 | + std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | ||
72 | + | ||
73 | + std::cout<< out << '\n'; | ||
74 | + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | ||
75 | + std::cout<< c << '\n'; | ||
76 | + | ||
77 | + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) | ||
78 | + std::cout << *i << '\n'; | ||
79 | + std::ofstream list("circle_check.txt"); | ||
80 | + if (list.is_open()){ | ||
81 | + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | ||
82 | + list << *j << '\n'; | ||
83 | + } | ||
84 | + list.close(); | ||
85 | + std::cin >> n; | ||
86 | + | ||
87 | +} | ||
88 | + | ||
89 | +*/ | ||
12 | 90 | ||
13 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], | 91 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], |
14 | unsigned int x, unsigned int y, unsigned int z); | 92 | unsigned int x, unsigned int y, unsigned int z); |
@@ -98,7 +176,7 @@ int main(int argc, char** argv){ | @@ -98,7 +176,7 @@ int main(int argc, char** argv){ | ||
98 | float t = args["t"].as_float(); | 176 | float t = args["t"].as_float(); |
99 | //set the anisotropy | 177 | //set the anisotropy |
100 | float anisotropy = args["anisotropy"].as_float(); | 178 | float anisotropy = args["anisotropy"].as_float(); |
101 | - unsigned int rmax = 10 ; | 179 | + unsigned int rmax = 10; |
102 | unsigned int r[3] = { 12, rmax, rmax}; | 180 | unsigned int r[3] = { 12, rmax, rmax}; |
103 | float std = 5; | 181 | float std = 5; |
104 | float sigma[3] = { std, std, std}; | 182 | float sigma[3] = { std, std, std}; |
@@ -137,50 +215,51 @@ int main(int argc, char** argv){ | @@ -137,50 +215,51 @@ int main(int argc, char** argv){ | ||
137 | std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary); | 215 | std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary); |
138 | fblur.write((char*)cpuI, bytes); | 216 | fblur.write((char*)cpuI, bytes); |
139 | fblur.close(); | 217 | fblur.close(); |
140 | - /* | ||
141 | - stim::image<float>imgrad3; | ||
142 | - imgrad3.set_interleaved3(cpu_out, 128,128,128,3); | ||
143 | - std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary); | ||
144 | - fgx.write((char*)imgrad3.channel(0).data(), bytes); | ||
145 | - fgx.close(); | ||
146 | - */ | 218 | + |
219 | + //stim::image<float>imgrad3; | ||
220 | + //imgrad3.set_interleaved3(cpu_out, 128,128,128,3); | ||
221 | + //std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary); | ||
222 | + //fgx.write((char*)imgrad3.channel(0).data(), bytes); | ||
223 | + //fgx.close(); | ||
224 | + | ||
147 | //write the output file. | 225 | //write the output file. |
148 | - //for (int t0=2000; t0<=2500; t0+=100){ | 226 | + //for (int t0=0; t0<=5000; t0+=100){ |
227 | + // float t1 = t0; | ||
149 | int t0 = t; | 228 | int t0 = t; |
150 | lmax(cpu_out, cpuI, t, conn, x, y, z); | 229 | lmax(cpu_out, cpuI, t, conn, x, y, z); |
151 | //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary); | 230 | //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary); |
152 | - std::ofstream fo("shared2D-v8/" + OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary); | 231 | + std::ofstream fo( OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary); |
153 | fo.write((char*)cpu_out, bytes); | 232 | fo.write((char*)cpu_out, bytes); |
154 | fo.close(); | 233 | fo.close(); |
155 | 234 | ||
156 | // creat a file for saving the list centers | 235 | // creat a file for saving the list centers |
157 | 236 | ||
158 | - std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); | ||
159 | - // set the number of detected cells to zero. | ||
160 | - int nod = 0; | ||
161 | - if (list.is_open()){ | ||
162 | - | ||
163 | - for (int iz=0; iz<z; iz++){ | ||
164 | - for (int iy=0; iy<y; iy++){ | ||
165 | - for (int ix=0; ix<x; ix++){ | ||
166 | - | ||
167 | - int idx = iz * x * y + iy * x + ix; | ||
168 | - if (cpu_out[idx]==1){ | ||
169 | - nod++; | ||
170 | - list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ; | ||
171 | - | ||
172 | - } | ||
173 | - } | ||
174 | - } | ||
175 | - } | ||
176 | - list << "p" << "\t"; | ||
177 | - for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | ||
178 | - list << i_nod << "\t"; | ||
179 | - } | 237 | + //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); |
238 | + //// set the number of detected cells to zero. | ||
239 | + //int nod = 0; | ||
240 | + //if (list.is_open()){ | ||
180 | 241 | ||
181 | - list.close(); | ||
182 | - } | ||
183 | - | 242 | + // for (int iz=0; iz<z; iz++){ |
243 | + // for (int iy=0; iy<y; iy++){ | ||
244 | + // for (int ix=0; ix<x; ix++){ | ||
245 | + | ||
246 | + // int idx = iz * x * y + iy * x + ix; | ||
247 | + // if (cpu_out[idx]==1){ | ||
248 | + // nod++; | ||
249 | + // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ; | ||
250 | + // | ||
251 | + // } | ||
252 | + // } | ||
253 | + // } | ||
254 | + // } | ||
255 | + // list << "p" << "\t"; | ||
256 | + // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | ||
257 | + // list << i_nod << "\t"; | ||
258 | + // } | ||
259 | + | ||
260 | + //list.close(); | ||
261 | + //} | ||
262 | + //} | ||
184 | cudaDeviceReset(); | 263 | cudaDeviceReset(); |
185 | 264 | ||
186 | } | 265 | } |
1 | +#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H | ||
2 | +#define STIM_CUDA_UPDATE_DIR3_AABB_H | ||
3 | + | ||
4 | +# include <iostream> | ||
5 | +# include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | +#include "cpyToshare.cuh" | ||
8 | +#define M_PI 3.14159 | ||
9 | +#include <stim/math/circle.h> | ||
10 | +#include <stim/math/vec3.h> | ||
11 | +#include <stim/math/plane.h> | ||
12 | +#include <stim/math/vector.h> | ||
13 | +#include <stim/visualization/aabb3.h> | ||
14 | + | ||
15 | + // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. | ||
16 | + template<typename T> | ||
17 | + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | ||
18 | + //extern __shared__ float s_vote[]; | ||
19 | + | ||
20 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | ||
21 | + | ||
22 | + int grid_y = y / blockDim.y; //find the grid size along y | ||
23 | + int blockidx_y = blockIdx.y % grid_y; | ||
24 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
25 | + int zi = blockIdx.y / grid_y; | ||
26 | + if(xi >= x|| yi >= y || zi>= z) return; | ||
27 | + int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel | ||
28 | + | ||
29 | + | ||
30 | + // find the starting points for this block along the x and y directions | ||
31 | + //int bxi = blockIdx.x * blockDim.x; | ||
32 | + //int byi = blockidx_y * blockDim.y; | ||
33 | + //find the starting points and the size of the window, which will be copied to the 2D-shared memory | ||
34 | + //int bxs = bxi - rx; | ||
35 | + //int bys = byi - ry; | ||
36 | + //int xwidth = 2 * rx + blockDim.x; | ||
37 | + //int ywidth = 2 * ry + blockDim.y; | ||
38 | + //compute the coordinations of this pixel in the 2D-shared memory. | ||
39 | + //int sx_rx = threadIdx.x + rx; | ||
40 | + //int sy_ry = threadIdx.y + ry; | ||
41 | + | ||
42 | + float rx_sq = rx * rx; // compute the square for rmax | ||
43 | + float ry_sq = ry * ry; | ||
44 | + float rz_sq = rz * rz; | ||
45 | + | ||
46 | + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | ||
47 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | ||
48 | + int n =4; //set the number of points to find the boundaries of the conical voting area | ||
49 | + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter | ||
50 | + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | ||
51 | + float zc = rz * cos(g_sph[2]) ; | ||
52 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | ||
53 | + xc+=xi; | ||
54 | + yc+=yi; | ||
55 | + zc+=zi; | ||
56 | + stim::vec3<float> center(xc,yc,zc); | ||
57 | + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | ||
58 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | ||
59 | + float step = 360.0/(float) n; | ||
60 | + stim::circle<float> cir(center, d, norm); | ||
61 | + stim::aabb3<int> bb(xi,yi,zi); | ||
62 | + bb.insert(xc,yc,zc); | ||
63 | + for(float j = 0; j <360.0; j += step){ | ||
64 | + stim::vec3<float> out = cir.p(j); | ||
65 | + bb.insert(out[0], out[1], out[2]); | ||
66 | + } | ||
67 | + | ||
68 | + bb.trim_low(0,0,0); | ||
69 | + bb.trim_high(x-1, y-1, z-1); | ||
70 | + int bx,by,bz; | ||
71 | + int dx, dy, dz; | ||
72 | + float dx_sq, dy_sq, dz_sq; | ||
73 | + | ||
74 | + float dist, cos_diff; | ||
75 | + int idx_c; | ||
76 | + | ||
77 | + float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter | ||
78 | + float l_vote = 0; | ||
79 | + | ||
80 | + float id_x = g[0]; // define local variables for the x, y, and z coordinations point to the vote direction | ||
81 | + float id_y = g[1]; | ||
82 | + float id_z = g[2]; | ||
83 | + | ||
84 | + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ | ||
85 | + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis | ||
86 | + dz_sq = dz * dz; | ||
87 | + for (by=bb.low[1]; by<=bb.high[1]; by++){ | ||
88 | + dy = by - yi; //compute the distance bw the voter and the current counter along y axis | ||
89 | + dy_sq = dy * dy; | ||
90 | + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){ | ||
91 | + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis | ||
92 | + dx_sq = dx * dx; | ||
93 | + | ||
94 | + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter | ||
95 | + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter | ||
96 | + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter | ||
97 | + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter | ||
98 | + l_vote = gpu_vote[idx_c]; | ||
99 | + if (l_vote>max) { | ||
100 | + max = l_vote; | ||
101 | + id_x = dx; | ||
102 | + id_y = dy; | ||
103 | + id_z = dz; | ||
104 | + } | ||
105 | + } | ||
106 | + } | ||
107 | + } | ||
108 | + } | ||
109 | + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z); | ||
110 | + gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id); | ||
111 | + gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id); | ||
112 | + gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id); | ||
113 | + } | ||
114 | + | ||
115 | + | ||
116 | + | ||
117 | + // this kernel updates the gradient direction by the calculated voting direction. | ||
118 | + template<typename T> | ||
119 | + __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ | ||
120 | + | ||
121 | + //calculate x,y,z coordinates for this thread | ||
122 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
123 | + //find the grid size along y | ||
124 | + int grid_y = y / blockDim.y; | ||
125 | + int blockidx_y = blockIdx.y % grid_y; | ||
126 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
127 | + int zi = blockIdx.y / grid_y; | ||
128 | + int i = zi * x * y + yi * x + xi; | ||
129 | + | ||
130 | + if(xi >= x || yi >= y || zi >= z) return; | ||
131 | + //update the gradient image with the new direction direction | ||
132 | + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; | ||
133 | + gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; | ||
134 | + gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; | ||
135 | + } | ||
136 | + | ||
137 | + template<typename T> | ||
138 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
139 | + | ||
140 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
141 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | ||
142 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
143 | + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T); | ||
144 | + // allocate space on the GPU for the updated vote direction | ||
145 | + T* gpu_dir; | ||
146 | + cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); | ||
147 | + | ||
148 | + //call the kernel to calculate the new voting direction | ||
149 | + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z); | ||
150 | + | ||
151 | + | ||
152 | + //call the kernel to update the gradient direction | ||
153 | + update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); | ||
154 | + | ||
155 | + //free allocated memory | ||
156 | + cudaFree(gpu_dir); | ||
157 | + | ||
158 | + } | ||
159 | + | ||
160 | + template<typename T> | ||
161 | + void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
162 | + | ||
163 | + //calculate the number of bytes in the array | ||
164 | + unsigned int bytes = x * y * z * sizeof(T); | ||
165 | + | ||
166 | + //allocate space on the GPU for the Vote data | ||
167 | + T* gpu_vote; | ||
168 | + cudaMalloc(&gpu_vote, bytes); | ||
169 | + | ||
170 | + //copy the input vote data to the GPU | ||
171 | + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice); | ||
172 | + | ||
173 | + //allocate space on the GPU for the Gradient data | ||
174 | + T* gpu_grad; | ||
175 | + cudaMalloc(&gpu_grad, bytes*3); | ||
176 | + | ||
177 | + //copy the Gradient data to the GPU | ||
178 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | ||
179 | + | ||
180 | + //call the GPU version of the update direction function | ||
181 | + gpu_update_dir3<T>(gpu_grad, gpu_vote, cos_phi, r, x , y, z); | ||
182 | + | ||
183 | + //copy the new gradient image back to the CPU | ||
184 | + cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ; | ||
185 | + | ||
186 | + //free allocated memory | ||
187 | + cudaFree(gpu_vote); | ||
188 | + cudaFree(gpu_grad); | ||
189 | + } | ||
190 | + | ||
191 | + | ||
192 | + | ||
193 | +#endif | ||
0 | \ No newline at end of file | 194 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_VOTE3_ATOMIC_H | ||
2 | +#define STIM_CUDA_VOTE3_ATOMIC_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | +#include <stim/cuda/cudatools/error.h> | ||
8 | +#include "cpyToshare.cuh" | ||
9 | + | ||
10 | + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area | ||
11 | + template<typename T> | ||
12 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | ||
13 | + | ||
14 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | ||
15 | + | ||
16 | + int grid_y = y / blockDim.y; //find the grid size along y | ||
17 | + int blockidx_y = blockIdx.y % grid_y; | ||
18 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
19 | + int zi = blockIdx.y / grid_y; | ||
20 | + | ||
21 | + if(xi>=x || yi>=y || zi>=z) return; | ||
22 | + | ||
23 | + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter | ||
24 | + float gx_v = gpu_grad[3*i]; // find the gradient information in cartesian coordinate for the voter - global memory fetch | ||
25 | + float gy_v = gpu_grad[3*i+1]; | ||
26 | + float gz_v = gpu_grad[3*i+2]; | ||
27 | + | ||
28 | + float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter | ||
29 | + | ||
30 | + float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter | ||
31 | + float gy_v_n = gy_v/mag_v; | ||
32 | + float gz_v_n = gz_v/mag_v; | ||
33 | + | ||
34 | + float rx_sq = rx * rx; // compute the square for rmax | ||
35 | + float ry_sq = ry * ry; | ||
36 | + float rz_sq = rz * rz; | ||
37 | + float x_sq, y_sq, z_sq, d_c, cos_diff; | ||
38 | + int xi_c, yi_c, zi_c, idx_c; | ||
39 | + | ||
40 | + for (int z_c=-rz; z_c<=rz; z_c++){ | ||
41 | + zi_c = zi + z_c; // calculate the z position for the current counter | ||
42 | + if (zi_c <z && zi_c>=0){ // make sure the current counter is inside the image | ||
43 | + z_sq = z_c * z_c; | ||
44 | + for (int y_c=-ry; y_c<=ry; y_c++){ | ||
45 | + yi_c = yi + y_c; | ||
46 | + if (yi_c < y && yi_c>=0){ | ||
47 | + y_sq = y_c * y_c; | ||
48 | + for (int x_c=-rx; x_c<=rx; x_c++){ | ||
49 | + xi_c = xi + x_c; | ||
50 | + if (xi_c < x && xi_c>=0){ | ||
51 | + x_sq = x_c * x_c; | ||
52 | + d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter | ||
53 | + cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter | ||
54 | + if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ | ||
55 | + idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter | ||
56 | + atomicAdd (&gpu_vote[idx_c] , mag_v); | ||
57 | + } | ||
58 | + } | ||
59 | + } | ||
60 | + } | ||
61 | + } | ||
62 | + } | ||
63 | + } | ||
64 | + } | ||
65 | + | ||
66 | + template<typename T> | ||
67 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
68 | + | ||
69 | + | ||
70 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
71 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | ||
72 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
73 | + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*4*sizeof(T); | ||
74 | + //call the kernel to do the voting | ||
75 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); | ||
76 | + | ||
77 | + } | ||
78 | + | ||
79 | + | ||
80 | + template<typename T> | ||
81 | + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
82 | + | ||
83 | + //calculate the number of bytes in the array | ||
84 | + unsigned int bytes = x * y * z * sizeof(T); | ||
85 | + | ||
86 | + | ||
87 | + //allocate space on the GPU for the Vote Image | ||
88 | + T* gpu_vote; | ||
89 | + cudaMalloc(&gpu_vote, bytes); | ||
90 | + | ||
91 | + //allocate space on the GPU for the input Gradient image | ||
92 | + T* gpu_grad; | ||
93 | + cudaMalloc(&gpu_grad, bytes*3); | ||
94 | + | ||
95 | + | ||
96 | + //copy the Gradient data to the GPU | ||
97 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | ||
98 | + | ||
99 | + | ||
100 | + //call the GPU version of the vote calculation function | ||
101 | + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z); | ||
102 | + | ||
103 | + //copy the Vote Data back to the CPU | ||
104 | + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; | ||
105 | + | ||
106 | + //free allocated memory | ||
107 | + cudaFree(gpu_vote); | ||
108 | + cudaFree(gpu_grad); | ||
109 | + | ||
110 | + } | ||
111 | + | ||
112 | + | ||
113 | +#endif | ||
0 | \ No newline at end of file | 114 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_VOTE3_ATOMIC_AABB_H | ||
2 | +#define STIM_CUDA_VOTE3_ATOMIC_AABB_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | +#include <stim/cuda/cudatools/error.h> | ||
8 | +#include "cpyToshare.cuh" | ||
9 | +#define M_PI 3.14159 | ||
10 | +#include <stim/math/circle.h> | ||
11 | +#include <stim/math/vec3.h> | ||
12 | +#include <stim/math/plane.h> | ||
13 | +#include <stim/math/vector.h> | ||
14 | +#include <stim/visualization/aabb3.h> | ||
15 | + | ||
16 | + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area | ||
17 | + template<typename T> | ||
18 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | ||
19 | + | ||
20 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | ||
21 | + | ||
22 | + int grid_y = y / blockDim.y; //find the grid size along y | ||
23 | + int blockidx_y = blockIdx.y % grid_y; | ||
24 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
25 | + int zi = blockIdx.y / grid_y; | ||
26 | + | ||
27 | + if(xi>=x || yi>=y || zi>=z) return; | ||
28 | + | ||
29 | + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter | ||
30 | + | ||
31 | + | ||
32 | + float rx_sq = rx * rx; // compute the square for rmax | ||
33 | + float ry_sq = ry * ry; | ||
34 | + float rz_sq = rz * rz; | ||
35 | + float dist, cos_diff; | ||
36 | + int idx_c; | ||
37 | + | ||
38 | + //float rmax = sqrt(rx_sq + ry_sq + rz_sq); | ||
39 | + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | ||
40 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | ||
41 | + int n =4; //set the number of points to find the boundaries of the conical voting area | ||
42 | + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter | ||
43 | + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | ||
44 | + float zc = rz * cos(g_sph[2]) ; | ||
45 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | ||
46 | + xc+=xi; | ||
47 | + yc+=yi; | ||
48 | + zc+=zi; | ||
49 | + stim::vec3<float> center(xc,yc,zc); | ||
50 | + | ||
51 | + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | ||
52 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | ||
53 | + float step = 360.0/(float) n; | ||
54 | + stim::circle<float> cir(center, d, norm); | ||
55 | + stim::aabb3<int> bb(xi,yi,zi); | ||
56 | + bb.insert(xc,yc,zc); | ||
57 | + for(float j = 0; j <360.0; j += step){ | ||
58 | + stim::vec3<float> out = cir.p(j); | ||
59 | + bb.insert(out[0], out[1], out[2]); | ||
60 | + } | ||
61 | + | ||
62 | + bb.trim_low(0,0,0); | ||
63 | + bb.trim_high(x-1, y-1, z-1); | ||
64 | + int bx,by,bz; | ||
65 | + int dx, dy, dz; | ||
66 | + float dx_sq, dy_sq, dz_sq; | ||
67 | + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ | ||
68 | + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis | ||
69 | + dz_sq = dz * dz; | ||
70 | + for (by=bb.low[1]; by<=bb.high[1]; by++){ | ||
71 | + dy = by - yi; //compute the distance bw the voter and the current counter along y axis | ||
72 | + dy_sq = dy * dy; | ||
73 | + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){ | ||
74 | + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis | ||
75 | + dx_sq = dx * dx; | ||
76 | + | ||
77 | + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter | ||
78 | + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter | ||
79 | + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter | ||
80 | + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter | ||
81 | + atomicAdd (&gpu_vote[idx_c] , g_sph[0]); | ||
82 | + } | ||
83 | + } | ||
84 | + } | ||
85 | + } | ||
86 | + } | ||
87 | + | ||
88 | + template<typename T> | ||
89 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
90 | + | ||
91 | + | ||
92 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
93 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | ||
94 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
95 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting | ||
96 | + | ||
97 | + } | ||
98 | + | ||
99 | + | ||
100 | + template<typename T> | ||
101 | + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | ||
102 | + | ||
103 | + //calculate the number of bytes in the array | ||
104 | + unsigned int bytes = x * y * z * sizeof(T); | ||
105 | + | ||
106 | + | ||
107 | + //allocate space on the GPU for the Vote Image | ||
108 | + T* gpu_vote; | ||
109 | + cudaMalloc(&gpu_vote, bytes); | ||
110 | + | ||
111 | + //allocate space on the GPU for the input Gradient image | ||
112 | + T* gpu_grad; | ||
113 | + cudaMalloc(&gpu_grad, bytes*3); | ||
114 | + | ||
115 | + | ||
116 | + //copy the Gradient data to the GPU | ||
117 | + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); | ||
118 | + | ||
119 | + | ||
120 | + //call the GPU version of the vote calculation function | ||
121 | + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z); | ||
122 | + | ||
123 | + //copy the Vote Data back to the CPU | ||
124 | + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; | ||
125 | + | ||
126 | + //free allocated memory | ||
127 | + cudaFree(gpu_vote); | ||
128 | + cudaFree(gpu_grad); | ||
129 | + | ||
130 | + } | ||
131 | + | ||
132 | + | ||
133 | +#endif | ||
0 | \ No newline at end of file | 134 | \ No newline at end of file |