Commit a744d027c4d9d6583db594dc75811c6ff90eaef6

Authored by Laila Saadatifard
1 parent 46820b25

upload the ivote3 functions using the new method for finding voting area (bounda…

…ries of voting area)'
Matlab_3D/gt2.vol 0 → 100644
No preview for this file type
Matlab_3D/validation.m
1 clear all; 1 clear all;
2 -disp('***************** NEW RUN *********************'); 2 +%for t=100:100:5000
  3 +t=2350;
3 X = 128; 4 X = 128;
4 Y = 128; 5 Y = 128;
5 Z = 128; 6 Z = 128;
@@ -7,15 +8,15 @@ D = 10; @@ -7,15 +8,15 @@ D = 10;
7 t0=1; 8 t0=1;
8 r1=12; 9 r1=12;
9 r2=10; 10 r2=10;
10 -t=2300;  
11 -itr=8;  
12 -vote=10; 11 +% t=2300;
  12 +itr=5;
  13 +vote=7;
13 std = [5 5]; 14 std = [5 5];
14 gt_filename = 'gt2.vol'; 15 gt_filename = 'gt2.vol';
15 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); 16 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote);
16 -out_filename = sprintf('D:/build/ivote3-bld/shared2D-v8/centers.%d.vol',t); 17 +out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t);
17 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); 18 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D);
18 -txt_filename = sprintf('0-t%d--1.txt',t); 19 +txt_filename = sprintf('D:/build/ivote3-bld/0-t%d-atomic-aabb.txt',t);
19 spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote); 20 spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote);
20 fid0 = fopen(gt_filename); 21 fid0 = fopen(gt_filename);
21 gt = fread(fid0,[X Y*Z], 'single'); 22 gt = fread(fid0,[X Y*Z], 'single');
@@ -79,73 +80,73 @@ for k=1:size(id_ref,1) @@ -79,73 +80,73 @@ for k=1:size(id_ref,1)
79 oseg = oseg+1; 80 oseg = oseg+1;
80 end 81 end
81 end 82 end
82 -%****  
83 -% check if the oversegmented detected cells could be assigned to the gt's element with no detected cell!  
84 -%find the indices of gt's element with no detected cell  
85 -u_idx1 = find(b==0);  
86 -%find the coordinates of gt's  
87 -ref1 = ref(u_idx1,:);  
88 -% find the indices of detected cells which are assigned to a gt's element  
89 -ub = unique(b(b>0));  
90 -% set the number of detected cell which are not assigned to a gt's element to zero  
91 -mm=0;  
92 -% find the indices of detected cell which are not assigned to a gt's element  
93 -for j=1:size(cent,1)  
94 - z = sum(ub(:)==j);  
95 - if z==0  
96 - mm = mm+1;  
97 - cent_id1(mm) = j;  
98 - end  
99 -end  
100 -% find the coordinated of of detected cells  
101 -cent1 = cent(cent_id1,:);  
102 -  
103 -% check if there are osme low enough distances  
104 -[idx1, dist1] = knnsearch(cent1,ref1);  
105 -ind_eq_idx_rc = zeros(numel(idx1), 10);  
106 -%set number of cells with the same detected cell center to zero.  
107 -noc_rc=0;  
108 -for i_rc=1:numel(idx1)  
109 - % check the current element hasn't considered yet  
110 - s_ind_rc = sum(ind_eq_idx_rc==i_rc);  
111 - if s_ind_rc==0  
112 - % check how many of the elemnts assigned to the same detected cell  
113 - s_rc = sum(idx1==idx1(i_rc));  
114 - if s_rc>1  
115 - noc_rc = noc_rc+1;  
116 - %save the index and number of gt's elements with the same assigned detected cells.  
117 - id_ref_rc(noc_rc,1) = i_rc;  
118 - id_ref_rc(noc_rc,2) = s_rc;  
119 - ind1_rc = find(idx1==idx1(i_rc));  
120 - ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc;  
121 - end  
122 - end  
123 -end  
124 -% determine those indices which hs assigned to only one detected cell  
125 -b_rc = idx1;  
126 -u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0));  
127 -b_rc(u_eq_idx_rc)=0;  
128 -% set the number of over sefgmented cells to zero.  
129 -oseg_rc=0;  
130 -for k_rc=1:size(id_ref_rc,1)  
131 - k1_rc = id_ref_rc(k_rc,1);  
132 - k2_rc = id_ref_rc(k_rc,2);  
133 - %find the minimum distance to the detected cell happened for which of the gt's element  
134 - l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc);  
135 - [~, local_id_min_rc] = min(dist1(l_rc));  
136 - % set the element with minimum distance to the corresponding detected cell  
137 - b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc);  
138 - %remove the proper element from the list of indices with same designated detected cells  
139 - u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0;  
140 - % check if the indices remained in the list has distance less than the value is set for validation  
141 - ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc));  
142 - distl_rc = dist1(ly_rc);  
143 - sl_rc = sum(distl_rc<D);  
144 - % if the distance is low enought, consider the corresponding cell as oversegmented  
145 - if sl_rc>0  
146 - oseg_rc = oseg_rc+1;  
147 - end  
148 -end 83 +% %****
  84 +% % check if the oversegmented detected cells could be assigned to the gt's element with no detected cell!
  85 +% %find the indices of gt's element with no detected cell
  86 +% u_idx1 = find(b==0);
  87 +% %find the coordinates of gt's
  88 +% ref1 = ref(u_idx1,:);
  89 +% % find the indices of detected cells which are assigned to a gt's element
  90 +% ub = unique(b(b>0));
  91 +% % set the number of detected cell which are not assigned to a gt's element to zero
  92 +% mm=0;
  93 +% % find the indices of detected cell which are not assigned to a gt's element
  94 +% for j=1:size(cent,1)
  95 +% z = sum(ub(:)==j);
  96 +% if z==0
  97 +% mm = mm+1;
  98 +% cent_id1(mm) = j;
  99 +% end
  100 +% end
  101 +% % find the coordinated of of detected cells
  102 +% cent1 = cent(cent_id1,:);
  103 +%
  104 +% % check if there are osme low enough distances
  105 +% [idx1, dist1] = knnsearch(cent1,ref1);
  106 +% ind_eq_idx_rc = zeros(numel(idx1), 10);
  107 +% %set number of cells with the same detected cell center to zero.
  108 +% noc_rc=0;
  109 +% for i_rc=1:numel(idx1)
  110 +% % check the current element hasn't considered yet
  111 +% s_ind_rc = sum(ind_eq_idx_rc==i_rc);
  112 +% if s_ind_rc==0
  113 +% % check how many of the elemnts assigned to the same detected cell
  114 +% s_rc = sum(idx1==idx1(i_rc));
  115 +% if s_rc>1
  116 +% noc_rc = noc_rc+1;
  117 +% %save the index and number of gt's elements with the same assigned detected cells.
  118 +% id_ref_rc(noc_rc,1) = i_rc;
  119 +% id_ref_rc(noc_rc,2) = s_rc;
  120 +% ind1_rc = find(idx1==idx1(i_rc));
  121 +% ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc;
  122 +% end
  123 +% end
  124 +% end
  125 +% % determine those indices which hs assigned to only one detected cell
  126 +% b_rc = idx1;
  127 +% u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0));
  128 +% b_rc(u_eq_idx_rc)=0;
  129 +% % set the number of over sefgmented cells to zero.
  130 +% oseg_rc=0;
  131 +% for k_rc=1:size(id_ref_rc,1)
  132 +% k1_rc = id_ref_rc(k_rc,1);
  133 +% k2_rc = id_ref_rc(k_rc,2);
  134 +% %find the minimum distance to the detected cell happened for which of the gt's element
  135 +% l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc);
  136 +% [~, local_id_min_rc] = min(dist1(l_rc));
  137 +% % set the element with minimum distance to the corresponding detected cell
  138 +% b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc);
  139 +% %remove the proper element from the list of indices with same designated detected cells
  140 +% u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0;
  141 +% % check if the indices remained in the list has distance less than the value is set for validation
  142 +% ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc));
  143 +% distl_rc = dist1(ly_rc);
  144 +% sl_rc = sum(distl_rc<D);
  145 +% % if the distance is low enought, consider the corresponding cell as oversegmented
  146 +% if sl_rc>0
  147 +% oseg_rc = oseg_rc+1;
  148 +% end
  149 +% end
149 150
150 %****** 151 %******
151 % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value 152 % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value
@@ -154,11 +155,11 @@ b_dist = dist; @@ -154,11 +155,11 @@ b_dist = dist;
154 % remove the disatances for those elements with no detected cells from distance array 155 % remove the disatances for those elements with no detected cells from distance array
155 b_dist(b_ind)=-1; 156 b_dist(b_ind)=-1;
156 157
157 -b_ind_rc = find(b_rc==0);  
158 -b_dist_rc = dist1;  
159 -b_dist_rc(b_ind_rc)=-1; 158 +% b_ind_rc = find(b_rc==0);
  159 +% b_dist_rc = dist1;
  160 +% b_dist_rc(b_ind_rc)=-1;
160 % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements. 161 % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements.
161 -TP = sum(b_dist>=0) - sum(b_dist>D) + (sum(b_dist_rc>=0) - sum(b_dist_rc>D)); 162 +TP = sum(b_dist>=0) - sum(b_dist>D); % + (sum(b_dist_rc>=0) - sum(b_dist_rc>D));
162 % calculate False Negative, number of gt's elements with no detected cells. 163 % calculate False Negative, number of gt's elements with no detected cells.
163 FN = size(ref, 1) - TP; 164 FN = size(ref, 1) - TP;
164 % calculate False Positive, number of detected cells, which their distance to the gt's element is long 165 % calculate False Positive, number of detected cells, which their distance to the gt's element is long
@@ -179,3 +180,5 @@ fprintf(fid_text, &#39;Precision = %f\n&#39;, Precision); @@ -179,3 +180,5 @@ fprintf(fid_text, &#39;Precision = %f\n&#39;, Precision);
179 fprintf(fid_text, 'Accuracy = %f\n', Accuracy); 180 fprintf(fid_text, 'Accuracy = %f\n', Accuracy);
180 fprintf(fid_text, 'Recall = %f\n', Recall); 181 fprintf(fid_text, 'Recall = %f\n', Recall);
181 fclose(fid_text); 182 fclose(fid_text);
  183 +clear all;
  184 +%end
182 \ No newline at end of file 185 \ No newline at end of file
cpp/CMakeLists.txt
@@ -10,6 +10,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} &quot;${CMAKE_SOURCE_DIR}&quot;) @@ -10,6 +10,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} &quot;${CMAKE_SOURCE_DIR}&quot;)
10 #set up CUDA 10 #set up CUDA
11 find_package(CUDA REQUIRED) 11 find_package(CUDA REQUIRED)
12 12
  13 +#set up opencv
  14 +find_package(opencv)
  15 +
13 #find the STIM library 16 #find the STIM library
14 find_package(STIM REQUIRED) 17 find_package(STIM REQUIRED)
15 18
cpp/args.txt 0 → 100644
  1 +nissl-float-128.128.128.vol 0-out --t 2350 --x 128 --y 128 --z 128 --anisotropy 1 --invert
0 \ No newline at end of file 2 \ No newline at end of file
  1 +/*#include "circle_check.cuh"
  2 +
  3 +void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z){
  4 +gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
  5 +}
  6 +*/
  7 +
  8 +
1 #include "gaussian_blur3.cuh" 9 #include "gaussian_blur3.cuh"
2 #include "gradient3.cuh" 10 #include "gradient3.cuh"
3 #include "mag3.cuh" 11 #include "mag3.cuh"
4 -#include "vote3.cuh" 12 +#include "vote3_atomic_aabb.cuh"
5 #include "update_dir3.cuh" 13 #include "update_dir3.cuh"
6 #include "local_max3.cuh" 14 #include "local_max3.cuh"
7 15
@@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, @@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
10 int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ 18 int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
11 19
12 20
13 - cudaSetDevice(0); 21 + cudaSetDevice(1);
14 // compute the number of bytes in the input data 22 // compute the number of bytes in the input data
15 unsigned int bytes = x * y * z * sizeof(float); 23 unsigned int bytes = x * y * z * sizeof(float);
16 24
17 - //assign memory on gpu for the input data. 25 + //assign memory on gpu for the input data.z
18 float* gpuI0; 26 float* gpuI0;
19 cudaMalloc(&gpuI0, bytes); 27 cudaMalloc(&gpuI0, bytes);
20 28
@@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, @@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
41 49
42 //call the vote function. 50 //call the vote function.
43 for (int i = 0; i < iter; i++){ 51 for (int i = 0; i < iter; i++){
44 - 52 +
  53 + cudaMemset(gpu_vote, 0, bytes);
45 gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); 54 gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z);
46 cudaDeviceSynchronize(); 55 cudaDeviceSynchronize();
47 - /*if (i==7)  
48 - cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);*/  
49 -  
50 - if (phi >= d_phi){ 56 +
  57 + //if (phi >= d_phi){
51 gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); 58 gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
52 cudaDeviceSynchronize(); 59 cudaDeviceSynchronize();
53 phi = phi - d_phi; 60 phi = phi - d_phi;
54 cos_phi = cos(phi); 61 cos_phi = cos(phi);
55 - } 62 + //}
56 63
57 } 64 }
58 65
@@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, @@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
78 void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ 85 void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
79 unsigned int bytes = x * y * z * sizeof(float); 86 unsigned int bytes = x * y * z * sizeof(float);
80 87
  88 + cudaSetDevice(1);
  89 +
81 //assign memory on gpu for the input data. 90 //assign memory on gpu for the input data.
82 float* gpuV; 91 float* gpuV;
83 cudaMalloc(&gpuV, bytes); 92 cudaMalloc(&gpuV, bytes);
@@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u @@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u
96 105
97 cudaFree(gpuV); 106 cudaFree(gpuV);
98 cudaFree(gpuOut); 107 cudaFree(gpuOut);
99 -}  
100 \ No newline at end of file 108 \ No newline at end of file
  109 +}
  110 +
@@ -9,6 +9,84 @@ @@ -9,6 +9,84 @@
9 #include <stim/image/image.h> 9 #include <stim/image/image.h>
10 #define pi 3.14159 10 #define pi 3.14159
11 11
  12 +#define M_PI 3.14159
  13 +#include <stim/math/circle.h>
  14 +#include <stim/math/vec3.h>
  15 +#include <stim/math/plane.h>
  16 +#include <stim/math/vector.h>
  17 +//#include <cuda.h>
  18 +//#include <stim/cuda/cudatools.h>
  19 +//#include <stim/cuda/cudatools/error.h>
  20 +
  21 +
  22 +/*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z);
  23 +
  24 +int main(){
  25 +
  26 + int n=20;
  27 + float rmax;
  28 + float phi_deg;
  29 + float phi;
  30 + rmax=4;
  31 + phi_deg = 15;
  32 + phi = phi_deg * pi/180;
  33 + int x,y,z;
  34 + x=y=z=1;
  35 + unsigned int size = x*y*z*sizeof (float);
  36 + float* cpu_grad = (float*) malloc(3*size);
  37 + float* gpu_grad;
  38 + cudaMalloc(&gpu_grad, 3*size);
  39 + cpu_grad[0]=1;
  40 + cpu_grad[1]=0;
  41 + cpu_grad[2]=-0.5;
  42 + cudaMemcpy(gpu_grad, cpu_grad, 3*size, cudaMemcpyHostToDevice);
  43 + float* cpu_out = (float*) malloc(3*size*(n+1));
  44 + float* gpu_out;
  45 + cudaMalloc(&gpu_out, 3*size*(n+1));
  46 + test_3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
  47 + cudaMemcpy(cpu_out, gpu_out, 3*size*(n+1), cudaMemcpyDeviceToHost);
  48 + std::ofstream list("circle_check_cuda1.txt");
  49 + if (list.is_open()){
  50 + for (int j=0; j<=n; ++j)
  51 + list << cpu_out[3*j] << '\t' << cpu_out[3*j +1] << '\t' << cpu_out[3*j + 2] << '\n';
  52 + }
  53 + list.close();
  54 + */
  55 + /*
  56 + int n=10;
  57 + stim::circle<float> cir;
  58 + float* c0= (float*) malloc(3*sizeof(float));
  59 + c0[0] =-4;
  60 + c0[1]=0;
  61 + c0[2] = 3;
  62 + stim::vec3<float> c(c0[0],c0[1],c0[2]);
  63 + float len = c.len();
  64 + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);
  65 + std::cout<< len << '\n';
  66 + std::cout<< norm << '\n';
  67 + cir.center(c);
  68 + cir.normal(norm);
  69 + cir.scale(2);
  70 + stim::vec3<float> out = cir.p(45);
  71 + std::vector<stim::vec3<float>> out2 = cir.getPoints(n);
  72 +
  73 + std::cout<< out << '\n';
  74 + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';
  75 + std::cout<< c << '\n';
  76 +
  77 + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)
  78 + std::cout << *i << '\n';
  79 + std::ofstream list("circle_check.txt");
  80 + if (list.is_open()){
  81 + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)
  82 + list << *j << '\n';
  83 + }
  84 + list.close();
  85 + std::cin >> n;
  86 +
  87 +}
  88 +
  89 +*/
12 90
13 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], 91 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
14 unsigned int x, unsigned int y, unsigned int z); 92 unsigned int x, unsigned int y, unsigned int z);
@@ -98,7 +176,7 @@ int main(int argc, char** argv){ @@ -98,7 +176,7 @@ int main(int argc, char** argv){
98 float t = args["t"].as_float(); 176 float t = args["t"].as_float();
99 //set the anisotropy 177 //set the anisotropy
100 float anisotropy = args["anisotropy"].as_float(); 178 float anisotropy = args["anisotropy"].as_float();
101 - unsigned int rmax = 10 ; 179 + unsigned int rmax = 10;
102 unsigned int r[3] = { 12, rmax, rmax}; 180 unsigned int r[3] = { 12, rmax, rmax};
103 float std = 5; 181 float std = 5;
104 float sigma[3] = { std, std, std}; 182 float sigma[3] = { std, std, std};
@@ -137,50 +215,51 @@ int main(int argc, char** argv){ @@ -137,50 +215,51 @@ int main(int argc, char** argv){
137 std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary); 215 std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary);
138 fblur.write((char*)cpuI, bytes); 216 fblur.write((char*)cpuI, bytes);
139 fblur.close(); 217 fblur.close();
140 - /*  
141 - stim::image<float>imgrad3;  
142 - imgrad3.set_interleaved3(cpu_out, 128,128,128,3);  
143 - std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary);  
144 - fgx.write((char*)imgrad3.channel(0).data(), bytes);  
145 - fgx.close();  
146 - */ 218 +
  219 + //stim::image<float>imgrad3;
  220 + //imgrad3.set_interleaved3(cpu_out, 128,128,128,3);
  221 + //std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary);
  222 + //fgx.write((char*)imgrad3.channel(0).data(), bytes);
  223 + //fgx.close();
  224 +
147 //write the output file. 225 //write the output file.
148 - //for (int t0=2000; t0<=2500; t0+=100){ 226 + //for (int t0=0; t0<=5000; t0+=100){
  227 + // float t1 = t0;
149 int t0 = t; 228 int t0 = t;
150 lmax(cpu_out, cpuI, t, conn, x, y, z); 229 lmax(cpu_out, cpuI, t, conn, x, y, z);
151 //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary); 230 //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary);
152 - std::ofstream fo("shared2D-v8/" + OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary); 231 + std::ofstream fo( OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary);
153 fo.write((char*)cpu_out, bytes); 232 fo.write((char*)cpu_out, bytes);
154 fo.close(); 233 fo.close();
155 234
156 // creat a file for saving the list centers 235 // creat a file for saving the list centers
157 236
158 - std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");  
159 - // set the number of detected cells to zero.  
160 - int nod = 0;  
161 - if (list.is_open()){  
162 -  
163 - for (int iz=0; iz<z; iz++){  
164 - for (int iy=0; iy<y; iy++){  
165 - for (int ix=0; ix<x; ix++){  
166 -  
167 - int idx = iz * x * y + iy * x + ix;  
168 - if (cpu_out[idx]==1){  
169 - nod++;  
170 - list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;  
171 -  
172 - }  
173 - }  
174 - }  
175 - }  
176 - list << "p" << "\t";  
177 - for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){  
178 - list << i_nod << "\t";  
179 - } 237 + //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");
  238 + //// set the number of detected cells to zero.
  239 + //int nod = 0;
  240 + //if (list.is_open()){
180 241
181 - list.close();  
182 - }  
183 - 242 + // for (int iz=0; iz<z; iz++){
  243 + // for (int iy=0; iy<y; iy++){
  244 + // for (int ix=0; ix<x; ix++){
  245 +
  246 + // int idx = iz * x * y + iy * x + ix;
  247 + // if (cpu_out[idx]==1){
  248 + // nod++;
  249 + // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;
  250 + //
  251 + // }
  252 + // }
  253 + // }
  254 + // }
  255 + // list << "p" << "\t";
  256 + // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
  257 + // list << i_nod << "\t";
  258 + // }
  259 +
  260 + //list.close();
  261 + //}
  262 + //}
184 cudaDeviceReset(); 263 cudaDeviceReset();
185 264
186 } 265 }
cpp/update_dir3_aabb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H
  2 +#define STIM_CUDA_UPDATE_DIR3_AABB_H
  3 +
  4 +# include <iostream>
  5 +# include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include "cpyToshare.cuh"
  8 +#define M_PI 3.14159
  9 +#include <stim/math/circle.h>
  10 +#include <stim/math/vec3.h>
  11 +#include <stim/math/plane.h>
  12 +#include <stim/math/vector.h>
  13 +#include <stim/visualization/aabb3.h>
  14 +
  15 + // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
  16 + template<typename T>
  17 + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  18 + //extern __shared__ float s_vote[];
  19 +
  20 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  21 +
  22 + int grid_y = y / blockDim.y; //find the grid size along y
  23 + int blockidx_y = blockIdx.y % grid_y;
  24 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  25 + int zi = blockIdx.y / grid_y;
  26 + if(xi >= x|| yi >= y || zi>= z) return;
  27 + int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel
  28 +
  29 +
  30 + // find the starting points for this block along the x and y directions
  31 + //int bxi = blockIdx.x * blockDim.x;
  32 + //int byi = blockidx_y * blockDim.y;
  33 + //find the starting points and the size of the window, which will be copied to the 2D-shared memory
  34 + //int bxs = bxi - rx;
  35 + //int bys = byi - ry;
  36 + //int xwidth = 2 * rx + blockDim.x;
  37 + //int ywidth = 2 * ry + blockDim.y;
  38 + //compute the coordinations of this pixel in the 2D-shared memory.
  39 + //int sx_rx = threadIdx.x + rx;
  40 + //int sy_ry = threadIdx.y + ry;
  41 +
  42 + float rx_sq = rx * rx; // compute the square for rmax
  43 + float ry_sq = ry * ry;
  44 + float rz_sq = rz * rz;
  45 +
  46 + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
  47 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  48 + int n =4; //set the number of points to find the boundaries of the conical voting area
  49 + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter
  50 + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
  51 + float zc = rz * cos(g_sph[2]) ;
  52 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  53 + xc+=xi;
  54 + yc+=yi;
  55 + zc+=zi;
  56 + stim::vec3<float> center(xc,yc,zc);
  57 + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  58 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  59 + float step = 360.0/(float) n;
  60 + stim::circle<float> cir(center, d, norm);
  61 + stim::aabb3<int> bb(xi,yi,zi);
  62 + bb.insert(xc,yc,zc);
  63 + for(float j = 0; j <360.0; j += step){
  64 + stim::vec3<float> out = cir.p(j);
  65 + bb.insert(out[0], out[1], out[2]);
  66 + }
  67 +
  68 + bb.trim_low(0,0,0);
  69 + bb.trim_high(x-1, y-1, z-1);
  70 + int bx,by,bz;
  71 + int dx, dy, dz;
  72 + float dx_sq, dy_sq, dz_sq;
  73 +
  74 + float dist, cos_diff;
  75 + int idx_c;
  76 +
  77 + float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter
  78 + float l_vote = 0;
  79 +
  80 + float id_x = g[0]; // define local variables for the x, y, and z coordinations point to the vote direction
  81 + float id_y = g[1];
  82 + float id_z = g[2];
  83 +
  84 + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
  85 + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
  86 + dz_sq = dz * dz;
  87 + for (by=bb.low[1]; by<=bb.high[1]; by++){
  88 + dy = by - yi; //compute the distance bw the voter and the current counter along y axis
  89 + dy_sq = dy * dy;
  90 + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){
  91 + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis
  92 + dx_sq = dx * dx;
  93 +
  94 + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter
  95 + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter
  96 + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter
  97 + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter
  98 + l_vote = gpu_vote[idx_c];
  99 + if (l_vote>max) {
  100 + max = l_vote;
  101 + id_x = dx;
  102 + id_y = dy;
  103 + id_z = dz;
  104 + }
  105 + }
  106 + }
  107 + }
  108 + }
  109 + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z);
  110 + gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id);
  111 + gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id);
  112 + gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id);
  113 + }
  114 +
  115 +
  116 +
  117 + // this kernel updates the gradient direction by the calculated voting direction.
  118 + template<typename T>
  119 + __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){
  120 +
  121 + //calculate x,y,z coordinates for this thread
  122 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  123 + //find the grid size along y
  124 + int grid_y = y / blockDim.y;
  125 + int blockidx_y = blockIdx.y % grid_y;
  126 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  127 + int zi = blockIdx.y / grid_y;
  128 + int i = zi * x * y + yi * x + xi;
  129 +
  130 + if(xi >= x || yi >= y || zi >= z) return;
  131 + //update the gradient image with the new direction direction
  132 + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0];
  133 + gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1];
  134 + gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2];
  135 + }
  136 +
  137 + template<typename T>
  138 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  139 +
  140 + unsigned int max_threads = stim::maxThreadsPerBlock();
  141 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  142 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  143 + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T);
  144 + // allocate space on the GPU for the updated vote direction
  145 + T* gpu_dir;
  146 + cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3);
  147 +
  148 + //call the kernel to calculate the new voting direction
  149 + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z);
  150 +
  151 +
  152 + //call the kernel to update the gradient direction
  153 + update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z);
  154 +
  155 + //free allocated memory
  156 + cudaFree(gpu_dir);
  157 +
  158 + }
  159 +
  160 + template<typename T>
  161 + void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  162 +
  163 + //calculate the number of bytes in the array
  164 + unsigned int bytes = x * y * z * sizeof(T);
  165 +
  166 + //allocate space on the GPU for the Vote data
  167 + T* gpu_vote;
  168 + cudaMalloc(&gpu_vote, bytes);
  169 +
  170 + //copy the input vote data to the GPU
  171 + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice);
  172 +
  173 + //allocate space on the GPU for the Gradient data
  174 + T* gpu_grad;
  175 + cudaMalloc(&gpu_grad, bytes*3);
  176 +
  177 + //copy the Gradient data to the GPU
  178 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  179 +
  180 + //call the GPU version of the update direction function
  181 + gpu_update_dir3<T>(gpu_grad, gpu_vote, cos_phi, r, x , y, z);
  182 +
  183 + //copy the new gradient image back to the CPU
  184 + cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ;
  185 +
  186 + //free allocated memory
  187 + cudaFree(gpu_vote);
  188 + cudaFree(gpu_grad);
  189 + }
  190 +
  191 +
  192 +
  193 +#endif
0 \ No newline at end of file 194 \ No newline at end of file
cpp/vote3_atomic.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE3_ATOMIC_H
  2 +#define STIM_CUDA_VOTE3_ATOMIC_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/cudatools/error.h>
  8 +#include "cpyToshare.cuh"
  9 +
  10 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  11 + template<typename T>
  12 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  13 +
  14 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  15 +
  16 + int grid_y = y / blockDim.y; //find the grid size along y
  17 + int blockidx_y = blockIdx.y % grid_y;
  18 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  19 + int zi = blockIdx.y / grid_y;
  20 +
  21 + if(xi>=x || yi>=y || zi>=z) return;
  22 +
  23 + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter
  24 + float gx_v = gpu_grad[3*i]; // find the gradient information in cartesian coordinate for the voter - global memory fetch
  25 + float gy_v = gpu_grad[3*i+1];
  26 + float gz_v = gpu_grad[3*i+2];
  27 +
  28 + float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter
  29 +
  30 + float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter
  31 + float gy_v_n = gy_v/mag_v;
  32 + float gz_v_n = gz_v/mag_v;
  33 +
  34 + float rx_sq = rx * rx; // compute the square for rmax
  35 + float ry_sq = ry * ry;
  36 + float rz_sq = rz * rz;
  37 + float x_sq, y_sq, z_sq, d_c, cos_diff;
  38 + int xi_c, yi_c, zi_c, idx_c;
  39 +
  40 + for (int z_c=-rz; z_c<=rz; z_c++){
  41 + zi_c = zi + z_c; // calculate the z position for the current counter
  42 + if (zi_c <z && zi_c>=0){ // make sure the current counter is inside the image
  43 + z_sq = z_c * z_c;
  44 + for (int y_c=-ry; y_c<=ry; y_c++){
  45 + yi_c = yi + y_c;
  46 + if (yi_c < y && yi_c>=0){
  47 + y_sq = y_c * y_c;
  48 + for (int x_c=-rx; x_c<=rx; x_c++){
  49 + xi_c = xi + x_c;
  50 + if (xi_c < x && xi_c>=0){
  51 + x_sq = x_c * x_c;
  52 + d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter
  53 + cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter
  54 + if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){
  55 + idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter
  56 + atomicAdd (&gpu_vote[idx_c] , mag_v);
  57 + }
  58 + }
  59 + }
  60 + }
  61 + }
  62 + }
  63 + }
  64 + }
  65 +
  66 + template<typename T>
  67 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  68 +
  69 +
  70 + unsigned int max_threads = stim::maxThreadsPerBlock();
  71 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  72 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  73 + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*4*sizeof(T);
  74 + //call the kernel to do the voting
  75 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z);
  76 +
  77 + }
  78 +
  79 +
  80 + template<typename T>
  81 + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  82 +
  83 + //calculate the number of bytes in the array
  84 + unsigned int bytes = x * y * z * sizeof(T);
  85 +
  86 +
  87 + //allocate space on the GPU for the Vote Image
  88 + T* gpu_vote;
  89 + cudaMalloc(&gpu_vote, bytes);
  90 +
  91 + //allocate space on the GPU for the input Gradient image
  92 + T* gpu_grad;
  93 + cudaMalloc(&gpu_grad, bytes*3);
  94 +
  95 +
  96 + //copy the Gradient data to the GPU
  97 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  98 +
  99 +
  100 + //call the GPU version of the vote calculation function
  101 + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
  102 +
  103 + //copy the Vote Data back to the CPU
  104 + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
  105 +
  106 + //free allocated memory
  107 + cudaFree(gpu_vote);
  108 + cudaFree(gpu_grad);
  109 +
  110 + }
  111 +
  112 +
  113 +#endif
0 \ No newline at end of file 114 \ No newline at end of file
cpp/vote3_atomic_aabb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE3_ATOMIC_AABB_H
  2 +#define STIM_CUDA_VOTE3_ATOMIC_AABB_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/cudatools/error.h>
  8 +#include "cpyToshare.cuh"
  9 +#define M_PI 3.14159
  10 +#include <stim/math/circle.h>
  11 +#include <stim/math/vec3.h>
  12 +#include <stim/math/plane.h>
  13 +#include <stim/math/vector.h>
  14 +#include <stim/visualization/aabb3.h>
  15 +
  16 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  17 + template<typename T>
  18 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  19 +
  20 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  21 +
  22 + int grid_y = y / blockDim.y; //find the grid size along y
  23 + int blockidx_y = blockIdx.y % grid_y;
  24 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  25 + int zi = blockIdx.y / grid_y;
  26 +
  27 + if(xi>=x || yi>=y || zi>=z) return;
  28 +
  29 + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter
  30 +
  31 +
  32 + float rx_sq = rx * rx; // compute the square for rmax
  33 + float ry_sq = ry * ry;
  34 + float rz_sq = rz * rz;
  35 + float dist, cos_diff;
  36 + int idx_c;
  37 +
  38 + //float rmax = sqrt(rx_sq + ry_sq + rz_sq);
  39 + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
  40 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  41 + int n =4; //set the number of points to find the boundaries of the conical voting area
  42 + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
  43 + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
  44 + float zc = rz * cos(g_sph[2]) ;
  45 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  46 + xc+=xi;
  47 + yc+=yi;
  48 + zc+=zi;
  49 + stim::vec3<float> center(xc,yc,zc);
  50 +
  51 + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  52 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  53 + float step = 360.0/(float) n;
  54 + stim::circle<float> cir(center, d, norm);
  55 + stim::aabb3<int> bb(xi,yi,zi);
  56 + bb.insert(xc,yc,zc);
  57 + for(float j = 0; j <360.0; j += step){
  58 + stim::vec3<float> out = cir.p(j);
  59 + bb.insert(out[0], out[1], out[2]);
  60 + }
  61 +
  62 + bb.trim_low(0,0,0);
  63 + bb.trim_high(x-1, y-1, z-1);
  64 + int bx,by,bz;
  65 + int dx, dy, dz;
  66 + float dx_sq, dy_sq, dz_sq;
  67 + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
  68 + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
  69 + dz_sq = dz * dz;
  70 + for (by=bb.low[1]; by<=bb.high[1]; by++){
  71 + dy = by - yi; //compute the distance bw the voter and the current counter along y axis
  72 + dy_sq = dy * dy;
  73 + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){
  74 + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis
  75 + dx_sq = dx * dx;
  76 +
  77 + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter
  78 + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter
  79 + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter
  80 + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter
  81 + atomicAdd (&gpu_vote[idx_c] , g_sph[0]);
  82 + }
  83 + }
  84 + }
  85 + }
  86 + }
  87 +
  88 + template<typename T>
  89 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  90 +
  91 +
  92 + unsigned int max_threads = stim::maxThreadsPerBlock();
  93 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  94 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  95 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting
  96 +
  97 + }
  98 +
  99 +
  100 + template<typename T>
  101 + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  102 +
  103 + //calculate the number of bytes in the array
  104 + unsigned int bytes = x * y * z * sizeof(T);
  105 +
  106 +
  107 + //allocate space on the GPU for the Vote Image
  108 + T* gpu_vote;
  109 + cudaMalloc(&gpu_vote, bytes);
  110 +
  111 + //allocate space on the GPU for the input Gradient image
  112 + T* gpu_grad;
  113 + cudaMalloc(&gpu_grad, bytes*3);
  114 +
  115 +
  116 + //copy the Gradient data to the GPU
  117 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  118 +
  119 +
  120 + //call the GPU version of the vote calculation function
  121 + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
  122 +
  123 + //copy the Vote Data back to the CPU
  124 + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
  125 +
  126 + //free allocated memory
  127 + cudaFree(gpu_vote);
  128 + cudaFree(gpu_grad);
  129 +
  130 + }
  131 +
  132 +
  133 +#endif
0 \ No newline at end of file 134 \ No newline at end of file