Commit a744d027c4d9d6583db594dc75811c6ff90eaef6

Authored by Laila Saadatifard
1 parent 46820b25

upload the ivote3 functions using the new method for finding voting area (bounda…

…ries of voting area)'
Matlab_3D/gt2.vol 0 → 100644
No preview for this file type
Matlab_3D/validation.m
1 1 clear all;
2   -disp('***************** NEW RUN *********************');
  2 +%for t=100:100:5000
  3 +t=2350;
3 4 X = 128;
4 5 Y = 128;
5 6 Z = 128;
... ... @@ -7,15 +8,15 @@ D = 10;
7 8 t0=1;
8 9 r1=12;
9 10 r2=10;
10   -t=2300;
11   -itr=8;
12   -vote=10;
  11 +% t=2300;
  12 +itr=5;
  13 +vote=7;
13 14 std = [5 5];
14 15 gt_filename = 'gt2.vol';
15 16 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote);
16   -out_filename = sprintf('D:/build/ivote3-bld/shared2D-v8/centers.%d.vol',t);
  17 +out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t);
17 18 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D);
18   -txt_filename = sprintf('0-t%d--1.txt',t);
  19 +txt_filename = sprintf('D:/build/ivote3-bld/0-t%d-atomic-aabb.txt',t);
19 20 spec = sprintf('Nissl-std%d.%d-r%d.%d-t%d-out%d.%d',std(1), std(2),r1,r2,t,itr,vote);
20 21 fid0 = fopen(gt_filename);
21 22 gt = fread(fid0,[X Y*Z], 'single');
... ... @@ -79,73 +80,73 @@ for k=1:size(id_ref,1)
79 80 oseg = oseg+1;
80 81 end
81 82 end
82   -%****
83   -% check if the oversegmented detected cells could be assigned to the gt's element with no detected cell!
84   -%find the indices of gt's element with no detected cell
85   -u_idx1 = find(b==0);
86   -%find the coordinates of gt's
87   -ref1 = ref(u_idx1,:);
88   -% find the indices of detected cells which are assigned to a gt's element
89   -ub = unique(b(b>0));
90   -% set the number of detected cell which are not assigned to a gt's element to zero
91   -mm=0;
92   -% find the indices of detected cell which are not assigned to a gt's element
93   -for j=1:size(cent,1)
94   - z = sum(ub(:)==j);
95   - if z==0
96   - mm = mm+1;
97   - cent_id1(mm) = j;
98   - end
99   -end
100   -% find the coordinated of of detected cells
101   -cent1 = cent(cent_id1,:);
102   -
103   -% check if there are osme low enough distances
104   -[idx1, dist1] = knnsearch(cent1,ref1);
105   -ind_eq_idx_rc = zeros(numel(idx1), 10);
106   -%set number of cells with the same detected cell center to zero.
107   -noc_rc=0;
108   -for i_rc=1:numel(idx1)
109   - % check the current element hasn't considered yet
110   - s_ind_rc = sum(ind_eq_idx_rc==i_rc);
111   - if s_ind_rc==0
112   - % check how many of the elemnts assigned to the same detected cell
113   - s_rc = sum(idx1==idx1(i_rc));
114   - if s_rc>1
115   - noc_rc = noc_rc+1;
116   - %save the index and number of gt's elements with the same assigned detected cells.
117   - id_ref_rc(noc_rc,1) = i_rc;
118   - id_ref_rc(noc_rc,2) = s_rc;
119   - ind1_rc = find(idx1==idx1(i_rc));
120   - ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc;
121   - end
122   - end
123   -end
124   -% determine those indices which hs assigned to only one detected cell
125   -b_rc = idx1;
126   -u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0));
127   -b_rc(u_eq_idx_rc)=0;
128   -% set the number of over sefgmented cells to zero.
129   -oseg_rc=0;
130   -for k_rc=1:size(id_ref_rc,1)
131   - k1_rc = id_ref_rc(k_rc,1);
132   - k2_rc = id_ref_rc(k_rc,2);
133   - %find the minimum distance to the detected cell happened for which of the gt's element
134   - l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc);
135   - [~, local_id_min_rc] = min(dist1(l_rc));
136   - % set the element with minimum distance to the corresponding detected cell
137   - b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc);
138   - %remove the proper element from the list of indices with same designated detected cells
139   - u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0;
140   - % check if the indices remained in the list has distance less than the value is set for validation
141   - ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc));
142   - distl_rc = dist1(ly_rc);
143   - sl_rc = sum(distl_rc<D);
144   - % if the distance is low enought, consider the corresponding cell as oversegmented
145   - if sl_rc>0
146   - oseg_rc = oseg_rc+1;
147   - end
148   -end
  83 +% %****
  84 +% % check if the oversegmented detected cells could be assigned to the gt's element with no detected cell!
  85 +% %find the indices of gt's element with no detected cell
  86 +% u_idx1 = find(b==0);
  87 +% %find the coordinates of gt's
  88 +% ref1 = ref(u_idx1,:);
  89 +% % find the indices of detected cells which are assigned to a gt's element
  90 +% ub = unique(b(b>0));
  91 +% % set the number of detected cell which are not assigned to a gt's element to zero
  92 +% mm=0;
  93 +% % find the indices of detected cell which are not assigned to a gt's element
  94 +% for j=1:size(cent,1)
  95 +% z = sum(ub(:)==j);
  96 +% if z==0
  97 +% mm = mm+1;
  98 +% cent_id1(mm) = j;
  99 +% end
  100 +% end
  101 +% % find the coordinated of of detected cells
  102 +% cent1 = cent(cent_id1,:);
  103 +%
  104 +% % check if there are osme low enough distances
  105 +% [idx1, dist1] = knnsearch(cent1,ref1);
  106 +% ind_eq_idx_rc = zeros(numel(idx1), 10);
  107 +% %set number of cells with the same detected cell center to zero.
  108 +% noc_rc=0;
  109 +% for i_rc=1:numel(idx1)
  110 +% % check the current element hasn't considered yet
  111 +% s_ind_rc = sum(ind_eq_idx_rc==i_rc);
  112 +% if s_ind_rc==0
  113 +% % check how many of the elemnts assigned to the same detected cell
  114 +% s_rc = sum(idx1==idx1(i_rc));
  115 +% if s_rc>1
  116 +% noc_rc = noc_rc+1;
  117 +% %save the index and number of gt's elements with the same assigned detected cells.
  118 +% id_ref_rc(noc_rc,1) = i_rc;
  119 +% id_ref_rc(noc_rc,2) = s_rc;
  120 +% ind1_rc = find(idx1==idx1(i_rc));
  121 +% ind_eq_idx_rc(i_rc, 1:numel(ind1_rc)) = ind1_rc;
  122 +% end
  123 +% end
  124 +% end
  125 +% % determine those indices which hs assigned to only one detected cell
  126 +% b_rc = idx1;
  127 +% u_eq_idx_rc = unique(ind_eq_idx_rc(ind_eq_idx_rc>0));
  128 +% b_rc(u_eq_idx_rc)=0;
  129 +% % set the number of over sefgmented cells to zero.
  130 +% oseg_rc=0;
  131 +% for k_rc=1:size(id_ref_rc,1)
  132 +% k1_rc = id_ref_rc(k_rc,1);
  133 +% k2_rc = id_ref_rc(k_rc,2);
  134 +% %find the minimum distance to the detected cell happened for which of the gt's element
  135 +% l_rc = ind_eq_idx_rc(k1_rc,1:k2_rc);
  136 +% [~, local_id_min_rc] = min(dist1(l_rc));
  137 +% % set the element with minimum distance to the corresponding detected cell
  138 +% b_rc(l_rc(local_id_min_rc)) = idx1(k1_rc);
  139 +% %remove the proper element from the list of indices with same designated detected cells
  140 +% u_eq_idx_rc (u_eq_idx_rc == l_rc(local_id_min_rc))=0;
  141 +% % check if the indices remained in the list has distance less than the value is set for validation
  142 +% ly_rc = l_rc (l_rc~=l_rc(local_id_min_rc));
  143 +% distl_rc = dist1(ly_rc);
  144 +% sl_rc = sum(distl_rc<D);
  145 +% % if the distance is low enought, consider the corresponding cell as oversegmented
  146 +% if sl_rc>0
  147 +% oseg_rc = oseg_rc+1;
  148 +% end
  149 +% end
149 150  
150 151 %******
151 152 % b include the gt's element and its detected cells, for those element with no cell detection, b has zero value
... ... @@ -154,11 +155,11 @@ b_dist = dist;
154 155 % remove the disatances for those elements with no detected cells from distance array
155 156 b_dist(b_ind)=-1;
156 157  
157   -b_ind_rc = find(b_rc==0);
158   -b_dist_rc = dist1;
159   -b_dist_rc(b_ind_rc)=-1;
  158 +% b_ind_rc = find(b_rc==0);
  159 +% b_dist_rc = dist1;
  160 +% b_dist_rc(b_ind_rc)=-1;
160 161 % calculate Ttrue Positive, number of detected cells that have low enough distance to one of the gt's elements.
161   -TP = sum(b_dist>=0) - sum(b_dist>D) + (sum(b_dist_rc>=0) - sum(b_dist_rc>D));
  162 +TP = sum(b_dist>=0) - sum(b_dist>D); % + (sum(b_dist_rc>=0) - sum(b_dist_rc>D));
162 163 % calculate False Negative, number of gt's elements with no detected cells.
163 164 FN = size(ref, 1) - TP;
164 165 % calculate False Positive, number of detected cells, which their distance to the gt's element is long
... ... @@ -179,3 +180,5 @@ fprintf(fid_text, &#39;Precision = %f\n&#39;, Precision);
179 180 fprintf(fid_text, 'Accuracy = %f\n', Accuracy);
180 181 fprintf(fid_text, 'Recall = %f\n', Recall);
181 182 fclose(fid_text);
  183 +clear all;
  184 +%end
182 185 \ No newline at end of file
... ...
cpp/CMakeLists.txt
... ... @@ -10,6 +10,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} &quot;${CMAKE_SOURCE_DIR}&quot;)
10 10 #set up CUDA
11 11 find_package(CUDA REQUIRED)
12 12  
  13 +#set up opencv
  14 +find_package(opencv)
  15 +
13 16 #find the STIM library
14 17 find_package(STIM REQUIRED)
15 18  
... ...
cpp/args.txt 0 → 100644
  1 +nissl-float-128.128.128.vol 0-out --t 2350 --x 128 --y 128 --z 128 --anisotropy 1 --invert
0 2 \ No newline at end of file
... ...
cpp/cudafunc.cu
  1 +/*#include "circle_check.cuh"
  2 +
  3 +void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z){
  4 +gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
  5 +}
  6 +*/
  7 +
  8 +
1 9 #include "gaussian_blur3.cuh"
2 10 #include "gradient3.cuh"
3 11 #include "mag3.cuh"
4   -#include "vote3.cuh"
  12 +#include "vote3_atomic_aabb.cuh"
5 13 #include "update_dir3.cuh"
6 14 #include "local_max3.cuh"
7 15  
... ... @@ -10,11 +18,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
10 18 int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
11 19  
12 20  
13   - cudaSetDevice(0);
  21 + cudaSetDevice(1);
14 22 // compute the number of bytes in the input data
15 23 unsigned int bytes = x * y * z * sizeof(float);
16 24  
17   - //assign memory on gpu for the input data.
  25 + //assign memory on gpu for the input data.z
18 26 float* gpuI0;
19 27 cudaMalloc(&gpuI0, bytes);
20 28  
... ... @@ -41,18 +49,17 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
41 49  
42 50 //call the vote function.
43 51 for (int i = 0; i < iter; i++){
44   -
  52 +
  53 + cudaMemset(gpu_vote, 0, bytes);
45 54 gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z);
46 55 cudaDeviceSynchronize();
47   - /*if (i==7)
48   - cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);*/
49   -
50   - if (phi >= d_phi){
  56 +
  57 + //if (phi >= d_phi){
51 58 gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
52 59 cudaDeviceSynchronize();
53 60 phi = phi - d_phi;
54 61 cos_phi = cos(phi);
55   - }
  62 + //}
56 63  
57 64 }
58 65  
... ... @@ -78,6 +85,8 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
78 85 void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
79 86 unsigned int bytes = x * y * z * sizeof(float);
80 87  
  88 + cudaSetDevice(1);
  89 +
81 90 //assign memory on gpu for the input data.
82 91 float* gpuV;
83 92 cudaMalloc(&gpuV, bytes);
... ... @@ -96,4 +105,5 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u
96 105  
97 106 cudaFree(gpuV);
98 107 cudaFree(gpuOut);
99   -}
100 108 \ No newline at end of file
  109 +}
  110 +
... ...
cpp/main.cpp
... ... @@ -9,6 +9,84 @@
9 9 #include <stim/image/image.h>
10 10 #define pi 3.14159
11 11  
  12 +#define M_PI 3.14159
  13 +#include <stim/math/circle.h>
  14 +#include <stim/math/vec3.h>
  15 +#include <stim/math/plane.h>
  16 +#include <stim/math/vector.h>
  17 +//#include <cuda.h>
  18 +//#include <stim/cuda/cudatools.h>
  19 +//#include <stim/cuda/cudatools/error.h>
  20 +
  21 +
  22 +/*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z);
  23 +
  24 +int main(){
  25 +
  26 + int n=20;
  27 + float rmax;
  28 + float phi_deg;
  29 + float phi;
  30 + rmax=4;
  31 + phi_deg = 15;
  32 + phi = phi_deg * pi/180;
  33 + int x,y,z;
  34 + x=y=z=1;
  35 + unsigned int size = x*y*z*sizeof (float);
  36 + float* cpu_grad = (float*) malloc(3*size);
  37 + float* gpu_grad;
  38 + cudaMalloc(&gpu_grad, 3*size);
  39 + cpu_grad[0]=1;
  40 + cpu_grad[1]=0;
  41 + cpu_grad[2]=-0.5;
  42 + cudaMemcpy(gpu_grad, cpu_grad, 3*size, cudaMemcpyHostToDevice);
  43 + float* cpu_out = (float*) malloc(3*size*(n+1));
  44 + float* gpu_out;
  45 + cudaMalloc(&gpu_out, 3*size*(n+1));
  46 + test_3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
  47 + cudaMemcpy(cpu_out, gpu_out, 3*size*(n+1), cudaMemcpyDeviceToHost);
  48 + std::ofstream list("circle_check_cuda1.txt");
  49 + if (list.is_open()){
  50 + for (int j=0; j<=n; ++j)
  51 + list << cpu_out[3*j] << '\t' << cpu_out[3*j +1] << '\t' << cpu_out[3*j + 2] << '\n';
  52 + }
  53 + list.close();
  54 + */
  55 + /*
  56 + int n=10;
  57 + stim::circle<float> cir;
  58 + float* c0= (float*) malloc(3*sizeof(float));
  59 + c0[0] =-4;
  60 + c0[1]=0;
  61 + c0[2] = 3;
  62 + stim::vec3<float> c(c0[0],c0[1],c0[2]);
  63 + float len = c.len();
  64 + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);
  65 + std::cout<< len << '\n';
  66 + std::cout<< norm << '\n';
  67 + cir.center(c);
  68 + cir.normal(norm);
  69 + cir.scale(2);
  70 + stim::vec3<float> out = cir.p(45);
  71 + std::vector<stim::vec3<float>> out2 = cir.getPoints(n);
  72 +
  73 + std::cout<< out << '\n';
  74 + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';
  75 + std::cout<< c << '\n';
  76 +
  77 + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)
  78 + std::cout << *i << '\n';
  79 + std::ofstream list("circle_check.txt");
  80 + if (list.is_open()){
  81 + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)
  82 + list << *j << '\n';
  83 + }
  84 + list.close();
  85 + std::cin >> n;
  86 +
  87 +}
  88 +
  89 +*/
12 90  
13 91 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
14 92 unsigned int x, unsigned int y, unsigned int z);
... ... @@ -98,7 +176,7 @@ int main(int argc, char** argv){
98 176 float t = args["t"].as_float();
99 177 //set the anisotropy
100 178 float anisotropy = args["anisotropy"].as_float();
101   - unsigned int rmax = 10 ;
  179 + unsigned int rmax = 10;
102 180 unsigned int r[3] = { 12, rmax, rmax};
103 181 float std = 5;
104 182 float sigma[3] = { std, std, std};
... ... @@ -137,50 +215,51 @@ int main(int argc, char** argv){
137 215 std::ofstream fblur("shared2D-v8/vote8.vol", std::ofstream::out | std::ofstream::binary);
138 216 fblur.write((char*)cpuI, bytes);
139 217 fblur.close();
140   - /*
141   - stim::image<float>imgrad3;
142   - imgrad3.set_interleaved3(cpu_out, 128,128,128,3);
143   - std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary);
144   - fgx.write((char*)imgrad3.channel(0).data(), bytes);
145   - fgx.close();
146   - */
  218 +
  219 + //stim::image<float>imgrad3;
  220 + //imgrad3.set_interleaved3(cpu_out, 128,128,128,3);
  221 + //std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary);
  222 + //fgx.write((char*)imgrad3.channel(0).data(), bytes);
  223 + //fgx.close();
  224 +
147 225 //write the output file.
148   - //for (int t0=2000; t0<=2500; t0+=100){
  226 + //for (int t0=0; t0<=5000; t0+=100){
  227 + // float t1 = t0;
149 228 int t0 = t;
150 229 lmax(cpu_out, cpuI, t, conn, x, y, z);
151 230 //std::ofstream fo("shared2D-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary);
152   - std::ofstream fo("shared2D-v8/" + OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary);
  231 + std::ofstream fo( OutName.str()+std::to_string(t0)+".vol", std::ofstream::out | std::ofstream::binary);
153 232 fo.write((char*)cpu_out, bytes);
154 233 fo.close();
155 234  
156 235 // creat a file for saving the list centers
157 236  
158   - std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");
159   - // set the number of detected cells to zero.
160   - int nod = 0;
161   - if (list.is_open()){
162   -
163   - for (int iz=0; iz<z; iz++){
164   - for (int iy=0; iy<y; iy++){
165   - for (int ix=0; ix<x; ix++){
166   -
167   - int idx = iz * x * y + iy * x + ix;
168   - if (cpu_out[idx]==1){
169   - nod++;
170   - list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;
171   -
172   - }
173   - }
174   - }
175   - }
176   - list << "p" << "\t";
177   - for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
178   - list << i_nod << "\t";
179   - }
  237 + //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");
  238 + //// set the number of detected cells to zero.
  239 + //int nod = 0;
  240 + //if (list.is_open()){
180 241  
181   - list.close();
182   - }
183   -
  242 + // for (int iz=0; iz<z; iz++){
  243 + // for (int iy=0; iy<y; iy++){
  244 + // for (int ix=0; ix<x; ix++){
  245 +
  246 + // int idx = iz * x * y + iy * x + ix;
  247 + // if (cpu_out[idx]==1){
  248 + // nod++;
  249 + // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;
  250 + //
  251 + // }
  252 + // }
  253 + // }
  254 + // }
  255 + // list << "p" << "\t";
  256 + // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
  257 + // list << i_nod << "\t";
  258 + // }
  259 +
  260 + //list.close();
  261 + //}
  262 + //}
184 263 cudaDeviceReset();
185 264  
186 265 }
... ...
cpp/update_dir3_aabb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H
  2 +#define STIM_CUDA_UPDATE_DIR3_AABB_H
  3 +
  4 +# include <iostream>
  5 +# include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include "cpyToshare.cuh"
  8 +#define M_PI 3.14159
  9 +#include <stim/math/circle.h>
  10 +#include <stim/math/vec3.h>
  11 +#include <stim/math/plane.h>
  12 +#include <stim/math/vector.h>
  13 +#include <stim/visualization/aabb3.h>
  14 +
  15 + // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
  16 + template<typename T>
  17 + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  18 + //extern __shared__ float s_vote[];
  19 +
  20 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  21 +
  22 + int grid_y = y / blockDim.y; //find the grid size along y
  23 + int blockidx_y = blockIdx.y % grid_y;
  24 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  25 + int zi = blockIdx.y / grid_y;
  26 + if(xi >= x|| yi >= y || zi>= z) return;
  27 + int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel
  28 +
  29 +
  30 + // find the starting points for this block along the x and y directions
  31 + //int bxi = blockIdx.x * blockDim.x;
  32 + //int byi = blockidx_y * blockDim.y;
  33 + //find the starting points and the size of the window, which will be copied to the 2D-shared memory
  34 + //int bxs = bxi - rx;
  35 + //int bys = byi - ry;
  36 + //int xwidth = 2 * rx + blockDim.x;
  37 + //int ywidth = 2 * ry + blockDim.y;
  38 + //compute the coordinations of this pixel in the 2D-shared memory.
  39 + //int sx_rx = threadIdx.x + rx;
  40 + //int sy_ry = threadIdx.y + ry;
  41 +
  42 + float rx_sq = rx * rx; // compute the square for rmax
  43 + float ry_sq = ry * ry;
  44 + float rz_sq = rz * rz;
  45 +
  46 + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
  47 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  48 + int n =4; //set the number of points to find the boundaries of the conical voting area
  49 + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter
  50 + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
  51 + float zc = rz * cos(g_sph[2]) ;
  52 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  53 + xc+=xi;
  54 + yc+=yi;
  55 + zc+=zi;
  56 + stim::vec3<float> center(xc,yc,zc);
  57 + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  58 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  59 + float step = 360.0/(float) n;
  60 + stim::circle<float> cir(center, d, norm);
  61 + stim::aabb3<int> bb(xi,yi,zi);
  62 + bb.insert(xc,yc,zc);
  63 + for(float j = 0; j <360.0; j += step){
  64 + stim::vec3<float> out = cir.p(j);
  65 + bb.insert(out[0], out[1], out[2]);
  66 + }
  67 +
  68 + bb.trim_low(0,0,0);
  69 + bb.trim_high(x-1, y-1, z-1);
  70 + int bx,by,bz;
  71 + int dx, dy, dz;
  72 + float dx_sq, dy_sq, dz_sq;
  73 +
  74 + float dist, cos_diff;
  75 + int idx_c;
  76 +
  77 + float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter
  78 + float l_vote = 0;
  79 +
  80 + float id_x = g[0]; // define local variables for the x, y, and z coordinations point to the vote direction
  81 + float id_y = g[1];
  82 + float id_z = g[2];
  83 +
  84 + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
  85 + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
  86 + dz_sq = dz * dz;
  87 + for (by=bb.low[1]; by<=bb.high[1]; by++){
  88 + dy = by - yi; //compute the distance bw the voter and the current counter along y axis
  89 + dy_sq = dy * dy;
  90 + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){
  91 + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis
  92 + dx_sq = dx * dx;
  93 +
  94 + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter
  95 + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter
  96 + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter
  97 + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter
  98 + l_vote = gpu_vote[idx_c];
  99 + if (l_vote>max) {
  100 + max = l_vote;
  101 + id_x = dx;
  102 + id_y = dy;
  103 + id_z = dz;
  104 + }
  105 + }
  106 + }
  107 + }
  108 + }
  109 + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z);
  110 + gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id);
  111 + gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id);
  112 + gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id);
  113 + }
  114 +
  115 +
  116 +
  117 + // this kernel updates the gradient direction by the calculated voting direction.
  118 + template<typename T>
  119 + __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){
  120 +
  121 + //calculate x,y,z coordinates for this thread
  122 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  123 + //find the grid size along y
  124 + int grid_y = y / blockDim.y;
  125 + int blockidx_y = blockIdx.y % grid_y;
  126 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  127 + int zi = blockIdx.y / grid_y;
  128 + int i = zi * x * y + yi * x + xi;
  129 +
  130 + if(xi >= x || yi >= y || zi >= z) return;
  131 + //update the gradient image with the new direction direction
  132 + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0];
  133 + gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1];
  134 + gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2];
  135 + }
  136 +
  137 + template<typename T>
  138 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  139 +
  140 + unsigned int max_threads = stim::maxThreadsPerBlock();
  141 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  142 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  143 + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T);
  144 + // allocate space on the GPU for the updated vote direction
  145 + T* gpu_dir;
  146 + cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3);
  147 +
  148 + //call the kernel to calculate the new voting direction
  149 + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z);
  150 +
  151 +
  152 + //call the kernel to update the gradient direction
  153 + update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z);
  154 +
  155 + //free allocated memory
  156 + cudaFree(gpu_dir);
  157 +
  158 + }
  159 +
  160 + template<typename T>
  161 + void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  162 +
  163 + //calculate the number of bytes in the array
  164 + unsigned int bytes = x * y * z * sizeof(T);
  165 +
  166 + //allocate space on the GPU for the Vote data
  167 + T* gpu_vote;
  168 + cudaMalloc(&gpu_vote, bytes);
  169 +
  170 + //copy the input vote data to the GPU
  171 + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice);
  172 +
  173 + //allocate space on the GPU for the Gradient data
  174 + T* gpu_grad;
  175 + cudaMalloc(&gpu_grad, bytes*3);
  176 +
  177 + //copy the Gradient data to the GPU
  178 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  179 +
  180 + //call the GPU version of the update direction function
  181 + gpu_update_dir3<T>(gpu_grad, gpu_vote, cos_phi, r, x , y, z);
  182 +
  183 + //copy the new gradient image back to the CPU
  184 + cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ;
  185 +
  186 + //free allocated memory
  187 + cudaFree(gpu_vote);
  188 + cudaFree(gpu_grad);
  189 + }
  190 +
  191 +
  192 +
  193 +#endif
0 194 \ No newline at end of file
... ...
cpp/vote3_atomic.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE3_ATOMIC_H
  2 +#define STIM_CUDA_VOTE3_ATOMIC_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/cudatools/error.h>
  8 +#include "cpyToshare.cuh"
  9 +
  10 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  11 + template<typename T>
  12 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  13 +
  14 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  15 +
  16 + int grid_y = y / blockDim.y; //find the grid size along y
  17 + int blockidx_y = blockIdx.y % grid_y;
  18 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  19 + int zi = blockIdx.y / grid_y;
  20 +
  21 + if(xi>=x || yi>=y || zi>=z) return;
  22 +
  23 + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter
  24 + float gx_v = gpu_grad[3*i]; // find the gradient information in cartesian coordinate for the voter - global memory fetch
  25 + float gy_v = gpu_grad[3*i+1];
  26 + float gz_v = gpu_grad[3*i+2];
  27 +
  28 + float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter
  29 +
  30 + float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter
  31 + float gy_v_n = gy_v/mag_v;
  32 + float gz_v_n = gz_v/mag_v;
  33 +
  34 + float rx_sq = rx * rx; // compute the square for rmax
  35 + float ry_sq = ry * ry;
  36 + float rz_sq = rz * rz;
  37 + float x_sq, y_sq, z_sq, d_c, cos_diff;
  38 + int xi_c, yi_c, zi_c, idx_c;
  39 +
  40 + for (int z_c=-rz; z_c<=rz; z_c++){
  41 + zi_c = zi + z_c; // calculate the z position for the current counter
  42 + if (zi_c <z && zi_c>=0){ // make sure the current counter is inside the image
  43 + z_sq = z_c * z_c;
  44 + for (int y_c=-ry; y_c<=ry; y_c++){
  45 + yi_c = yi + y_c;
  46 + if (yi_c < y && yi_c>=0){
  47 + y_sq = y_c * y_c;
  48 + for (int x_c=-rx; x_c<=rx; x_c++){
  49 + xi_c = xi + x_c;
  50 + if (xi_c < x && xi_c>=0){
  51 + x_sq = x_c * x_c;
  52 + d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter
  53 + cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter
  54 + if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){
  55 + idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter
  56 + atomicAdd (&gpu_vote[idx_c] , mag_v);
  57 + }
  58 + }
  59 + }
  60 + }
  61 + }
  62 + }
  63 + }
  64 + }
  65 +
  66 + template<typename T>
  67 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  68 +
  69 +
  70 + unsigned int max_threads = stim::maxThreadsPerBlock();
  71 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  72 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  73 + //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*4*sizeof(T);
  74 + //call the kernel to do the voting
  75 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z);
  76 +
  77 + }
  78 +
  79 +
  80 + template<typename T>
  81 + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  82 +
  83 + //calculate the number of bytes in the array
  84 + unsigned int bytes = x * y * z * sizeof(T);
  85 +
  86 +
  87 + //allocate space on the GPU for the Vote Image
  88 + T* gpu_vote;
  89 + cudaMalloc(&gpu_vote, bytes);
  90 +
  91 + //allocate space on the GPU for the input Gradient image
  92 + T* gpu_grad;
  93 + cudaMalloc(&gpu_grad, bytes*3);
  94 +
  95 +
  96 + //copy the Gradient data to the GPU
  97 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  98 +
  99 +
  100 + //call the GPU version of the vote calculation function
  101 + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
  102 +
  103 + //copy the Vote Data back to the CPU
  104 + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
  105 +
  106 + //free allocated memory
  107 + cudaFree(gpu_vote);
  108 + cudaFree(gpu_grad);
  109 +
  110 + }
  111 +
  112 +
  113 +#endif
0 114 \ No newline at end of file
... ...
cpp/vote3_atomic_aabb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE3_ATOMIC_AABB_H
  2 +#define STIM_CUDA_VOTE3_ATOMIC_AABB_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/cudatools/error.h>
  8 +#include "cpyToshare.cuh"
  9 +#define M_PI 3.14159
  10 +#include <stim/math/circle.h>
  11 +#include <stim/math/vec3.h>
  12 +#include <stim/math/plane.h>
  13 +#include <stim/math/vector.h>
  14 +#include <stim/visualization/aabb3.h>
  15 +
  16 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  17 + template<typename T>
  18 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  19 +
  20 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  21 +
  22 + int grid_y = y / blockDim.y; //find the grid size along y
  23 + int blockidx_y = blockIdx.y % grid_y;
  24 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  25 + int zi = blockIdx.y / grid_y;
  26 +
  27 + if(xi>=x || yi>=y || zi>=z) return;
  28 +
  29 + int i = zi * x * y + yi * x + xi; // calculate the 1D index of the voter
  30 +
  31 +
  32 + float rx_sq = rx * rx; // compute the square for rmax
  33 + float ry_sq = ry * ry;
  34 + float rz_sq = rz * rz;
  35 + float dist, cos_diff;
  36 + int idx_c;
  37 +
  38 + //float rmax = sqrt(rx_sq + ry_sq + rz_sq);
  39 + stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
  40 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  41 + int n =4; //set the number of points to find the boundaries of the conical voting area
  42 + float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
  43 + float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
  44 + float zc = rz * cos(g_sph[2]) ;
  45 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  46 + xc+=xi;
  47 + yc+=yi;
  48 + zc+=zi;
  49 + stim::vec3<float> center(xc,yc,zc);
  50 +
  51 + float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  52 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  53 + float step = 360.0/(float) n;
  54 + stim::circle<float> cir(center, d, norm);
  55 + stim::aabb3<int> bb(xi,yi,zi);
  56 + bb.insert(xc,yc,zc);
  57 + for(float j = 0; j <360.0; j += step){
  58 + stim::vec3<float> out = cir.p(j);
  59 + bb.insert(out[0], out[1], out[2]);
  60 + }
  61 +
  62 + bb.trim_low(0,0,0);
  63 + bb.trim_high(x-1, y-1, z-1);
  64 + int bx,by,bz;
  65 + int dx, dy, dz;
  66 + float dx_sq, dy_sq, dz_sq;
  67 + for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
  68 + dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
  69 + dz_sq = dz * dz;
  70 + for (by=bb.low[1]; by<=bb.high[1]; by++){
  71 + dy = by - yi; //compute the distance bw the voter and the current counter along y axis
  72 + dy_sq = dy * dy;
  73 + for (bx=bb.low[0]; bx<=bb.high[0]; bx++){
  74 + dx = bx - xi; //compute the distance bw the voter and the current counter along x axis
  75 + dx_sq = dx * dx;
  76 +
  77 + dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter
  78 + cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter
  79 + if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter
  80 + idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter
  81 + atomicAdd (&gpu_vote[idx_c] , g_sph[0]);
  82 + }
  83 + }
  84 + }
  85 + }
  86 + }
  87 +
  88 + template<typename T>
  89 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  90 +
  91 +
  92 + unsigned int max_threads = stim::maxThreadsPerBlock();
  93 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  94 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  95 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting
  96 +
  97 + }
  98 +
  99 +
  100 + template<typename T>
  101 + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  102 +
  103 + //calculate the number of bytes in the array
  104 + unsigned int bytes = x * y * z * sizeof(T);
  105 +
  106 +
  107 + //allocate space on the GPU for the Vote Image
  108 + T* gpu_vote;
  109 + cudaMalloc(&gpu_vote, bytes);
  110 +
  111 + //allocate space on the GPU for the input Gradient image
  112 + T* gpu_grad;
  113 + cudaMalloc(&gpu_grad, bytes*3);
  114 +
  115 +
  116 + //copy the Gradient data to the GPU
  117 + cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  118 +
  119 +
  120 + //call the GPU version of the vote calculation function
  121 + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
  122 +
  123 + //copy the Vote Data back to the CPU
  124 + cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
  125 +
  126 + //free allocated memory
  127 + cudaFree(gpu_vote);
  128 + cudaFree(gpu_grad);
  129 +
  130 + }
  131 +
  132 +
  133 +#endif
0 134 \ No newline at end of file
... ...