Commit 1f55a87438cefd956c4a8cd420fa8c3af4c01c6a

Authored by Laila Saadatifard
1 parent a744d027

upload the fixed ivote3 working with the bounding box method

Matlab_3D/gt2.vol renamed to Matlab_3D/gt-128.vol
No preview for this file type
Matlab_3D/nissl-float-64.64.64.vol 0 → 100644
No preview for this file type
Matlab_3D/validation.m
1 1 clear all;
2 2 %for t=100:100:5000
3   -t=2350;
  3 +t=2200;
4 4 X = 128;
5 5 Y = 128;
6 6 Z = 128;
... ... @@ -12,7 +12,7 @@ r2=10;
12 12 itr=5;
13 13 vote=7;
14 14 std = [5 5];
15   -gt_filename = 'gt2.vol';
  15 +gt_filename = 'gt-128.vol';
16 16 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote);
17 17 out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t);
18 18 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D);
... ...
cpp/CMakeLists.txt
... ... @@ -51,3 +51,4 @@ target_link_libraries(ivote3
51 51 #copy an image test case
52 52 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
53 53 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
  54 +configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY)
... ...
cpp/cudafunc.cu
... ... @@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
10 10 #include "gradient3.cuh"
11 11 #include "mag3.cuh"
12 12 #include "vote3_atomic_aabb.cuh"
13   -#include "update_dir3.cuh"
  13 +#include "update_dir3_aabb.cuh"
14 14 #include "local_max3.cuh"
15 15  
16 16  
... ... @@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
19 19  
20 20  
21 21 cudaSetDevice(1);
22   - // compute the number of bytes in the input data
23   - unsigned int bytes = x * y * z * sizeof(float);
  22 +
  23 + unsigned int bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data
24 24  
25   - //assign memory on gpu for the input data.z
26   - float* gpuI0;
  25 + float* gpuI0; //assign memory on gpu for the input data
27 26 cudaMalloc(&gpuI0, bytes);
  27 + cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
28 28  
29   - //copy the image data to the GPU.
30   - cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice);
31   -
32   - //call the blurring function from the gpu.
33   - gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z);
34   - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost);
  29 +
  30 + gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); //call the blurring function from the gpu.
35 31 cudaDeviceSynchronize();
36 32  
37   - //assign memory on the gpu for the gradient along the X, y, z.
38   - float* gpu_grad;
  33 + float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z.
39 34 cudaMalloc(&gpu_grad, bytes*3);
40 35  
41   - //call the gradient function from the gpu.
42   - gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z);
  36 + gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); //call the gradient function from the gpu.
43 37 cudaFree(gpuI0);
44 38  
45 39 float* gpu_vote;
... ... @@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
51 45 for (int i = 0; i < iter; i++){
52 46  
53 47 cudaMemset(gpu_vote, 0, bytes);
54   - gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z);
  48 + gpu_vote3<float>(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z);
55 49 cudaDeviceSynchronize();
56 50  
57 51 //if (phi >= d_phi){
58   - gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
  52 + gpu_update_dir3<float>(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z);
59 53 cudaDeviceSynchronize();
60 54 phi = phi - d_phi;
61 55 cos_phi = cos(phi);
... ... @@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u
87 81  
88 82 cudaSetDevice(1);
89 83  
90   - //assign memory on gpu for the input data.
91   - float* gpuV;
  84 +
  85 + float* gpuV; //assign memory on gpu for the input data.
92 86 cudaMalloc(&gpuV, bytes);
93 87  
94   - //copy the image data to the GPU.
95   - cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice);
  88 + cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
96 89  
97 90 float* gpuOut;
98 91 cudaMalloc(&gpuOut, bytes);
99 92  
100   - //call the local max function
101   - gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z);
  93 + gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); //call the local max function
102 94  
103   - //copy the final result to the cpu.
104   - cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost);
  95 + cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu.
105 96  
106 97 cudaFree(gpuV);
107 98 cudaFree(gpuOut);
... ...
cpp/main.cpp
... ... @@ -9,14 +9,13 @@
9 9 #include <stim/image/image.h>
10 10 #define pi 3.14159
11 11  
12   -#define M_PI 3.14159
13   -#include <stim/math/circle.h>
14   -#include <stim/math/vec3.h>
15   -#include <stim/math/plane.h>
16   -#include <stim/math/vector.h>
17   -//#include <cuda.h>
18   -//#include <stim/cuda/cudatools.h>
19   -//#include <stim/cuda/cudatools/error.h>
  12 +//#define M_PI 3.14159
  13 +//#include <stim/math/circle.h>
  14 +//#include <stim/math/vec3.h>
  15 +//#include <stim/math/plane.h>
  16 +//#include <stim/math/vector.h>
  17 +//#include <stim/visualization/aabb3.h>
  18 +
20 19  
21 20  
22 21 /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z);
... ... @@ -52,42 +51,78 @@ int main(){
52 51 }
53 52 list.close();
54 53 */
55   - /*
56   - int n=10;
57   - stim::circle<float> cir;
58   - float* c0= (float*) malloc(3*sizeof(float));
59   - c0[0] =-4;
60   - c0[1]=0;
61   - c0[2] = 3;
62   - stim::vec3<float> c(c0[0],c0[1],c0[2]);
63   - float len = c.len();
64   - stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);
65   - std::cout<< len << '\n';
66   - std::cout<< norm << '\n';
67   - cir.center(c);
68   - cir.normal(norm);
69   - cir.scale(2);
70   - stim::vec3<float> out = cir.p(45);
71   - std::vector<stim::vec3<float>> out2 = cir.getPoints(n);
  54 +/*
  55 + int main(){
  56 +
  57 +
  58 + stim::vec3<float> g(-44,-3.4,-0.005); // form a vec3 variable for the gradient vector
  59 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  60 + int n =36; //set the number of points to find the boundaries of the conical voting area
  61 + int xi = 105;
  62 + int yi = 17;
  63 + int zi = 23;
  64 + float xc = 12 * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
  65 + float yc = 10 * sin(g_sph[1]) * sin(g_sph[2]) ;
  66 + float zc = 10 * cos(g_sph[2]) ;
  67 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  68 + xc+=xi;
  69 + yc+=yi;
  70 + zc+=zi;
  71 + stim::vec3<float> center(xc,yc,zc);
  72 +
  73 + float d = 2 * r * tan(25*pi/180 ); //find the diameter of the conical voting area
  74 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  75 + float step = 360.0/(float) n;
  76 + stim::circle<float> cir(center, d, norm);
  77 + stim::aabb3<int> bb(xi,yi,zi);
  78 + bb.insert(xc,yc,zc);
  79 + for(float j = 0; j <360.0; j += step){
  80 + stim::vec3<float> out = cir.p(j);
  81 + bb.insert(out[0], out[1], out[2]);
  82 + }
  83 +
  84 + bb.trim_low(0,0,0);
  85 + bb.trim_high(128-1, 128-1, 128-1);
  86 +
  87 + std::cout<< bb.low[0] << '\t' << bb.low[1] << '\t' << bb.low[2] << '\n';
  88 + std::cout<< bb.high[0] << '\t' << bb.high[1] << '\t' << bb.high[2] << '\n';
  89 + std::cin >> n;
  90 +*/
  91 + /*int n=10;
  92 + stim::circle<float> cir;
  93 + float* c0= (float*) malloc(3*sizeof(float));
  94 + c0[0] =-4;
  95 + c0[1]=0;
  96 + c0[2] = 3;
  97 + stim::vec3<float> c(c0[0],c0[1],c0[2]);
  98 + float len = c.len();
  99 + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);
  100 + std::cout<< len << '\n';
  101 + std::cout<< norm << '\n';
  102 + cir.center(c);
  103 + cir.normal(norm);
  104 + cir.scale(2);
  105 + stim::vec3<float> out = cir.p(45);
  106 + std::vector<stim::vec3<float>> out2 = cir.getPoints(n);
72 107  
73   - std::cout<< out << '\n';
74   - std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';
75   - std::cout<< c << '\n';
  108 + std::cout<< out << '\n';
  109 + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';
  110 + std::cout<< c << '\n';
76 111  
77   - for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)
78   - std::cout << *i << '\n';
79   - std::ofstream list("circle_check.txt");
80   - if (list.is_open()){
81   - for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)
82   - list << *j << '\n';
83   - }
84   - list.close();
85   - std::cin >> n;
  112 + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)
  113 + std::cout << *i << '\n';
  114 + std::ofstream list("circle_check.txt");
  115 + if (list.is_open()){
  116 + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)
  117 + list << *j << '\n';
  118 + }
  119 + list.close();
  120 + std::cin >> n;
86 121  
87 122 }
88   -
89 123 */
90 124  
  125 +
91 126 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
92 127 unsigned int x, unsigned int y, unsigned int z);
93 128 void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z);
... ... @@ -140,7 +175,7 @@ int main(int argc, char** argv){
140 175 args.add("z", "size of the dataset along Z axis", "positive value");
141 176 args.add("t", "threshold value for the final result", "positive valu");
142 177 args.add("invert", "to invert the input data set", "string");
143   - args.add("anisotropy", "anisotropy value of the imaging", "positive value");
  178 + args.add("anisotropy", "anisotropy value of the imaging", "1");
144 179 //parse the command line arguments.
145 180 args.parse(argc, argv);
146 181  
... ...
cpp/nissl-raw-data/nissl-float-64.64.64.vol 0 → 100644
No preview for this file type
cpp/update_dir3.cuh
... ... @@ -129,7 +129,7 @@
129 129 }
130 130  
131 131 template<typename T>
132   - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  132 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
133 133  
134 134 unsigned int max_threads = stim::maxThreadsPerBlock();
135 135 dim3 threads(sqrt (max_threads),sqrt (max_threads));
... ...
cpp/update_dir3_aabb.cuh
... ... @@ -14,8 +14,7 @@
14 14  
15 15 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
16 16 template<typename T>
17   - __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
18   - //extern __shared__ float s_vote[];
  17 + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
19 18  
20 19 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
21 20  
... ... @@ -26,26 +25,13 @@
26 25 if(xi >= x|| yi >= y || zi>= z) return;
27 26 int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel
28 27  
29   -
30   - // find the starting points for this block along the x and y directions
31   - //int bxi = blockIdx.x * blockDim.x;
32   - //int byi = blockidx_y * blockDim.y;
33   - //find the starting points and the size of the window, which will be copied to the 2D-shared memory
34   - //int bxs = bxi - rx;
35   - //int bys = byi - ry;
36   - //int xwidth = 2 * rx + blockDim.x;
37   - //int ywidth = 2 * ry + blockDim.y;
38   - //compute the coordinations of this pixel in the 2D-shared memory.
39   - //int sx_rx = threadIdx.x + rx;
40   - //int sy_ry = threadIdx.y + ry;
41   -
42 28 float rx_sq = rx * rx; // compute the square for rmax
43 29 float ry_sq = ry * ry;
44 30 float rz_sq = rz * rz;
45 31  
46 32 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
47 33 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
48   - int n =4; //set the number of points to find the boundaries of the conical voting area
  34 + float n =8; //set the number of points to find the boundaries of the conical voting area
49 35 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter
50 36 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
51 37 float zc = rz * cos(g_sph[2]) ;
... ... @@ -54,9 +40,10 @@
54 40 yc+=yi;
55 41 zc+=zi;
56 42 stim::vec3<float> center(xc,yc,zc);
57   - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  43 +
  44 + float d = 2 * r * tan(phi); //find the diameter of the conical voting area
58 45 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
59   - float step = 360.0/(float) n;
  46 + float step = 360.0/n;
60 47 stim::circle<float> cir(center, d, norm);
61 48 stim::aabb3<int> bb(xi,yi,zi);
62 49 bb.insert(xc,yc,zc);
... ... @@ -64,13 +51,13 @@
64 51 stim::vec3<float> out = cir.p(j);
65 52 bb.insert(out[0], out[1], out[2]);
66 53 }
67   -
  54 + bb.trim_low(xi-rx, yi-ry, zi-rz);
68 55 bb.trim_low(0,0,0);
  56 + bb.trim_high(xi+rx, yi+ry, zi+rz);
69 57 bb.trim_high(x-1, y-1, z-1);
70 58 int bx,by,bz;
71 59 int dx, dy, dz;
72 60 float dx_sq, dy_sq, dz_sq;
73   -
74 61 float dist, cos_diff;
75 62 int idx_c;
76 63  
... ... @@ -118,42 +105,38 @@
118 105 template<typename T>
119 106 __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){
120 107  
121   - //calculate x,y,z coordinates for this thread
122   - int xi = blockIdx.x * blockDim.x + threadIdx.x;
123   - //find the grid size along y
124   - int grid_y = y / blockDim.y;
  108 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  109 +
  110 + int grid_y = y / blockDim.y; //find the grid size along y
125 111 int blockidx_y = blockIdx.y % grid_y;
126 112 int yi = blockidx_y * blockDim.y + threadIdx.y;
127 113 int zi = blockIdx.y / grid_y;
128 114 int i = zi * x * y + yi * x + xi;
129 115  
130 116 if(xi >= x || yi >= y || zi >= z) return;
131   - //update the gradient image with the new direction direction
132   - gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0];
  117 +
  118 + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction direction
133 119 gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1];
134 120 gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2];
135 121 }
136 122  
137 123 template<typename T>
138   - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  124 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
139 125  
140 126 unsigned int max_threads = stim::maxThreadsPerBlock();
141 127 dim3 threads(sqrt (max_threads),sqrt (max_threads));
142 128 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
143   - //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T);
144   - // allocate space on the GPU for the updated vote direction
145   - T* gpu_dir;
  129 +
  130 + T* gpu_dir; // allocate space on the GPU for the updated vote direction
146 131 cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3);
147 132  
148 133 //call the kernel to calculate the new voting direction
149   - update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z);
150   -
  134 + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, r[0], r[1], r[2], x , y, z);
151 135  
152 136 //call the kernel to update the gradient direction
153 137 update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z);
154   -
155   - //free allocated memory
156   - cudaFree(gpu_dir);
  138 +
  139 + cudaFree(gpu_dir); //free allocated memory
157 140  
158 141 }
159 142  
... ...
cpp/vote3.cuh
... ... @@ -101,7 +101,7 @@
101 101 }
102 102  
103 103 template<typename T>
104   - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  104 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
105 105  
106 106  
107 107 unsigned int max_threads = stim::maxThreadsPerBlock();
... ...
cpp/vote3_atomic.cuh
... ... @@ -27,14 +27,14 @@
27 27  
28 28 float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter
29 29  
30   - float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter
31   - float gy_v_n = gy_v/mag_v;
32   - float gz_v_n = gz_v/mag_v;
  30 + //float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter
  31 + //float gy_v_n = gy_v/mag_v;
  32 + //float gz_v_n = gz_v/mag_v;
33 33  
34 34 float rx_sq = rx * rx; // compute the square for rmax
35 35 float ry_sq = ry * ry;
36 36 float rz_sq = rz * rz;
37   - float x_sq, y_sq, z_sq, d_c, cos_diff;
  37 + float x_sq, y_sq, z_sq, dist, cos_diff;
38 38 int xi_c, yi_c, zi_c, idx_c;
39 39  
40 40 for (int z_c=-rz; z_c<=rz; z_c++){
... ... @@ -49,8 +49,8 @@
49 49 xi_c = xi + x_c;
50 50 if (xi_c < x && xi_c>=0){
51 51 x_sq = x_c * x_c;
52   - d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter
53   - cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter
  52 + dist = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter
  53 + cos_diff = (gx_v * x_c + gy_v * y_c + gz_v * z_c)/(dist * mag_v); // calculate the cosine of angle between the voter and the current counter
54 54 if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){
55 55 idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter
56 56 atomicAdd (&gpu_vote[idx_c] , mag_v);
... ... @@ -64,7 +64,7 @@
64 64 }
65 65  
66 66 template<typename T>
67   - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  67 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
68 68  
69 69  
70 70 unsigned int max_threads = stim::maxThreadsPerBlock();
... ...
cpp/vote3_atomic_aabb.cuh
... ... @@ -15,7 +15,7 @@
15 15  
16 16 // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
17 17 template<typename T>
18   - __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  18 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
19 19  
20 20 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
21 21  
... ... @@ -32,13 +32,10 @@
32 32 float rx_sq = rx * rx; // compute the square for rmax
33 33 float ry_sq = ry * ry;
34 34 float rz_sq = rz * rz;
35   - float dist, cos_diff;
36   - int idx_c;
37   -
38   - //float rmax = sqrt(rx_sq + ry_sq + rz_sq);
  35 +
39 36 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
40 37 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
41   - int n =4; //set the number of points to find the boundaries of the conical voting area
  38 + float n =8; //set the number of points to find the boundaries of the conical voting area
42 39 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
43 40 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
44 41 float zc = rz * cos(g_sph[2]) ;
... ... @@ -48,9 +45,9 @@
48 45 zc+=zi;
49 46 stim::vec3<float> center(xc,yc,zc);
50 47  
51   - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area
  48 + float d = 2 * r * tan(phi); //find the diameter of the conical voting area
52 49 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
53   - float step = 360.0/(float) n;
  50 + float step = 360.0/n;
54 51 stim::circle<float> cir(center, d, norm);
55 52 stim::aabb3<int> bb(xi,yi,zi);
56 53 bb.insert(xc,yc,zc);
... ... @@ -58,12 +55,15 @@
58 55 stim::vec3<float> out = cir.p(j);
59 56 bb.insert(out[0], out[1], out[2]);
60 57 }
61   -
  58 + bb.trim_low(xi-rx, yi-ry, zi-rz);
62 59 bb.trim_low(0,0,0);
  60 + bb.trim_high(xi+rx, yi+ry, zi+rz);
63 61 bb.trim_high(x-1, y-1, z-1);
64 62 int bx,by,bz;
65 63 int dx, dy, dz;
66 64 float dx_sq, dy_sq, dz_sq;
  65 + float dist, cos_diff;
  66 + int idx_c;
67 67 for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
68 68 dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
69 69 dz_sq = dz * dz;
... ... @@ -86,13 +86,13 @@
86 86 }
87 87  
88 88 template<typename T>
89   - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  89 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
90 90  
91 91  
92 92 unsigned int max_threads = stim::maxThreadsPerBlock();
93 93 dim3 threads(sqrt (max_threads),sqrt (max_threads));
94 94 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
95   - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting
  95 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting
96 96  
97 97 }
98 98  
... ...