Commit 1f55a87438cefd956c4a8cd420fa8c3af4c01c6a

Authored by Laila Saadatifard
1 parent a744d027

upload the fixed ivote3 working with the bounding box method

Matlab_3D/gt2.vol renamed to Matlab_3D/gt-128.vol
No preview for this file type
Matlab_3D/nissl-float-64.64.64.vol 0 → 100644
No preview for this file type
Matlab_3D/validation.m
1 clear all; 1 clear all;
2 %for t=100:100:5000 2 %for t=100:100:5000
3 -t=2350; 3 +t=2200;
4 X = 128; 4 X = 128;
5 Y = 128; 5 Y = 128;
6 Z = 128; 6 Z = 128;
@@ -12,7 +12,7 @@ r2=10; @@ -12,7 +12,7 @@ r2=10;
12 itr=5; 12 itr=5;
13 vote=7; 13 vote=7;
14 std = [5 5]; 14 std = [5 5];
15 -gt_filename = 'gt2.vol'; 15 +gt_filename = 'gt-128.vol';
16 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); 16 % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote);
17 out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); 17 out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t);
18 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); 18 % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D);
cpp/CMakeLists.txt
@@ -51,3 +51,4 @@ target_link_libraries(ivote3 @@ -51,3 +51,4 @@ target_link_libraries(ivote3
51 #copy an image test case 51 #copy an image test case
52 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) 52 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
53 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) 53 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
  54 +configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY)
@@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); @@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
10 #include "gradient3.cuh" 10 #include "gradient3.cuh"
11 #include "mag3.cuh" 11 #include "mag3.cuh"
12 #include "vote3_atomic_aabb.cuh" 12 #include "vote3_atomic_aabb.cuh"
13 -#include "update_dir3.cuh" 13 +#include "update_dir3_aabb.cuh"
14 #include "local_max3.cuh" 14 #include "local_max3.cuh"
15 15
16 16
@@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, @@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
19 19
20 20
21 cudaSetDevice(1); 21 cudaSetDevice(1);
22 - // compute the number of bytes in the input data  
23 - unsigned int bytes = x * y * z * sizeof(float); 22 +
  23 + unsigned int bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data
24 24
25 - //assign memory on gpu for the input data.z  
26 - float* gpuI0; 25 + float* gpuI0; //assign memory on gpu for the input data
27 cudaMalloc(&gpuI0, bytes); 26 cudaMalloc(&gpuI0, bytes);
  27 + cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
28 28
29 - //copy the image data to the GPU.  
30 - cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice);  
31 -  
32 - //call the blurring function from the gpu.  
33 - gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z);  
34 - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); 29 +
  30 + gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); //call the blurring function from the gpu.
35 cudaDeviceSynchronize(); 31 cudaDeviceSynchronize();
36 32
37 - //assign memory on the gpu for the gradient along the X, y, z.  
38 - float* gpu_grad; 33 + float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z.
39 cudaMalloc(&gpu_grad, bytes*3); 34 cudaMalloc(&gpu_grad, bytes*3);
40 35
41 - //call the gradient function from the gpu.  
42 - gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); 36 + gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); //call the gradient function from the gpu.
43 cudaFree(gpuI0); 37 cudaFree(gpuI0);
44 38
45 float* gpu_vote; 39 float* gpu_vote;
@@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, @@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi,
51 for (int i = 0; i < iter; i++){ 45 for (int i = 0; i < iter; i++){
52 46
53 cudaMemset(gpu_vote, 0, bytes); 47 cudaMemset(gpu_vote, 0, bytes);
54 - gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); 48 + gpu_vote3<float>(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z);
55 cudaDeviceSynchronize(); 49 cudaDeviceSynchronize();
56 50
57 //if (phi >= d_phi){ 51 //if (phi >= d_phi){
58 - gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); 52 + gpu_update_dir3<float>(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z);
59 cudaDeviceSynchronize(); 53 cudaDeviceSynchronize();
60 phi = phi - d_phi; 54 phi = phi - d_phi;
61 cos_phi = cos(phi); 55 cos_phi = cos(phi);
@@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u @@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u
87 81
88 cudaSetDevice(1); 82 cudaSetDevice(1);
89 83
90 - //assign memory on gpu for the input data.  
91 - float* gpuV; 84 +
  85 + float* gpuV; //assign memory on gpu for the input data.
92 cudaMalloc(&gpuV, bytes); 86 cudaMalloc(&gpuV, bytes);
93 87
94 - //copy the image data to the GPU.  
95 - cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); 88 + cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
96 89
97 float* gpuOut; 90 float* gpuOut;
98 cudaMalloc(&gpuOut, bytes); 91 cudaMalloc(&gpuOut, bytes);
99 92
100 - //call the local max function  
101 - gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); 93 + gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); //call the local max function
102 94
103 - //copy the final result to the cpu.  
104 - cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); 95 + cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu.
105 96
106 cudaFree(gpuV); 97 cudaFree(gpuV);
107 cudaFree(gpuOut); 98 cudaFree(gpuOut);
@@ -9,14 +9,13 @@ @@ -9,14 +9,13 @@
9 #include <stim/image/image.h> 9 #include <stim/image/image.h>
10 #define pi 3.14159 10 #define pi 3.14159
11 11
12 -#define M_PI 3.14159  
13 -#include <stim/math/circle.h>  
14 -#include <stim/math/vec3.h>  
15 -#include <stim/math/plane.h>  
16 -#include <stim/math/vector.h>  
17 -//#include <cuda.h>  
18 -//#include <stim/cuda/cudatools.h>  
19 -//#include <stim/cuda/cudatools/error.h> 12 +//#define M_PI 3.14159
  13 +//#include <stim/math/circle.h>
  14 +//#include <stim/math/vec3.h>
  15 +//#include <stim/math/plane.h>
  16 +//#include <stim/math/vector.h>
  17 +//#include <stim/visualization/aabb3.h>
  18 +
20 19
21 20
22 /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); 21 /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z);
@@ -52,42 +51,78 @@ int main(){ @@ -52,42 +51,78 @@ int main(){
52 } 51 }
53 list.close(); 52 list.close();
54 */ 53 */
55 - /*  
56 - int n=10;  
57 - stim::circle<float> cir;  
58 - float* c0= (float*) malloc(3*sizeof(float));  
59 - c0[0] =-4;  
60 - c0[1]=0;  
61 - c0[2] = 3;  
62 - stim::vec3<float> c(c0[0],c0[1],c0[2]);  
63 - float len = c.len();  
64 - stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);  
65 - std::cout<< len << '\n';  
66 - std::cout<< norm << '\n';  
67 - cir.center(c);  
68 - cir.normal(norm);  
69 - cir.scale(2);  
70 - stim::vec3<float> out = cir.p(45);  
71 - std::vector<stim::vec3<float>> out2 = cir.getPoints(n); 54 +/*
  55 + int main(){
  56 +
  57 +
  58 + stim::vec3<float> g(-44,-3.4,-0.005); // form a vec3 variable for the gradient vector
  59 + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
  60 + int n =36; //set the number of points to find the boundaries of the conical voting area
  61 + int xi = 105;
  62 + int yi = 17;
  63 + int zi = 23;
  64 + float xc = 12 * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
  65 + float yc = 10 * sin(g_sph[1]) * sin(g_sph[2]) ;
  66 + float zc = 10 * cos(g_sph[2]) ;
  67 + float r = sqrt(xc*xc + yc*yc + zc*zc);
  68 + xc+=xi;
  69 + yc+=yi;
  70 + zc+=zi;
  71 + stim::vec3<float> center(xc,yc,zc);
  72 +
  73 + float d = 2 * r * tan(25*pi/180 ); //find the diameter of the conical voting area
  74 + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
  75 + float step = 360.0/(float) n;
  76 + stim::circle<float> cir(center, d, norm);
  77 + stim::aabb3<int> bb(xi,yi,zi);
  78 + bb.insert(xc,yc,zc);
  79 + for(float j = 0; j <360.0; j += step){
  80 + stim::vec3<float> out = cir.p(j);
  81 + bb.insert(out[0], out[1], out[2]);
  82 + }
  83 +
  84 + bb.trim_low(0,0,0);
  85 + bb.trim_high(128-1, 128-1, 128-1);
  86 +
  87 + std::cout<< bb.low[0] << '\t' << bb.low[1] << '\t' << bb.low[2] << '\n';
  88 + std::cout<< bb.high[0] << '\t' << bb.high[1] << '\t' << bb.high[2] << '\n';
  89 + std::cin >> n;
  90 +*/
  91 + /*int n=10;
  92 + stim::circle<float> cir;
  93 + float* c0= (float*) malloc(3*sizeof(float));
  94 + c0[0] =-4;
  95 + c0[1]=0;
  96 + c0[2] = 3;
  97 + stim::vec3<float> c(c0[0],c0[1],c0[2]);
  98 + float len = c.len();
  99 + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len);
  100 + std::cout<< len << '\n';
  101 + std::cout<< norm << '\n';
  102 + cir.center(c);
  103 + cir.normal(norm);
  104 + cir.scale(2);
  105 + stim::vec3<float> out = cir.p(45);
  106 + std::vector<stim::vec3<float>> out2 = cir.getPoints(n);
72 107
73 - std::cout<< out << '\n';  
74 - std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';  
75 - std::cout<< c << '\n'; 108 + std::cout<< out << '\n';
  109 + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n';
  110 + std::cout<< c << '\n';
76 111
77 - for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)  
78 - std::cout << *i << '\n';  
79 - std::ofstream list("circle_check.txt");  
80 - if (list.is_open()){  
81 - for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)  
82 - list << *j << '\n';  
83 - }  
84 - list.close();  
85 - std::cin >> n; 112 + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i)
  113 + std::cout << *i << '\n';
  114 + std::ofstream list("circle_check.txt");
  115 + if (list.is_open()){
  116 + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j)
  117 + list << *j << '\n';
  118 + }
  119 + list.close();
  120 + std::cin >> n;
86 121
87 } 122 }
88 -  
89 */ 123 */
90 124
  125 +
91 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], 126 void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
92 unsigned int x, unsigned int y, unsigned int z); 127 unsigned int x, unsigned int y, unsigned int z);
93 void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); 128 void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z);
@@ -140,7 +175,7 @@ int main(int argc, char** argv){ @@ -140,7 +175,7 @@ int main(int argc, char** argv){
140 args.add("z", "size of the dataset along Z axis", "positive value"); 175 args.add("z", "size of the dataset along Z axis", "positive value");
141 args.add("t", "threshold value for the final result", "positive valu"); 176 args.add("t", "threshold value for the final result", "positive valu");
142 args.add("invert", "to invert the input data set", "string"); 177 args.add("invert", "to invert the input data set", "string");
143 - args.add("anisotropy", "anisotropy value of the imaging", "positive value"); 178 + args.add("anisotropy", "anisotropy value of the imaging", "1");
144 //parse the command line arguments. 179 //parse the command line arguments.
145 args.parse(argc, argv); 180 args.parse(argc, argv);
146 181
cpp/nissl-raw-data/nissl-float-64.64.64.vol 0 → 100644
No preview for this file type
cpp/update_dir3.cuh
@@ -129,7 +129,7 @@ @@ -129,7 +129,7 @@
129 } 129 }
130 130
131 template<typename T> 131 template<typename T>
132 - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ 132 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
133 133
134 unsigned int max_threads = stim::maxThreadsPerBlock(); 134 unsigned int max_threads = stim::maxThreadsPerBlock();
135 dim3 threads(sqrt (max_threads),sqrt (max_threads)); 135 dim3 threads(sqrt (max_threads),sqrt (max_threads));
cpp/update_dir3_aabb.cuh
@@ -14,8 +14,7 @@ @@ -14,8 +14,7 @@
14 14
15 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. 15 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
16 template<typename T> 16 template<typename T>
17 - __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){  
18 - //extern __shared__ float s_vote[]; 17 + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
19 18
20 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread 19 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
21 20
@@ -26,26 +25,13 @@ @@ -26,26 +25,13 @@
26 if(xi >= x|| yi >= y || zi>= z) return; 25 if(xi >= x|| yi >= y || zi>= z) return;
27 int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel 26 int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel
28 27
29 -  
30 - // find the starting points for this block along the x and y directions  
31 - //int bxi = blockIdx.x * blockDim.x;  
32 - //int byi = blockidx_y * blockDim.y;  
33 - //find the starting points and the size of the window, which will be copied to the 2D-shared memory  
34 - //int bxs = bxi - rx;  
35 - //int bys = byi - ry;  
36 - //int xwidth = 2 * rx + blockDim.x;  
37 - //int ywidth = 2 * ry + blockDim.y;  
38 - //compute the coordinations of this pixel in the 2D-shared memory.  
39 - //int sx_rx = threadIdx.x + rx;  
40 - //int sy_ry = threadIdx.y + ry;  
41 -  
42 float rx_sq = rx * rx; // compute the square for rmax 28 float rx_sq = rx * rx; // compute the square for rmax
43 float ry_sq = ry * ry; 29 float ry_sq = ry * ry;
44 float rz_sq = rz * rz; 30 float rz_sq = rz * rz;
45 31
46 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector 32 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
47 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector 33 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
48 - int n =4; //set the number of points to find the boundaries of the conical voting area 34 + float n =8; //set the number of points to find the boundaries of the conical voting area
49 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter 35 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter
50 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; 36 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
51 float zc = rz * cos(g_sph[2]) ; 37 float zc = rz * cos(g_sph[2]) ;
@@ -54,9 +40,10 @@ @@ -54,9 +40,10 @@
54 yc+=yi; 40 yc+=yi;
55 zc+=zi; 41 zc+=zi;
56 stim::vec3<float> center(xc,yc,zc); 42 stim::vec3<float> center(xc,yc,zc);
57 - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area 43 +
  44 + float d = 2 * r * tan(phi); //find the diameter of the conical voting area
58 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector 45 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
59 - float step = 360.0/(float) n; 46 + float step = 360.0/n;
60 stim::circle<float> cir(center, d, norm); 47 stim::circle<float> cir(center, d, norm);
61 stim::aabb3<int> bb(xi,yi,zi); 48 stim::aabb3<int> bb(xi,yi,zi);
62 bb.insert(xc,yc,zc); 49 bb.insert(xc,yc,zc);
@@ -64,13 +51,13 @@ @@ -64,13 +51,13 @@
64 stim::vec3<float> out = cir.p(j); 51 stim::vec3<float> out = cir.p(j);
65 bb.insert(out[0], out[1], out[2]); 52 bb.insert(out[0], out[1], out[2]);
66 } 53 }
67 - 54 + bb.trim_low(xi-rx, yi-ry, zi-rz);
68 bb.trim_low(0,0,0); 55 bb.trim_low(0,0,0);
  56 + bb.trim_high(xi+rx, yi+ry, zi+rz);
69 bb.trim_high(x-1, y-1, z-1); 57 bb.trim_high(x-1, y-1, z-1);
70 int bx,by,bz; 58 int bx,by,bz;
71 int dx, dy, dz; 59 int dx, dy, dz;
72 float dx_sq, dy_sq, dz_sq; 60 float dx_sq, dy_sq, dz_sq;
73 -  
74 float dist, cos_diff; 61 float dist, cos_diff;
75 int idx_c; 62 int idx_c;
76 63
@@ -118,42 +105,38 @@ @@ -118,42 +105,38 @@
118 template<typename T> 105 template<typename T>
119 __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ 106 __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){
120 107
121 - //calculate x,y,z coordinates for this thread  
122 - int xi = blockIdx.x * blockDim.x + threadIdx.x;  
123 - //find the grid size along y  
124 - int grid_y = y / blockDim.y; 108 + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
  109 +
  110 + int grid_y = y / blockDim.y; //find the grid size along y
125 int blockidx_y = blockIdx.y % grid_y; 111 int blockidx_y = blockIdx.y % grid_y;
126 int yi = blockidx_y * blockDim.y + threadIdx.y; 112 int yi = blockidx_y * blockDim.y + threadIdx.y;
127 int zi = blockIdx.y / grid_y; 113 int zi = blockIdx.y / grid_y;
128 int i = zi * x * y + yi * x + xi; 114 int i = zi * x * y + yi * x + xi;
129 115
130 if(xi >= x || yi >= y || zi >= z) return; 116 if(xi >= x || yi >= y || zi >= z) return;
131 - //update the gradient image with the new direction direction  
132 - gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; 117 +
  118 + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction direction
133 gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; 119 gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1];
134 gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; 120 gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2];
135 } 121 }
136 122
137 template<typename T> 123 template<typename T>
138 - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ 124 + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
139 125
140 unsigned int max_threads = stim::maxThreadsPerBlock(); 126 unsigned int max_threads = stim::maxThreadsPerBlock();
141 dim3 threads(sqrt (max_threads),sqrt (max_threads)); 127 dim3 threads(sqrt (max_threads),sqrt (max_threads));
142 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); 128 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
143 - //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T);  
144 - // allocate space on the GPU for the updated vote direction  
145 - T* gpu_dir; 129 +
  130 + T* gpu_dir; // allocate space on the GPU for the updated vote direction
146 cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); 131 cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3);
147 132
148 //call the kernel to calculate the new voting direction 133 //call the kernel to calculate the new voting direction
149 - update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z);  
150 - 134 + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, r[0], r[1], r[2], x , y, z);
151 135
152 //call the kernel to update the gradient direction 136 //call the kernel to update the gradient direction
153 update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); 137 update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z);
154 -  
155 - //free allocated memory  
156 - cudaFree(gpu_dir); 138 +
  139 + cudaFree(gpu_dir); //free allocated memory
157 140
158 } 141 }
159 142
@@ -101,7 +101,7 @@ @@ -101,7 +101,7 @@
101 } 101 }
102 102
103 template<typename T> 103 template<typename T>
104 - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ 104 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
105 105
106 106
107 unsigned int max_threads = stim::maxThreadsPerBlock(); 107 unsigned int max_threads = stim::maxThreadsPerBlock();
cpp/vote3_atomic.cuh
@@ -27,14 +27,14 @@ @@ -27,14 +27,14 @@
27 27
28 float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter 28 float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter
29 29
30 - float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter  
31 - float gy_v_n = gy_v/mag_v;  
32 - float gz_v_n = gz_v/mag_v; 30 + //float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter
  31 + //float gy_v_n = gy_v/mag_v;
  32 + //float gz_v_n = gz_v/mag_v;
33 33
34 float rx_sq = rx * rx; // compute the square for rmax 34 float rx_sq = rx * rx; // compute the square for rmax
35 float ry_sq = ry * ry; 35 float ry_sq = ry * ry;
36 float rz_sq = rz * rz; 36 float rz_sq = rz * rz;
37 - float x_sq, y_sq, z_sq, d_c, cos_diff; 37 + float x_sq, y_sq, z_sq, dist, cos_diff;
38 int xi_c, yi_c, zi_c, idx_c; 38 int xi_c, yi_c, zi_c, idx_c;
39 39
40 for (int z_c=-rz; z_c<=rz; z_c++){ 40 for (int z_c=-rz; z_c<=rz; z_c++){
@@ -49,8 +49,8 @@ @@ -49,8 +49,8 @@
49 xi_c = xi + x_c; 49 xi_c = xi + x_c;
50 if (xi_c < x && xi_c>=0){ 50 if (xi_c < x && xi_c>=0){
51 x_sq = x_c * x_c; 51 x_sq = x_c * x_c;
52 - d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter  
53 - cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter 52 + dist = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter
  53 + cos_diff = (gx_v * x_c + gy_v * y_c + gz_v * z_c)/(dist * mag_v); // calculate the cosine of angle between the voter and the current counter
54 if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ 54 if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){
55 idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter 55 idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter
56 atomicAdd (&gpu_vote[idx_c] , mag_v); 56 atomicAdd (&gpu_vote[idx_c] , mag_v);
@@ -64,7 +64,7 @@ @@ -64,7 +64,7 @@
64 } 64 }
65 65
66 template<typename T> 66 template<typename T>
67 - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ 67 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
68 68
69 69
70 unsigned int max_threads = stim::maxThreadsPerBlock(); 70 unsigned int max_threads = stim::maxThreadsPerBlock();
cpp/vote3_atomic_aabb.cuh
@@ -15,7 +15,7 @@ @@ -15,7 +15,7 @@
15 15
16 // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area 16 // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
17 template<typename T> 17 template<typename T>
18 - __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ 18 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
19 19
20 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread 20 int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread
21 21
@@ -32,13 +32,10 @@ @@ -32,13 +32,10 @@
32 float rx_sq = rx * rx; // compute the square for rmax 32 float rx_sq = rx * rx; // compute the square for rmax
33 float ry_sq = ry * ry; 33 float ry_sq = ry * ry;
34 float rz_sq = rz * rz; 34 float rz_sq = rz * rz;
35 - float dist, cos_diff;  
36 - int idx_c;  
37 -  
38 - //float rmax = sqrt(rx_sq + ry_sq + rz_sq); 35 +
39 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector 36 stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector
40 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector 37 stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector
41 - int n =4; //set the number of points to find the boundaries of the conical voting area 38 + float n =8; //set the number of points to find the boundaries of the conical voting area
42 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter 39 float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter
43 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; 40 float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
44 float zc = rz * cos(g_sph[2]) ; 41 float zc = rz * cos(g_sph[2]) ;
@@ -48,9 +45,9 @@ @@ -48,9 +45,9 @@
48 zc+=zi; 45 zc+=zi;
49 stim::vec3<float> center(xc,yc,zc); 46 stim::vec3<float> center(xc,yc,zc);
50 47
51 - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area 48 + float d = 2 * r * tan(phi); //find the diameter of the conical voting area
52 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector 49 stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector
53 - float step = 360.0/(float) n; 50 + float step = 360.0/n;
54 stim::circle<float> cir(center, d, norm); 51 stim::circle<float> cir(center, d, norm);
55 stim::aabb3<int> bb(xi,yi,zi); 52 stim::aabb3<int> bb(xi,yi,zi);
56 bb.insert(xc,yc,zc); 53 bb.insert(xc,yc,zc);
@@ -58,12 +55,15 @@ @@ -58,12 +55,15 @@
58 stim::vec3<float> out = cir.p(j); 55 stim::vec3<float> out = cir.p(j);
59 bb.insert(out[0], out[1], out[2]); 56 bb.insert(out[0], out[1], out[2]);
60 } 57 }
61 - 58 + bb.trim_low(xi-rx, yi-ry, zi-rz);
62 bb.trim_low(0,0,0); 59 bb.trim_low(0,0,0);
  60 + bb.trim_high(xi+rx, yi+ry, zi+rz);
63 bb.trim_high(x-1, y-1, z-1); 61 bb.trim_high(x-1, y-1, z-1);
64 int bx,by,bz; 62 int bx,by,bz;
65 int dx, dy, dz; 63 int dx, dy, dz;
66 float dx_sq, dy_sq, dz_sq; 64 float dx_sq, dy_sq, dz_sq;
  65 + float dist, cos_diff;
  66 + int idx_c;
67 for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ 67 for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
68 dz = bz - zi; //compute the distance bw the voter and the current counter along z axis 68 dz = bz - zi; //compute the distance bw the voter and the current counter along z axis
69 dz_sq = dz * dz; 69 dz_sq = dz * dz;
@@ -86,13 +86,13 @@ @@ -86,13 +86,13 @@
86 } 86 }
87 87
88 template<typename T> 88 template<typename T>
89 - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ 89 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
90 90
91 91
92 unsigned int max_threads = stim::maxThreadsPerBlock(); 92 unsigned int max_threads = stim::maxThreadsPerBlock();
93 dim3 threads(sqrt (max_threads),sqrt (max_threads)); 93 dim3 threads(sqrt (max_threads),sqrt (max_threads));
94 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); 94 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
95 - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting 95 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting
96 96
97 } 97 }
98 98