Commit 1f55a87438cefd956c4a8cd420fa8c3af4c01c6a
1 parent
a744d027
upload the fixed ivote3 working with the bounding box method
Showing
12 changed files
with
131 additions
and
121 deletions
Show diff stats
Matlab_3D/gt2.vol renamed to Matlab_3D/gt-128.vol
No preview for this file type
No preview for this file type
Matlab_3D/validation.m
1 | clear all; | 1 | clear all; |
2 | %for t=100:100:5000 | 2 | %for t=100:100:5000 |
3 | -t=2350; | 3 | +t=2200; |
4 | X = 128; | 4 | X = 128; |
5 | Y = 128; | 5 | Y = 128; |
6 | Z = 128; | 6 | Z = 128; |
@@ -12,7 +12,7 @@ r2=10; | @@ -12,7 +12,7 @@ r2=10; | ||
12 | itr=5; | 12 | itr=5; |
13 | vote=7; | 13 | vote=7; |
14 | std = [5 5]; | 14 | std = [5 5]; |
15 | -gt_filename = 'gt2.vol'; | 15 | +gt_filename = 'gt-128.vol'; |
16 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); | 16 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); |
17 | out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); | 17 | out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); |
18 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); | 18 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); |
cpp/CMakeLists.txt
@@ -51,3 +51,4 @@ target_link_libraries(ivote3 | @@ -51,3 +51,4 @@ target_link_libraries(ivote3 | ||
51 | #copy an image test case | 51 | #copy an image test case |
52 | configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) | 52 | configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) |
53 | configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) | 53 | configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) |
54 | +configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) |
cpp/cudafunc.cu
@@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | @@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); | ||
10 | #include "gradient3.cuh" | 10 | #include "gradient3.cuh" |
11 | #include "mag3.cuh" | 11 | #include "mag3.cuh" |
12 | #include "vote3_atomic_aabb.cuh" | 12 | #include "vote3_atomic_aabb.cuh" |
13 | -#include "update_dir3.cuh" | 13 | +#include "update_dir3_aabb.cuh" |
14 | #include "local_max3.cuh" | 14 | #include "local_max3.cuh" |
15 | 15 | ||
16 | 16 | ||
@@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | @@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | ||
19 | 19 | ||
20 | 20 | ||
21 | cudaSetDevice(1); | 21 | cudaSetDevice(1); |
22 | - // compute the number of bytes in the input data | ||
23 | - unsigned int bytes = x * y * z * sizeof(float); | 22 | + |
23 | + unsigned int bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data | ||
24 | 24 | ||
25 | - //assign memory on gpu for the input data.z | ||
26 | - float* gpuI0; | 25 | + float* gpuI0; //assign memory on gpu for the input data |
27 | cudaMalloc(&gpuI0, bytes); | 26 | cudaMalloc(&gpuI0, bytes); |
27 | + cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. | ||
28 | 28 | ||
29 | - //copy the image data to the GPU. | ||
30 | - cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); | ||
31 | - | ||
32 | - //call the blurring function from the gpu. | ||
33 | - gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); | ||
34 | - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); | 29 | + |
30 | + gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); //call the blurring function from the gpu. | ||
35 | cudaDeviceSynchronize(); | 31 | cudaDeviceSynchronize(); |
36 | 32 | ||
37 | - //assign memory on the gpu for the gradient along the X, y, z. | ||
38 | - float* gpu_grad; | 33 | + float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z. |
39 | cudaMalloc(&gpu_grad, bytes*3); | 34 | cudaMalloc(&gpu_grad, bytes*3); |
40 | 35 | ||
41 | - //call the gradient function from the gpu. | ||
42 | - gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); | 36 | + gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); //call the gradient function from the gpu. |
43 | cudaFree(gpuI0); | 37 | cudaFree(gpuI0); |
44 | 38 | ||
45 | float* gpu_vote; | 39 | float* gpu_vote; |
@@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | @@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, | ||
51 | for (int i = 0; i < iter; i++){ | 45 | for (int i = 0; i < iter; i++){ |
52 | 46 | ||
53 | cudaMemset(gpu_vote, 0, bytes); | 47 | cudaMemset(gpu_vote, 0, bytes); |
54 | - gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); | 48 | + gpu_vote3<float>(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z); |
55 | cudaDeviceSynchronize(); | 49 | cudaDeviceSynchronize(); |
56 | 50 | ||
57 | //if (phi >= d_phi){ | 51 | //if (phi >= d_phi){ |
58 | - gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); | 52 | + gpu_update_dir3<float>(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z); |
59 | cudaDeviceSynchronize(); | 53 | cudaDeviceSynchronize(); |
60 | phi = phi - d_phi; | 54 | phi = phi - d_phi; |
61 | cos_phi = cos(phi); | 55 | cos_phi = cos(phi); |
@@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u | @@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u | ||
87 | 81 | ||
88 | cudaSetDevice(1); | 82 | cudaSetDevice(1); |
89 | 83 | ||
90 | - //assign memory on gpu for the input data. | ||
91 | - float* gpuV; | 84 | + |
85 | + float* gpuV; //assign memory on gpu for the input data. | ||
92 | cudaMalloc(&gpuV, bytes); | 86 | cudaMalloc(&gpuV, bytes); |
93 | 87 | ||
94 | - //copy the image data to the GPU. | ||
95 | - cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); | 88 | + cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. |
96 | 89 | ||
97 | float* gpuOut; | 90 | float* gpuOut; |
98 | cudaMalloc(&gpuOut, bytes); | 91 | cudaMalloc(&gpuOut, bytes); |
99 | 92 | ||
100 | - //call the local max function | ||
101 | - gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); | 93 | + gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); //call the local max function |
102 | 94 | ||
103 | - //copy the final result to the cpu. | ||
104 | - cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); | 95 | + cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu. |
105 | 96 | ||
106 | cudaFree(gpuV); | 97 | cudaFree(gpuV); |
107 | cudaFree(gpuOut); | 98 | cudaFree(gpuOut); |
cpp/main.cpp
@@ -9,14 +9,13 @@ | @@ -9,14 +9,13 @@ | ||
9 | #include <stim/image/image.h> | 9 | #include <stim/image/image.h> |
10 | #define pi 3.14159 | 10 | #define pi 3.14159 |
11 | 11 | ||
12 | -#define M_PI 3.14159 | ||
13 | -#include <stim/math/circle.h> | ||
14 | -#include <stim/math/vec3.h> | ||
15 | -#include <stim/math/plane.h> | ||
16 | -#include <stim/math/vector.h> | ||
17 | -//#include <cuda.h> | ||
18 | -//#include <stim/cuda/cudatools.h> | ||
19 | -//#include <stim/cuda/cudatools/error.h> | 12 | +//#define M_PI 3.14159 |
13 | +//#include <stim/math/circle.h> | ||
14 | +//#include <stim/math/vec3.h> | ||
15 | +//#include <stim/math/plane.h> | ||
16 | +//#include <stim/math/vector.h> | ||
17 | +//#include <stim/visualization/aabb3.h> | ||
18 | + | ||
20 | 19 | ||
21 | 20 | ||
22 | /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); | 21 | /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); |
@@ -52,42 +51,78 @@ int main(){ | @@ -52,42 +51,78 @@ int main(){ | ||
52 | } | 51 | } |
53 | list.close(); | 52 | list.close(); |
54 | */ | 53 | */ |
55 | - /* | ||
56 | - int n=10; | ||
57 | - stim::circle<float> cir; | ||
58 | - float* c0= (float*) malloc(3*sizeof(float)); | ||
59 | - c0[0] =-4; | ||
60 | - c0[1]=0; | ||
61 | - c0[2] = 3; | ||
62 | - stim::vec3<float> c(c0[0],c0[1],c0[2]); | ||
63 | - float len = c.len(); | ||
64 | - stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | ||
65 | - std::cout<< len << '\n'; | ||
66 | - std::cout<< norm << '\n'; | ||
67 | - cir.center(c); | ||
68 | - cir.normal(norm); | ||
69 | - cir.scale(2); | ||
70 | - stim::vec3<float> out = cir.p(45); | ||
71 | - std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | 54 | +/* |
55 | + int main(){ | ||
56 | + | ||
57 | + | ||
58 | + stim::vec3<float> g(-44,-3.4,-0.005); // form a vec3 variable for the gradient vector | ||
59 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | ||
60 | + int n =36; //set the number of points to find the boundaries of the conical voting area | ||
61 | + int xi = 105; | ||
62 | + int yi = 17; | ||
63 | + int zi = 23; | ||
64 | + float xc = 12 * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter | ||
65 | + float yc = 10 * sin(g_sph[1]) * sin(g_sph[2]) ; | ||
66 | + float zc = 10 * cos(g_sph[2]) ; | ||
67 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | ||
68 | + xc+=xi; | ||
69 | + yc+=yi; | ||
70 | + zc+=zi; | ||
71 | + stim::vec3<float> center(xc,yc,zc); | ||
72 | + | ||
73 | + float d = 2 * r * tan(25*pi/180 ); //find the diameter of the conical voting area | ||
74 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | ||
75 | + float step = 360.0/(float) n; | ||
76 | + stim::circle<float> cir(center, d, norm); | ||
77 | + stim::aabb3<int> bb(xi,yi,zi); | ||
78 | + bb.insert(xc,yc,zc); | ||
79 | + for(float j = 0; j <360.0; j += step){ | ||
80 | + stim::vec3<float> out = cir.p(j); | ||
81 | + bb.insert(out[0], out[1], out[2]); | ||
82 | + } | ||
83 | + | ||
84 | + bb.trim_low(0,0,0); | ||
85 | + bb.trim_high(128-1, 128-1, 128-1); | ||
86 | + | ||
87 | + std::cout<< bb.low[0] << '\t' << bb.low[1] << '\t' << bb.low[2] << '\n'; | ||
88 | + std::cout<< bb.high[0] << '\t' << bb.high[1] << '\t' << bb.high[2] << '\n'; | ||
89 | + std::cin >> n; | ||
90 | +*/ | ||
91 | + /*int n=10; | ||
92 | + stim::circle<float> cir; | ||
93 | + float* c0= (float*) malloc(3*sizeof(float)); | ||
94 | + c0[0] =-4; | ||
95 | + c0[1]=0; | ||
96 | + c0[2] = 3; | ||
97 | + stim::vec3<float> c(c0[0],c0[1],c0[2]); | ||
98 | + float len = c.len(); | ||
99 | + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | ||
100 | + std::cout<< len << '\n'; | ||
101 | + std::cout<< norm << '\n'; | ||
102 | + cir.center(c); | ||
103 | + cir.normal(norm); | ||
104 | + cir.scale(2); | ||
105 | + stim::vec3<float> out = cir.p(45); | ||
106 | + std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | ||
72 | 107 | ||
73 | - std::cout<< out << '\n'; | ||
74 | - std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | ||
75 | - std::cout<< c << '\n'; | 108 | + std::cout<< out << '\n'; |
109 | + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | ||
110 | + std::cout<< c << '\n'; | ||
76 | 111 | ||
77 | - for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) | ||
78 | - std::cout << *i << '\n'; | ||
79 | - std::ofstream list("circle_check.txt"); | ||
80 | - if (list.is_open()){ | ||
81 | - for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | ||
82 | - list << *j << '\n'; | ||
83 | - } | ||
84 | - list.close(); | ||
85 | - std::cin >> n; | 112 | + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) |
113 | + std::cout << *i << '\n'; | ||
114 | + std::ofstream list("circle_check.txt"); | ||
115 | + if (list.is_open()){ | ||
116 | + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | ||
117 | + list << *j << '\n'; | ||
118 | + } | ||
119 | + list.close(); | ||
120 | + std::cin >> n; | ||
86 | 121 | ||
87 | } | 122 | } |
88 | - | ||
89 | */ | 123 | */ |
90 | 124 | ||
125 | + | ||
91 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], | 126 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], |
92 | unsigned int x, unsigned int y, unsigned int z); | 127 | unsigned int x, unsigned int y, unsigned int z); |
93 | void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); | 128 | void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); |
@@ -140,7 +175,7 @@ int main(int argc, char** argv){ | @@ -140,7 +175,7 @@ int main(int argc, char** argv){ | ||
140 | args.add("z", "size of the dataset along Z axis", "positive value"); | 175 | args.add("z", "size of the dataset along Z axis", "positive value"); |
141 | args.add("t", "threshold value for the final result", "positive valu"); | 176 | args.add("t", "threshold value for the final result", "positive valu"); |
142 | args.add("invert", "to invert the input data set", "string"); | 177 | args.add("invert", "to invert the input data set", "string"); |
143 | - args.add("anisotropy", "anisotropy value of the imaging", "positive value"); | 178 | + args.add("anisotropy", "anisotropy value of the imaging", "1"); |
144 | //parse the command line arguments. | 179 | //parse the command line arguments. |
145 | args.parse(argc, argv); | 180 | args.parse(argc, argv); |
146 | 181 |
No preview for this file type
cpp/update_dir3.cuh
@@ -129,7 +129,7 @@ | @@ -129,7 +129,7 @@ | ||
129 | } | 129 | } |
130 | 130 | ||
131 | template<typename T> | 131 | template<typename T> |
132 | - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | 132 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ |
133 | 133 | ||
134 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 134 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
135 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); | 135 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
cpp/update_dir3_aabb.cuh
@@ -14,8 +14,7 @@ | @@ -14,8 +14,7 @@ | ||
14 | 14 | ||
15 | // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. | 15 | // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. |
16 | template<typename T> | 16 | template<typename T> |
17 | - __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | ||
18 | - //extern __shared__ float s_vote[]; | 17 | + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ |
19 | 18 | ||
20 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | 19 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread |
21 | 20 | ||
@@ -26,26 +25,13 @@ | @@ -26,26 +25,13 @@ | ||
26 | if(xi >= x|| yi >= y || zi>= z) return; | 25 | if(xi >= x|| yi >= y || zi>= z) return; |
27 | int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel | 26 | int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel |
28 | 27 | ||
29 | - | ||
30 | - // find the starting points for this block along the x and y directions | ||
31 | - //int bxi = blockIdx.x * blockDim.x; | ||
32 | - //int byi = blockidx_y * blockDim.y; | ||
33 | - //find the starting points and the size of the window, which will be copied to the 2D-shared memory | ||
34 | - //int bxs = bxi - rx; | ||
35 | - //int bys = byi - ry; | ||
36 | - //int xwidth = 2 * rx + blockDim.x; | ||
37 | - //int ywidth = 2 * ry + blockDim.y; | ||
38 | - //compute the coordinations of this pixel in the 2D-shared memory. | ||
39 | - //int sx_rx = threadIdx.x + rx; | ||
40 | - //int sy_ry = threadIdx.y + ry; | ||
41 | - | ||
42 | float rx_sq = rx * rx; // compute the square for rmax | 28 | float rx_sq = rx * rx; // compute the square for rmax |
43 | float ry_sq = ry * ry; | 29 | float ry_sq = ry * ry; |
44 | float rz_sq = rz * rz; | 30 | float rz_sq = rz * rz; |
45 | 31 | ||
46 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | 32 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector |
47 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | 33 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector |
48 | - int n =4; //set the number of points to find the boundaries of the conical voting area | 34 | + float n =8; //set the number of points to find the boundaries of the conical voting area |
49 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter | 35 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter |
50 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | 36 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; |
51 | float zc = rz * cos(g_sph[2]) ; | 37 | float zc = rz * cos(g_sph[2]) ; |
@@ -54,9 +40,10 @@ | @@ -54,9 +40,10 @@ | ||
54 | yc+=yi; | 40 | yc+=yi; |
55 | zc+=zi; | 41 | zc+=zi; |
56 | stim::vec3<float> center(xc,yc,zc); | 42 | stim::vec3<float> center(xc,yc,zc); |
57 | - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | 43 | + |
44 | + float d = 2 * r * tan(phi); //find the diameter of the conical voting area | ||
58 | stim::vec3<float> norm = g.norm(); //compute the normalized gradient vector | 45 | stim::vec3<float> norm = g.norm(); //compute the normalized gradient vector |
59 | - float step = 360.0/(float) n; | 46 | + float step = 360.0/n; |
60 | stim::circle<float> cir(center, d, norm); | 47 | stim::circle<float> cir(center, d, norm); |
61 | stim::aabb3<int> bb(xi,yi,zi); | 48 | stim::aabb3<int> bb(xi,yi,zi); |
62 | bb.insert(xc,yc,zc); | 49 | bb.insert(xc,yc,zc); |
@@ -64,13 +51,13 @@ | @@ -64,13 +51,13 @@ | ||
64 | stim::vec3<float> out = cir.p(j); | 51 | stim::vec3<float> out = cir.p(j); |
65 | bb.insert(out[0], out[1], out[2]); | 52 | bb.insert(out[0], out[1], out[2]); |
66 | } | 53 | } |
67 | - | 54 | + bb.trim_low(xi-rx, yi-ry, zi-rz); |
68 | bb.trim_low(0,0,0); | 55 | bb.trim_low(0,0,0); |
56 | + bb.trim_high(xi+rx, yi+ry, zi+rz); | ||
69 | bb.trim_high(x-1, y-1, z-1); | 57 | bb.trim_high(x-1, y-1, z-1); |
70 | int bx,by,bz; | 58 | int bx,by,bz; |
71 | int dx, dy, dz; | 59 | int dx, dy, dz; |
72 | float dx_sq, dy_sq, dz_sq; | 60 | float dx_sq, dy_sq, dz_sq; |
73 | - | ||
74 | float dist, cos_diff; | 61 | float dist, cos_diff; |
75 | int idx_c; | 62 | int idx_c; |
76 | 63 | ||
@@ -118,42 +105,38 @@ | @@ -118,42 +105,38 @@ | ||
118 | template<typename T> | 105 | template<typename T> |
119 | __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ | 106 | __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ |
120 | 107 | ||
121 | - //calculate x,y,z coordinates for this thread | ||
122 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
123 | - //find the grid size along y | ||
124 | - int grid_y = y / blockDim.y; | 108 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread |
109 | + | ||
110 | + int grid_y = y / blockDim.y; //find the grid size along y | ||
125 | int blockidx_y = blockIdx.y % grid_y; | 111 | int blockidx_y = blockIdx.y % grid_y; |
126 | int yi = blockidx_y * blockDim.y + threadIdx.y; | 112 | int yi = blockidx_y * blockDim.y + threadIdx.y; |
127 | int zi = blockIdx.y / grid_y; | 113 | int zi = blockIdx.y / grid_y; |
128 | int i = zi * x * y + yi * x + xi; | 114 | int i = zi * x * y + yi * x + xi; |
129 | 115 | ||
130 | if(xi >= x || yi >= y || zi >= z) return; | 116 | if(xi >= x || yi >= y || zi >= z) return; |
131 | - //update the gradient image with the new direction direction | ||
132 | - gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; | 117 | + |
118 | + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction | ||
133 | gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; | 119 | gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; |
134 | gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; | 120 | gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; |
135 | } | 121 | } |
136 | 122 | ||
137 | template<typename T> | 123 | template<typename T> |
138 | - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | 124 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ |
139 | 125 | ||
140 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 126 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
141 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); | 127 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
142 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | 128 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
143 | - //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T); | ||
144 | - // allocate space on the GPU for the updated vote direction | ||
145 | - T* gpu_dir; | 129 | + |
130 | + T* gpu_dir; // allocate space on the GPU for the updated vote direction | ||
146 | cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); | 131 | cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); |
147 | 132 | ||
148 | //call the kernel to calculate the new voting direction | 133 | //call the kernel to calculate the new voting direction |
149 | - update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z); | ||
150 | - | 134 | + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, r[0], r[1], r[2], x , y, z); |
151 | 135 | ||
152 | //call the kernel to update the gradient direction | 136 | //call the kernel to update the gradient direction |
153 | update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); | 137 | update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); |
154 | - | ||
155 | - //free allocated memory | ||
156 | - cudaFree(gpu_dir); | 138 | + |
139 | + cudaFree(gpu_dir); //free allocated memory | ||
157 | 140 | ||
158 | } | 141 | } |
159 | 142 |
cpp/vote3.cuh
@@ -101,7 +101,7 @@ | @@ -101,7 +101,7 @@ | ||
101 | } | 101 | } |
102 | 102 | ||
103 | template<typename T> | 103 | template<typename T> |
104 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | 104 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ |
105 | 105 | ||
106 | 106 | ||
107 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 107 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
cpp/vote3_atomic.cuh
@@ -27,14 +27,14 @@ | @@ -27,14 +27,14 @@ | ||
27 | 27 | ||
28 | float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter | 28 | float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter |
29 | 29 | ||
30 | - float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter | ||
31 | - float gy_v_n = gy_v/mag_v; | ||
32 | - float gz_v_n = gz_v/mag_v; | 30 | + //float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter |
31 | + //float gy_v_n = gy_v/mag_v; | ||
32 | + //float gz_v_n = gz_v/mag_v; | ||
33 | 33 | ||
34 | float rx_sq = rx * rx; // compute the square for rmax | 34 | float rx_sq = rx * rx; // compute the square for rmax |
35 | float ry_sq = ry * ry; | 35 | float ry_sq = ry * ry; |
36 | float rz_sq = rz * rz; | 36 | float rz_sq = rz * rz; |
37 | - float x_sq, y_sq, z_sq, d_c, cos_diff; | 37 | + float x_sq, y_sq, z_sq, dist, cos_diff; |
38 | int xi_c, yi_c, zi_c, idx_c; | 38 | int xi_c, yi_c, zi_c, idx_c; |
39 | 39 | ||
40 | for (int z_c=-rz; z_c<=rz; z_c++){ | 40 | for (int z_c=-rz; z_c<=rz; z_c++){ |
@@ -49,8 +49,8 @@ | @@ -49,8 +49,8 @@ | ||
49 | xi_c = xi + x_c; | 49 | xi_c = xi + x_c; |
50 | if (xi_c < x && xi_c>=0){ | 50 | if (xi_c < x && xi_c>=0){ |
51 | x_sq = x_c * x_c; | 51 | x_sq = x_c * x_c; |
52 | - d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter | ||
53 | - cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter | 52 | + dist = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter |
53 | + cos_diff = (gx_v * x_c + gy_v * y_c + gz_v * z_c)/(dist * mag_v); // calculate the cosine of angle between the voter and the current counter | ||
54 | if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ | 54 | if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ |
55 | idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter | 55 | idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter |
56 | atomicAdd (&gpu_vote[idx_c] , mag_v); | 56 | atomicAdd (&gpu_vote[idx_c] , mag_v); |
@@ -64,7 +64,7 @@ | @@ -64,7 +64,7 @@ | ||
64 | } | 64 | } |
65 | 65 | ||
66 | template<typename T> | 66 | template<typename T> |
67 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | 67 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ |
68 | 68 | ||
69 | 69 | ||
70 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 70 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
cpp/vote3_atomic_aabb.cuh
@@ -15,7 +15,7 @@ | @@ -15,7 +15,7 @@ | ||
15 | 15 | ||
16 | // this kernel calculates the vote value of a pixel by adding up the gradient magnitudes of every voter whose voting area contains this pixel | 16 | // this kernel calculates the vote value of a pixel by adding up the gradient magnitudes of every voter whose voting area contains this pixel |
17 | template<typename T> | 17 | template<typename T> |
18 | - __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | 18 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ |
19 | 19 | ||
20 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | 20 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread |
21 | 21 | ||
@@ -32,13 +32,10 @@ | @@ -32,13 +32,10 @@ | ||
32 | float rx_sq = rx * rx; // compute the square for rmax | 32 | float rx_sq = rx * rx; // compute the square for rmax |
33 | float ry_sq = ry * ry; | 33 | float ry_sq = ry * ry; |
34 | float rz_sq = rz * rz; | 34 | float rz_sq = rz * rz; |
35 | - float dist, cos_diff; | ||
36 | - int idx_c; | ||
37 | - | ||
38 | - //float rmax = sqrt(rx_sq + ry_sq + rz_sq); | 35 | + |
39 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector | 36 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector |
40 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | 37 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector |
41 | - int n =4; //set the number of points to find the boundaries of the conical voting area | 38 | + float n =8; //set the number of points to find the boundaries of the conical voting area |
42 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter | 39 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter |
43 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; | 40 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; |
44 | float zc = rz * cos(g_sph[2]) ; | 41 | float zc = rz * cos(g_sph[2]) ; |
@@ -48,9 +45,9 @@ | @@ -48,9 +45,9 @@ | ||
48 | zc+=zi; | 45 | zc+=zi; |
49 | stim::vec3<float> center(xc,yc,zc); | 46 | stim::vec3<float> center(xc,yc,zc); |
50 | 47 | ||
51 | - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | 48 | + float d = 2 * r * tan(phi); //find the diameter of the conical voting area |
52 | stim::vec3<float> norm = g.norm(); //compute the normalized gradient vector | 49 | stim::vec3<float> norm = g.norm(); //compute the normalized gradient vector |
53 | - float step = 360.0/(float) n; | 50 | + float step = 360.0/n; |
54 | stim::circle<float> cir(center, d, norm); | 51 | stim::circle<float> cir(center, d, norm); |
55 | stim::aabb3<int> bb(xi,yi,zi); | 52 | stim::aabb3<int> bb(xi,yi,zi); |
56 | bb.insert(xc,yc,zc); | 53 | bb.insert(xc,yc,zc); |
@@ -58,12 +55,15 @@ | @@ -58,12 +55,15 @@ | ||
58 | stim::vec3<float> out = cir.p(j); | 55 | stim::vec3<float> out = cir.p(j); |
59 | bb.insert(out[0], out[1], out[2]); | 56 | bb.insert(out[0], out[1], out[2]); |
60 | } | 57 | } |
61 | - | 58 | + bb.trim_low(xi-rx, yi-ry, zi-rz); |
62 | bb.trim_low(0,0,0); | 59 | bb.trim_low(0,0,0); |
60 | + bb.trim_high(xi+rx, yi+ry, zi+rz); | ||
63 | bb.trim_high(x-1, y-1, z-1); | 61 | bb.trim_high(x-1, y-1, z-1); |
64 | int bx,by,bz; | 62 | int bx,by,bz; |
65 | int dx, dy, dz; | 63 | int dx, dy, dz; |
66 | float dx_sq, dy_sq, dz_sq; | 64 | float dx_sq, dy_sq, dz_sq; |
65 | + float dist, cos_diff; | ||
66 | + int idx_c; | ||
67 | for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ | 67 | for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ |
68 | dz = bz - zi; //compute the distance bw the voter and the current counter along z axis | 68 | dz = bz - zi; //compute the distance bw the voter and the current counter along z axis |
69 | dz_sq = dz * dz; | 69 | dz_sq = dz * dz; |
@@ -86,13 +86,13 @@ | @@ -86,13 +86,13 @@ | ||
86 | } | 86 | } |
87 | 87 | ||
88 | template<typename T> | 88 | template<typename T> |
89 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | 89 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ |
90 | 90 | ||
91 | 91 | ||
92 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 92 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
93 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); | 93 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
94 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | 94 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
95 | - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting | 95 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting |
96 | 96 | ||
97 | } | 97 | } |
98 | 98 |