Commit 1f55a87438cefd956c4a8cd420fa8c3af4c01c6a
1 parent
a744d027
upload the fixed ivote3 working with the bounding box method
Showing
12 changed files
with
131 additions
and
121 deletions
Show diff stats
Matlab_3D/gt2.vol renamed to Matlab_3D/gt-128.vol
No preview for this file type
No preview for this file type
Matlab_3D/validation.m
1 | 1 | clear all; |
2 | 2 | %for t=100:100:5000 |
3 | -t=2350; | |
3 | +t=2200; | |
4 | 4 | X = 128; |
5 | 5 | Y = 128; |
6 | 6 | Z = 128; |
... | ... | @@ -12,7 +12,7 @@ r2=10; |
12 | 12 | itr=5; |
13 | 13 | vote=7; |
14 | 14 | std = [5 5]; |
15 | -gt_filename = 'gt2.vol'; | |
15 | +gt_filename = 'gt-128.vol'; | |
16 | 16 | % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); |
17 | 17 | out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); |
18 | 18 | % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); | ... | ... |
cpp/CMakeLists.txt
... | ... | @@ -51,3 +51,4 @@ target_link_libraries(ivote3 |
51 | 51 | #copy an image test case |
52 | 52 | configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) |
53 | 53 | configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) |
54 | +configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) | ... | ... |
cpp/cudafunc.cu
... | ... | @@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); |
10 | 10 | #include "gradient3.cuh" |
11 | 11 | #include "mag3.cuh" |
12 | 12 | #include "vote3_atomic_aabb.cuh" |
13 | -#include "update_dir3.cuh" | |
13 | +#include "update_dir3_aabb.cuh" | |
14 | 14 | #include "local_max3.cuh" |
15 | 15 | |
16 | 16 | |
... | ... | @@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, |
19 | 19 | |
20 | 20 | |
21 | 21 | cudaSetDevice(1); |
22 | - // compute the number of bytes in the input data | |
23 | - unsigned int bytes = x * y * z * sizeof(float); | |
22 | + | |
23 | + unsigned int bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data | |
24 | 24 | |
25 | - //assign memory on gpu for the input data.z | |
26 | - float* gpuI0; | |
25 | + float* gpuI0; //assign memory on gpu for the input data | |
27 | 26 | cudaMalloc(&gpuI0, bytes); |
27 | + cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. | |
28 | 28 | |
29 | - //copy the image data to the GPU. | |
30 | - cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); | |
31 | - | |
32 | - //call the blurring function from the gpu. | |
33 | - gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); | |
34 | - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); | |
29 | + | |
30 | + gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); //call the blurring function from the gpu. | |
35 | 31 | cudaDeviceSynchronize(); |
36 | 32 | |
37 | - //assign memory on the gpu for the gradient along the X, y, z. | |
38 | - float* gpu_grad; | |
33 | + float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z. | |
39 | 34 | cudaMalloc(&gpu_grad, bytes*3); |
40 | 35 | |
41 | - //call the gradient function from the gpu. | |
42 | - gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); | |
36 | + gpu_gradient3<float>(gpu_grad, gpuI0, anisotropy, x, y, z); //call the gradient function from the gpu. | |
43 | 37 | cudaFree(gpuI0); |
44 | 38 | |
45 | 39 | float* gpu_vote; |
... | ... | @@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, |
51 | 45 | for (int i = 0; i < iter; i++){ |
52 | 46 | |
53 | 47 | cudaMemset(gpu_vote, 0, bytes); |
54 | - gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); | |
48 | + gpu_vote3<float>(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z); | |
55 | 49 | cudaDeviceSynchronize(); |
56 | 50 | |
57 | 51 | //if (phi >= d_phi){ |
58 | - gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); | |
52 | + gpu_update_dir3<float>(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z); | |
59 | 53 | cudaDeviceSynchronize(); |
60 | 54 | phi = phi - d_phi; |
61 | 55 | cos_phi = cos(phi); |
... | ... | @@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u |
87 | 81 | |
88 | 82 | cudaSetDevice(1); |
89 | 83 | |
90 | - //assign memory on gpu for the input data. | |
91 | - float* gpuV; | |
84 | + | |
85 | + float* gpuV; //assign memory on gpu for the input data. | |
92 | 86 | cudaMalloc(&gpuV, bytes); |
93 | 87 | |
94 | - //copy the image data to the GPU. | |
95 | - cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); | |
88 | + cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. | |
96 | 89 | |
97 | 90 | float* gpuOut; |
98 | 91 | cudaMalloc(&gpuOut, bytes); |
99 | 92 | |
100 | - //call the local max function | |
101 | - gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); | |
93 | + gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); //call the local max function | |
102 | 94 | |
103 | - //copy the final result to the cpu. | |
104 | - cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); | |
95 | + cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu. | |
105 | 96 | |
106 | 97 | cudaFree(gpuV); |
107 | 98 | cudaFree(gpuOut); | ... | ... |
cpp/main.cpp
... | ... | @@ -9,14 +9,13 @@ |
9 | 9 | #include <stim/image/image.h> |
10 | 10 | #define pi 3.14159 |
11 | 11 | |
12 | -#define M_PI 3.14159 | |
13 | -#include <stim/math/circle.h> | |
14 | -#include <stim/math/vec3.h> | |
15 | -#include <stim/math/plane.h> | |
16 | -#include <stim/math/vector.h> | |
17 | -//#include <cuda.h> | |
18 | -//#include <stim/cuda/cudatools.h> | |
19 | -//#include <stim/cuda/cudatools/error.h> | |
12 | +//#define M_PI 3.14159 | |
13 | +//#include <stim/math/circle.h> | |
14 | +//#include <stim/math/vec3.h> | |
15 | +//#include <stim/math/plane.h> | |
16 | +//#include <stim/math/vector.h> | |
17 | +//#include <stim/visualization/aabb3.h> | |
18 | + | |
20 | 19 | |
21 | 20 | |
22 | 21 | /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); |
... | ... | @@ -52,42 +51,78 @@ int main(){ |
52 | 51 | } |
53 | 52 | list.close(); |
54 | 53 | */ |
55 | - /* | |
56 | - int n=10; | |
57 | - stim::circle<float> cir; | |
58 | - float* c0= (float*) malloc(3*sizeof(float)); | |
59 | - c0[0] =-4; | |
60 | - c0[1]=0; | |
61 | - c0[2] = 3; | |
62 | - stim::vec3<float> c(c0[0],c0[1],c0[2]); | |
63 | - float len = c.len(); | |
64 | - stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | |
65 | - std::cout<< len << '\n'; | |
66 | - std::cout<< norm << '\n'; | |
67 | - cir.center(c); | |
68 | - cir.normal(norm); | |
69 | - cir.scale(2); | |
70 | - stim::vec3<float> out = cir.p(45); | |
71 | - std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | |
54 | +/* | |
55 | + int main(){ | |
56 | + | |
57 | + | |
58 | + stim::vec3<float> g(-44,-3.4,-0.005); // form a vec3 variable for the gradient vector | |
59 | + stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector | |
60 | + int n =36; //set the number of points to find the boundaries of the conical voting area | |
61 | + int xi = 105; | |
62 | + int yi = 17; | |
63 | + int zi = 23; | |
64 | + float xc = 12 * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter | |
65 | + float yc = 10 * sin(g_sph[1]) * sin(g_sph[2]) ; | |
66 | + float zc = 10 * cos(g_sph[2]) ; | |
67 | + float r = sqrt(xc*xc + yc*yc + zc*zc); | |
68 | + xc+=xi; | |
69 | + yc+=yi; | |
70 | + zc+=zi; | |
71 | + stim::vec3<float> center(xc,yc,zc); | |
72 | + | |
73 | + float d = 2 * r * tan(25*pi/180 ); //find the diameter of the conical voting area | |
74 | + stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector | |
75 | + float step = 360.0/(float) n; | |
76 | + stim::circle<float> cir(center, d, norm); | |
77 | + stim::aabb3<int> bb(xi,yi,zi); | |
78 | + bb.insert(xc,yc,zc); | |
79 | + for(float j = 0; j <360.0; j += step){ | |
80 | + stim::vec3<float> out = cir.p(j); | |
81 | + bb.insert(out[0], out[1], out[2]); | |
82 | + } | |
83 | + | |
84 | + bb.trim_low(0,0,0); | |
85 | + bb.trim_high(128-1, 128-1, 128-1); | |
86 | + | |
87 | + std::cout<< bb.low[0] << '\t' << bb.low[1] << '\t' << bb.low[2] << '\n'; | |
88 | + std::cout<< bb.high[0] << '\t' << bb.high[1] << '\t' << bb.high[2] << '\n'; | |
89 | + std::cin >> n; | |
90 | +*/ | |
91 | + /*int n=10; | |
92 | + stim::circle<float> cir; | |
93 | + float* c0= (float*) malloc(3*sizeof(float)); | |
94 | + c0[0] =-4; | |
95 | + c0[1]=0; | |
96 | + c0[2] = 3; | |
97 | + stim::vec3<float> c(c0[0],c0[1],c0[2]); | |
98 | + float len = c.len(); | |
99 | + stim::vec3<float> norm(c0[0]/len,c0[1]/len,c0[2]/len); | |
100 | + std::cout<< len << '\n'; | |
101 | + std::cout<< norm << '\n'; | |
102 | + cir.center(c); | |
103 | + cir.normal(norm); | |
104 | + cir.scale(2); | |
105 | + stim::vec3<float> out = cir.p(45); | |
106 | + std::vector<stim::vec3<float>> out2 = cir.getPoints(n); | |
72 | 107 | |
73 | - std::cout<< out << '\n'; | |
74 | - std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | |
75 | - std::cout<< c << '\n'; | |
108 | + std::cout<< out << '\n'; | |
109 | + std::cout <<out[0] << '\t' << out[1] << '\t' << out[2] <<'\n'; | |
110 | + std::cout<< c << '\n'; | |
76 | 111 | |
77 | - for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) | |
78 | - std::cout << *i << '\n'; | |
79 | - std::ofstream list("circle_check.txt"); | |
80 | - if (list.is_open()){ | |
81 | - for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | |
82 | - list << *j << '\n'; | |
83 | - } | |
84 | - list.close(); | |
85 | - std::cin >> n; | |
112 | + for (std::vector<stim::vec3<float>>::const_iterator i = out2.begin(); i != out2.end(); ++i) | |
113 | + std::cout << *i << '\n'; | |
114 | + std::ofstream list("circle_check.txt"); | |
115 | + if (list.is_open()){ | |
116 | + for (std::vector<stim::vec3<float>>::const_iterator j = out2.begin(); j != out2.end(); ++j) | |
117 | + list << *j << '\n'; | |
118 | + } | |
119 | + list.close(); | |
120 | + std::cin >> n; | |
86 | 121 | |
87 | 122 | } |
88 | - | |
89 | 123 | */ |
90 | 124 | |
125 | + | |
91 | 126 | void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], |
92 | 127 | unsigned int x, unsigned int y, unsigned int z); |
93 | 128 | void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); |
... | ... | @@ -140,7 +175,7 @@ int main(int argc, char** argv){ |
140 | 175 | args.add("z", "size of the dataset along Z axis", "positive value"); |
141 | 176 | args.add("t", "threshold value for the final result", "positive valu"); |
142 | 177 | args.add("invert", "to invert the input data set", "string"); |
143 | - args.add("anisotropy", "anisotropy value of the imaging", "positive value"); | |
178 | + args.add("anisotropy", "anisotropy value of the imaging", "1"); | |
144 | 179 | //parse the command line arguments. |
145 | 180 | args.parse(argc, argv); |
146 | 181 | ... | ... |
No preview for this file type
cpp/update_dir3.cuh
... | ... | @@ -129,7 +129,7 @@ |
129 | 129 | } |
130 | 130 | |
131 | 131 | template<typename T> |
132 | - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
132 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
133 | 133 | |
134 | 134 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
135 | 135 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); | ... | ... |
cpp/update_dir3_aabb.cuh
... | ... | @@ -14,8 +14,7 @@ |
14 | 14 | |
15 | 15 | // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. |
16 | 16 | template<typename T> |
17 | - __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
18 | - //extern __shared__ float s_vote[]; | |
17 | + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
19 | 18 | |
20 | 19 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread |
21 | 20 | |
... | ... | @@ -26,26 +25,13 @@ |
26 | 25 | if(xi >= x|| yi >= y || zi>= z) return; |
27 | 26 | int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel |
28 | 27 | |
29 | - | |
30 | - // find the starting points for this block along the x and y directions | |
31 | - //int bxi = blockIdx.x * blockDim.x; | |
32 | - //int byi = blockidx_y * blockDim.y; | |
33 | - //find the starting points and the size of the window, which will be copied to the 2D-shared memory | |
34 | - //int bxs = bxi - rx; | |
35 | - //int bys = byi - ry; | |
36 | - //int xwidth = 2 * rx + blockDim.x; | |
37 | - //int ywidth = 2 * ry + blockDim.y; | |
38 | - //compute the coordinations of this pixel in the 2D-shared memory. | |
39 | - //int sx_rx = threadIdx.x + rx; | |
40 | - //int sy_ry = threadIdx.y + ry; | |
41 | - | |
42 | 28 | float rx_sq = rx * rx; // compute the square for rmax |
43 | 29 | float ry_sq = ry * ry; |
44 | 30 | float rz_sq = rz * rz; |
45 | 31 | |
46 | 32 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector |
47 | 33 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector |
48 | - int n =4; //set the number of points to find the boundaries of the conical voting area | |
34 | + float n =8; //set the number of points to find the boundaries of the conical voting area | |
49 | 35 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter |
50 | 36 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; |
51 | 37 | float zc = rz * cos(g_sph[2]) ; |
... | ... | @@ -54,9 +40,10 @@ |
54 | 40 | yc+=yi; |
55 | 41 | zc+=zi; |
56 | 42 | stim::vec3<float> center(xc,yc,zc); |
57 | - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | |
43 | + | |
44 | + float d = 2 * r * tan(phi); //find the diameter of the conical voting area | |
58 | 45 | stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector |
59 | - float step = 360.0/(float) n; | |
46 | + float step = 360.0/n; | |
60 | 47 | stim::circle<float> cir(center, d, norm); |
61 | 48 | stim::aabb3<int> bb(xi,yi,zi); |
62 | 49 | bb.insert(xc,yc,zc); |
... | ... | @@ -64,13 +51,13 @@ |
64 | 51 | stim::vec3<float> out = cir.p(j); |
65 | 52 | bb.insert(out[0], out[1], out[2]); |
66 | 53 | } |
67 | - | |
54 | + bb.trim_low(xi-rx, yi-ry, zi-rz); | |
68 | 55 | bb.trim_low(0,0,0); |
56 | + bb.trim_high(xi+rx, yi+ry, zi+rz); | |
69 | 57 | bb.trim_high(x-1, y-1, z-1); |
70 | 58 | int bx,by,bz; |
71 | 59 | int dx, dy, dz; |
72 | 60 | float dx_sq, dy_sq, dz_sq; |
73 | - | |
74 | 61 | float dist, cos_diff; |
75 | 62 | int idx_c; |
76 | 63 | |
... | ... | @@ -118,42 +105,38 @@ |
118 | 105 | template<typename T> |
119 | 106 | __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ |
120 | 107 | |
121 | - //calculate x,y,z coordinates for this thread | |
122 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | |
123 | - //find the grid size along y | |
124 | - int grid_y = y / blockDim.y; | |
108 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread | |
109 | + | |
110 | + int grid_y = y / blockDim.y; //find the grid size along y | |
125 | 111 | int blockidx_y = blockIdx.y % grid_y; |
126 | 112 | int yi = blockidx_y * blockDim.y + threadIdx.y; |
127 | 113 | int zi = blockIdx.y / grid_y; |
128 | 114 | int i = zi * x * y + yi * x + xi; |
129 | 115 | |
130 | 116 | if(xi >= x || yi >= y || zi >= z) return; |
131 | - //update the gradient image with the new direction direction | |
132 | - gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; | |
117 | + | |
118 | + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction direction | |
133 | 119 | gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; |
134 | 120 | gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; |
135 | 121 | } |
136 | 122 | |
137 | 123 | template<typename T> |
138 | - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
124 | + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
139 | 125 | |
140 | 126 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
141 | 127 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
142 | 128 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
143 | - //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T); | |
144 | - // allocate space on the GPU for the updated vote direction | |
145 | - T* gpu_dir; | |
129 | + | |
130 | + T* gpu_dir; // allocate space on the GPU for the updated vote direction | |
146 | 131 | cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); |
147 | 132 | |
148 | 133 | //call the kernel to calculate the new voting direction |
149 | - update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z); | |
150 | - | |
134 | + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, r[0], r[1], r[2], x , y, z); | |
151 | 135 | |
152 | 136 | //call the kernel to update the gradient direction |
153 | 137 | update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); |
154 | - | |
155 | - //free allocated memory | |
156 | - cudaFree(gpu_dir); | |
138 | + | |
139 | + cudaFree(gpu_dir); //free allocated memory | |
157 | 140 | |
158 | 141 | } |
159 | 142 | ... | ... |
cpp/vote3.cuh
... | ... | @@ -101,7 +101,7 @@ |
101 | 101 | } |
102 | 102 | |
103 | 103 | template<typename T> |
104 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
104 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
105 | 105 | |
106 | 106 | |
107 | 107 | unsigned int max_threads = stim::maxThreadsPerBlock(); | ... | ... |
cpp/vote3_atomic.cuh
... | ... | @@ -27,14 +27,14 @@ |
27 | 27 | |
28 | 28 | float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter |
29 | 29 | |
30 | - float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter | |
31 | - float gy_v_n = gy_v/mag_v; | |
32 | - float gz_v_n = gz_v/mag_v; | |
30 | + //float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter | |
31 | + //float gy_v_n = gy_v/mag_v; | |
32 | + //float gz_v_n = gz_v/mag_v; | |
33 | 33 | |
34 | 34 | float rx_sq = rx * rx; // compute the square for rmax |
35 | 35 | float ry_sq = ry * ry; |
36 | 36 | float rz_sq = rz * rz; |
37 | - float x_sq, y_sq, z_sq, d_c, cos_diff; | |
37 | + float x_sq, y_sq, z_sq, dist, cos_diff; | |
38 | 38 | int xi_c, yi_c, zi_c, idx_c; |
39 | 39 | |
40 | 40 | for (int z_c=-rz; z_c<=rz; z_c++){ |
... | ... | @@ -49,8 +49,8 @@ |
49 | 49 | xi_c = xi + x_c; |
50 | 50 | if (xi_c < x && xi_c>=0){ |
51 | 51 | x_sq = x_c * x_c; |
52 | - d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter | |
53 | - cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter | |
52 | + dist = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter | |
53 | + cos_diff = (gx_v * x_c + gy_v * y_c + gz_v * z_c)/(dist * mag_v); // calculate the cosine of angle between the voter and the current counter | |
54 | 54 | if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ |
55 | 55 | idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter |
56 | 56 | atomicAdd (&gpu_vote[idx_c] , mag_v); |
... | ... | @@ -64,7 +64,7 @@ |
64 | 64 | } |
65 | 65 | |
66 | 66 | template<typename T> |
67 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
67 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
68 | 68 | |
69 | 69 | |
70 | 70 | unsigned int max_threads = stim::maxThreadsPerBlock(); | ... | ... |
cpp/vote3_atomic_aabb.cuh
... | ... | @@ -15,7 +15,7 @@ |
15 | 15 | |
16 | 16 | // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area |
17 | 17 | template<typename T> |
18 | - __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
18 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
19 | 19 | |
20 | 20 | int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread |
21 | 21 | |
... | ... | @@ -32,13 +32,10 @@ |
32 | 32 | float rx_sq = rx * rx; // compute the square for rmax |
33 | 33 | float ry_sq = ry * ry; |
34 | 34 | float rz_sq = rz * rz; |
35 | - float dist, cos_diff; | |
36 | - int idx_c; | |
37 | - | |
38 | - //float rmax = sqrt(rx_sq + ry_sq + rz_sq); | |
35 | + | |
39 | 36 | stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector |
40 | 37 | stim::vec3<float> g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector |
41 | - int n =4; //set the number of points to find the boundaries of the conical voting area | |
38 | + float n =8; //set the number of points to find the boundaries of the conical voting area | |
42 | 39 | float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter |
43 | 40 | float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; |
44 | 41 | float zc = rz * cos(g_sph[2]) ; |
... | ... | @@ -48,9 +45,9 @@ |
48 | 45 | zc+=zi; |
49 | 46 | stim::vec3<float> center(xc,yc,zc); |
50 | 47 | |
51 | - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area | |
48 | + float d = 2 * r * tan(phi); //find the diameter of the conical voting area | |
52 | 49 | stim::vec3<float> norm = g.norm(); //compute the normalize gradient vector |
53 | - float step = 360.0/(float) n; | |
50 | + float step = 360.0/n; | |
54 | 51 | stim::circle<float> cir(center, d, norm); |
55 | 52 | stim::aabb3<int> bb(xi,yi,zi); |
56 | 53 | bb.insert(xc,yc,zc); |
... | ... | @@ -58,12 +55,15 @@ |
58 | 55 | stim::vec3<float> out = cir.p(j); |
59 | 56 | bb.insert(out[0], out[1], out[2]); |
60 | 57 | } |
61 | - | |
58 | + bb.trim_low(xi-rx, yi-ry, zi-rz); | |
62 | 59 | bb.trim_low(0,0,0); |
60 | + bb.trim_high(xi+rx, yi+ry, zi+rz); | |
63 | 61 | bb.trim_high(x-1, y-1, z-1); |
64 | 62 | int bx,by,bz; |
65 | 63 | int dx, dy, dz; |
66 | 64 | float dx_sq, dy_sq, dz_sq; |
65 | + float dist, cos_diff; | |
66 | + int idx_c; | |
67 | 67 | for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ |
68 | 68 | dz = bz - zi; //compute the distance bw the voter and the current counter along z axis |
69 | 69 | dz_sq = dz * dz; |
... | ... | @@ -86,13 +86,13 @@ |
86 | 86 | } |
87 | 87 | |
88 | 88 | template<typename T> |
89 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
89 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
90 | 90 | |
91 | 91 | |
92 | 92 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
93 | 93 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
94 | 94 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
95 | - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting | |
95 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting | |
96 | 96 | |
97 | 97 | } |
98 | 98 | ... | ... |