Commit 02fb26b3ee6354848bb0fb35b9e970b571b251b7
1 parent
6ef1dab9
change the vote and update_dir and cudafunc codes to use three channels for storing the gradient instead of four channels
Showing
3 changed files
with
20 additions
and
40 deletions
Show diff stats
cpp/cudafunc.cu
... | ... | @@ -27,9 +27,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un |
27 | 27 | |
28 | 28 | cudaDeviceSynchronize(); |
29 | 29 | |
30 | - //copy the blur data back to the cpu | |
31 | - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); | |
32 | - | |
33 | 30 | //assign memory on the gpu for the gradient along the X, y, z. |
34 | 31 | float* gpu_grad; |
35 | 32 | cudaMalloc(&gpu_grad, bytes*3); |
... | ... | @@ -38,14 +35,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un |
38 | 35 | gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z); |
39 | 36 | cudaFree(gpuI0); |
40 | 37 | |
41 | - //assign memory on the gpu for the gradient magnitude | |
42 | - float* gpu_mag; | |
43 | - cudaMalloc(&gpu_mag, bytes); | |
44 | - | |
45 | - //call the magnitude function | |
46 | - gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z); | |
47 | - //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost); | |
48 | - //assign memory on the gpu for the vote. | |
49 | 38 | float* gpu_vote; |
50 | 39 | cudaMalloc(&gpu_vote, bytes); |
51 | 40 | |
... | ... | @@ -54,7 +43,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un |
54 | 43 | //call the vote function. |
55 | 44 | for (int i = 0; i < iter; i++){ |
56 | 45 | |
57 | - gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z); | |
46 | + gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z); | |
58 | 47 | cudaDeviceSynchronize(); |
59 | 48 | if (i==0) |
60 | 49 | cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost); |
... | ... | @@ -68,8 +57,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un |
68 | 57 | |
69 | 58 | } |
70 | 59 | |
71 | - cudaFree(gpu_grad); | |
72 | - cudaFree(gpu_mag); | |
60 | + cudaFree(gpu_grad); | |
73 | 61 | cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); |
74 | 62 | |
75 | 63 | //allocate space on the gpu for the final detected cells. | ... | ... |
cpp/update_dir3.cuh
... | ... | @@ -26,7 +26,7 @@ |
26 | 26 | float g_v_x = gpu_grad[i * 3 + 0]; |
27 | 27 | float g_v_y = gpu_grad[i * 3 + 1]; |
28 | 28 | float g_v_z = gpu_grad[i * 3 + 2]; |
29 | - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); | |
29 | + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); | |
30 | 30 | |
31 | 31 | |
32 | 32 | // define a local variable to maximum value of the vote image in the voting area for this voter |
... | ... | @@ -60,7 +60,7 @@ |
60 | 60 | float d_pv = sqrt(x_sq + y_sq + z_sq); |
61 | 61 | |
62 | 62 | // calculate the angle between the pixel and the current voter. |
63 | - float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * g_v_m); | |
63 | + float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * mag_v); | |
64 | 64 | |
65 | 65 | if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){ |
66 | 66 | |
... | ... | @@ -82,10 +82,11 @@ |
82 | 82 | } |
83 | 83 | } |
84 | 84 | |
85 | - __syncthreads(); | |
86 | - gpu_dir[i * 3 + 0] = id_x; | |
87 | - gpu_dir[i * 3 + 1] = id_y; | |
88 | - gpu_dir[i * 3 + 2] = id_z; | |
85 | + | |
86 | + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z); | |
87 | + gpu_dir[i * 3 + 0] = mag_v * (id_x/m_id); | |
88 | + gpu_dir[i * 3 + 1] = mag_v * (id_y/m_id); | |
89 | + gpu_dir[i * 3 + 2] = mag_v * (id_z/m_id); | |
89 | 90 | } |
90 | 91 | |
91 | 92 | // this kernel updates the gradient direction by the calculated voting direction. | ... | ... |
cpp/vote3.cuh
... | ... | @@ -10,7 +10,7 @@ |
10 | 10 | |
11 | 11 | // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area |
12 | 12 | template<typename T> |
13 | - __global__ void vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
13 | + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ | |
14 | 14 | |
15 | 15 | //calculate x,y,z coordinates for this thread |
16 | 16 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
... | ... | @@ -46,12 +46,11 @@ |
46 | 46 | unsigned int id_v = (zi_v) * x * y + (yi_v) * x + (xi_v); |
47 | 47 | |
48 | 48 | //find the gradient values along the x, y ,z axis, and the gradient magnitude for this voter |
49 | - float mag_v = gpu_mag[id_v]; | |
49 | + | |
50 | 50 | float g_v_x = gpu_grad[id_v * 3 + 0]; |
51 | 51 | float g_v_y = gpu_grad[id_v * 3 + 1]; |
52 | 52 | float g_v_z = gpu_grad[id_v * 3 + 2]; |
53 | - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); | |
54 | - | |
53 | + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); | |
55 | 54 | //calculate the distance between the pixel and the current voter. |
56 | 55 | float x_sq = x_v * x_v; |
57 | 56 | float y_sq = y_v * y_v; |
... | ... | @@ -59,7 +58,7 @@ |
59 | 58 | float d_pv = sqrt(x_sq + y_sq + z_sq); |
60 | 59 | |
61 | 60 | // calculate the angle between the pixel and the current voter. |
62 | - float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * g_v_m); | |
61 | + float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * mag_v); | |
63 | 62 | |
64 | 63 | // check if the current voter is located in the voting area of this pixel. |
65 | 64 | if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){ |
... | ... | @@ -75,23 +74,21 @@ |
75 | 74 | } |
76 | 75 | |
77 | 76 | template<typename T> |
78 | - void gpu_vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
77 | + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
79 | 78 | |
80 | - int rx = r[0]; | |
81 | - int ry = r[1]; | |
82 | - int rz = r[2]; | |
79 | + | |
83 | 80 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
84 | 81 | dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
85 | 82 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
86 | 83 | |
87 | 84 | //call the kernel to do the voting |
88 | - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, gpu_mag, cos_phi, rx, ry, rz, x , y, z); | |
85 | + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); | |
89 | 86 | |
90 | 87 | } |
91 | 88 | |
92 | 89 | |
93 | 90 | template<typename T> |
94 | - void cpu_vote3(T* cpu_vote, T* cpu_grad, T* cpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
91 | + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ | |
95 | 92 | |
96 | 93 | //calculate the number of bytes in the array |
97 | 94 | unsigned int bytes = x * y * z * sizeof(T); |
... | ... | @@ -105,19 +102,13 @@ |
105 | 102 | T* gpu_grad; |
106 | 103 | cudaMalloc(&gpu_grad, bytes*3); |
107 | 104 | |
108 | - //allocate space on the GPU for the Gradient magnitude | |
109 | - T* gpu_mag; | |
110 | - cudaMalloc(&gpu_mag, bytes); | |
111 | 105 | |
112 | - | |
113 | 106 | //copy the Gradient data to the GPU |
114 | 107 | cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); |
115 | 108 | |
116 | - //copy the gradient magnitude to the GPU | |
117 | - cudaMemcpy(gpu_mag, cpu_mag, bytes, cudaMemcpyHostToDevice); | |
118 | - | |
109 | + | |
119 | 110 | //call the GPU version of the vote calculation function |
120 | - gpu_vote3<T>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x , y, z); | |
111 | + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z); | |
121 | 112 | |
122 | 113 | //copy the Vote Data back to the CPU |
123 | 114 | cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; |
... | ... | @@ -125,7 +116,7 @@ |
125 | 116 | //free allocated memory |
126 | 117 | cudaFree(gpu_vote); |
127 | 118 | cudaFree(gpu_grad); |
128 | - cudaFree(gpu_mag); | |
119 | + | |
129 | 120 | } |
130 | 121 | |
131 | 122 | ... | ... |