Commit 02fb26b3ee6354848bb0fb35b9e970b571b251b7

Authored by Laila Saadatifard
1 parent 6ef1dab9

change the vote and update_dir and cudafunc codes to use three channels for storing the gradient instead of four channels
Showing 3 changed files with 20 additions and 40 deletions   Show diff stats
cpp/cudafunc.cu
... ... @@ -27,9 +27,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
27 27  
28 28 cudaDeviceSynchronize();
29 29  
30   - //copy the blur data back to the cpu
31   - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost);
32   -
33 30 //assign memory on the gpu for the gradient along the X, y, z.
34 31 float* gpu_grad;
35 32 cudaMalloc(&gpu_grad, bytes*3);
... ... @@ -38,14 +35,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
38 35 gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z);
39 36 cudaFree(gpuI0);
40 37  
41   - //assign memory on the gpu for the gradient magnitude
42   - float* gpu_mag;
43   - cudaMalloc(&gpu_mag, bytes);
44   -
45   - //call the magnitude function
46   - gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z);
47   - //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost);
48   - //assign memory on the gpu for the vote.
49 38 float* gpu_vote;
50 39 cudaMalloc(&gpu_vote, bytes);
51 40  
... ... @@ -54,7 +43,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
54 43 //call the vote function.
55 44 for (int i = 0; i < iter; i++){
56 45  
57   - gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z);
  46 + gpu_vote3<float>(gpu_vote, gpu_grad, cos_phi, r, x, y, z);
58 47 cudaDeviceSynchronize();
59 48 if (i==0)
60 49 cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);
... ... @@ -68,8 +57,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
68 57  
69 58 }
70 59  
71   - cudaFree(gpu_grad);
72   - cudaFree(gpu_mag);
  60 + cudaFree(gpu_grad);
73 61 cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
74 62  
75 63 //allocate space on the gpu for the final detected cells.
... ...
cpp/update_dir3.cuh
... ... @@ -26,7 +26,7 @@
26 26 float g_v_x = gpu_grad[i * 3 + 0];
27 27 float g_v_y = gpu_grad[i * 3 + 1];
28 28 float g_v_z = gpu_grad[i * 3 + 2];
29   - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z);
  29 + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z);
30 30  
31 31  
32 32 // define a local variable to maximum value of the vote image in the voting area for this voter
... ... @@ -60,7 +60,7 @@
60 60 float d_pv = sqrt(x_sq + y_sq + z_sq);
61 61  
62 62 // calculate the angle between the pixel and the current voter.
63   - float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * g_v_m);
  63 + float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * mag_v);
64 64  
65 65 if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){
66 66  
... ... @@ -82,10 +82,11 @@
82 82 }
83 83 }
84 84  
85   - __syncthreads();
86   - gpu_dir[i * 3 + 0] = id_x;
87   - gpu_dir[i * 3 + 1] = id_y;
88   - gpu_dir[i * 3 + 2] = id_z;
  85 +
  86 + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z);
  87 + gpu_dir[i * 3 + 0] = mag_v * (id_x/m_id);
  88 + gpu_dir[i * 3 + 1] = mag_v * (id_y/m_id);
  89 + gpu_dir[i * 3 + 2] = mag_v * (id_z/m_id);
89 90 }
90 91  
91 92 // this kernel updates the gradient direction by the calculated voting direction.
... ...
cpp/vote3.cuh
... ... @@ -10,7 +10,7 @@
10 10  
11 11 // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
12 12 template<typename T>
13   - __global__ void vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
  13 + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
14 14  
15 15 //calculate x,y,z coordinates for this thread
16 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -46,12 +46,11 @@
46 46 unsigned int id_v = (zi_v) * x * y + (yi_v) * x + (xi_v);
47 47  
48 48 //find the gradient values along the x, y ,z axis, and the gradient magnitude for this voter
49   - float mag_v = gpu_mag[id_v];
  49 +
50 50 float g_v_x = gpu_grad[id_v * 3 + 0];
51 51 float g_v_y = gpu_grad[id_v * 3 + 1];
52 52 float g_v_z = gpu_grad[id_v * 3 + 2];
53   - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z);
54   -
  53 + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z);
55 54 //calculate the distance between the pixel and the current voter.
56 55 float x_sq = x_v * x_v;
57 56 float y_sq = y_v * y_v;
... ... @@ -59,7 +58,7 @@
59 58 float d_pv = sqrt(x_sq + y_sq + z_sq);
60 59  
61 60 // calculate the angle between the pixel and the current voter.
62   - float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * g_v_m);
  61 + float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * mag_v);
63 62  
64 63 // check if the current voter is located in the voting area of this pixel.
65 64 if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){
... ... @@ -75,23 +74,21 @@
75 74 }
76 75  
77 76 template<typename T>
78   - void gpu_vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  77 + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
79 78  
80   - int rx = r[0];
81   - int ry = r[1];
82   - int rz = r[2];
  79 +
83 80 unsigned int max_threads = stim::maxThreadsPerBlock();
84 81 dim3 threads(sqrt (max_threads),sqrt (max_threads));
85 82 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
86 83  
87 84 //call the kernel to do the voting
88   - vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, gpu_mag, cos_phi, rx, ry, rz, x , y, z);
  85 + vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z);
89 86  
90 87 }
91 88  
92 89  
93 90 template<typename T>
94   - void cpu_vote3(T* cpu_vote, T* cpu_grad, T* cpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
  91 + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
95 92  
96 93 //calculate the number of bytes in the array
97 94 unsigned int bytes = x * y * z * sizeof(T);
... ... @@ -105,19 +102,13 @@
105 102 T* gpu_grad;
106 103 cudaMalloc(&gpu_grad, bytes*3);
107 104  
108   - //allocate space on the GPU for the Gradient magnitude
109   - T* gpu_mag;
110   - cudaMalloc(&gpu_mag, bytes);
111 105  
112   -
113 106 //copy the Gradient data to the GPU
114 107 cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
115 108  
116   - //copy the gradient magnitude to the GPU
117   - cudaMemcpy(gpu_mag, cpu_mag, bytes, cudaMemcpyHostToDevice);
118   -
  109 +
119 110 //call the GPU version of the vote calculation function
120   - gpu_vote3<T>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x , y, z);
  111 + gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
121 112  
122 113 //copy the Vote Data back to the CPU
123 114 cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
... ... @@ -125,7 +116,7 @@
125 116 //free allocated memory
126 117 cudaFree(gpu_vote);
127 118 cudaFree(gpu_grad);
128   - cudaFree(gpu_mag);
  119 +
129 120 }
130 121  
131 122  
... ...