From 02fb26b3ee6354848bb0fb35b9e970b571b251b7 Mon Sep 17 00:00:00 2001 From: Laila Saadatifard Date: Tue, 8 Dec 2015 15:27:36 -0600 Subject: [PATCH] change the vote and update_dir and cudafunc codes to use three channels for storing the gradient instead of four channels --- cpp/cudafunc.cu | 16 ++-------------- cpp/update_dir3.cuh | 13 +++++++------ cpp/vote3.cuh | 31 +++++++++++-------------------- 3 files changed, 20 insertions(+), 40 deletions(-) diff --git a/cpp/cudafunc.cu b/cpp/cudafunc.cu index c34ade6..20c60f9 100644 --- a/cpp/cudafunc.cu +++ b/cpp/cudafunc.cu @@ -27,9 +27,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un cudaDeviceSynchronize(); - //copy the blur data back to the cpu - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); - //assign memory on the gpu for the gradient along the X, y, z. float* gpu_grad; cudaMalloc(&gpu_grad, bytes*3); @@ -38,14 +35,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un gpu_gradient3(gpu_grad, gpuI0, x, y, z); cudaFree(gpuI0); - //assign memory on the gpu for the gradient magnitude - float* gpu_mag; - cudaMalloc(&gpu_mag, bytes); - - //call the magnitude function - gpu_mag3(gpu_mag, gpu_grad, x, y, z); - //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost); - //assign memory on the gpu for the vote. float* gpu_vote; cudaMalloc(&gpu_vote, bytes); @@ -54,7 +43,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un //call the vote function. for (int i = 0; i < iter; i++){ - gpu_vote3(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z); + gpu_vote3(gpu_vote, gpu_grad, cos_phi, r, x, y, z); cudaDeviceSynchronize(); if (i==0) cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost); @@ -68,8 +57,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un } - cudaFree(gpu_grad); - cudaFree(gpu_mag); + cudaFree(gpu_grad); cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); //allocate space on the gpu for the final detected cells. diff --git a/cpp/update_dir3.cuh b/cpp/update_dir3.cuh index fc9213d..67ac0a0 100644 --- a/cpp/update_dir3.cuh +++ b/cpp/update_dir3.cuh @@ -26,7 +26,7 @@ float g_v_x = gpu_grad[i * 3 + 0]; float g_v_y = gpu_grad[i * 3 + 1]; float g_v_z = gpu_grad[i * 3 + 2]; - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); // define a local variable to maximum value of the vote image in the voting area for this voter @@ -60,7 +60,7 @@ float d_pv = sqrt(x_sq + y_sq + z_sq); // calculate the angle between the pixel and the current voter. - float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * g_v_m); + float cos_diff = (g_v_x * x_p + g_v_y * y_p + g_v_z * z_p)/(d_pv * mag_v); if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){ @@ -82,10 +82,11 @@ } } - __syncthreads(); - gpu_dir[i * 3 + 0] = id_x; - gpu_dir[i * 3 + 1] = id_y; - gpu_dir[i * 3 + 2] = id_z; + + float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z); + gpu_dir[i * 3 + 0] = mag_v * (id_x/m_id); + gpu_dir[i * 3 + 1] = mag_v * (id_y/m_id); + gpu_dir[i * 3 + 2] = mag_v * (id_z/m_id); } // this kernel updates the gradient direction by the calculated voting direction. diff --git a/cpp/vote3.cuh b/cpp/vote3.cuh index 4e25225..de48cc0 100644 --- a/cpp/vote3.cuh +++ b/cpp/vote3.cuh @@ -10,7 +10,7 @@ // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area template - __global__ void vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ + __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -46,12 +46,11 @@ unsigned int id_v = (zi_v) * x * y + (yi_v) * x + (xi_v); //find the gradient values along the x, y ,z axis, and the gradient magnitude for this voter - float mag_v = gpu_mag[id_v]; + float g_v_x = gpu_grad[id_v * 3 + 0]; float g_v_y = gpu_grad[id_v * 3 + 1]; float g_v_z = gpu_grad[id_v * 3 + 2]; - float g_v_m = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); - + float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z); //calculate the distance between the pixel and the current voter. float x_sq = x_v * x_v; float y_sq = y_v * y_v; @@ -59,7 +58,7 @@ float d_pv = sqrt(x_sq + y_sq + z_sq); // calculate the angle between the pixel and the current voter. - float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * g_v_m); + float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * mag_v); // check if the current voter is located in the voting area of this pixel. if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){ @@ -75,23 +74,21 @@ } template - void gpu_vote3(T* gpu_vote, T* gpu_grad, T* gpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ - int rx = r[0]; - int ry = r[1]; - int rz = r[2]; + unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads(sqrt (max_threads),sqrt (max_threads)); dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); //call the kernel to do the voting - vote3 <<< blocks, threads >>>(gpu_vote, gpu_grad, gpu_mag, cos_phi, rx, ry, rz, x , y, z); + vote3 <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); } template - void cpu_vote3(T* cpu_vote, T* cpu_grad, T* cpu_mag, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ //calculate the number of bytes in the array unsigned int bytes = x * y * z * sizeof(T); @@ -105,19 +102,13 @@ T* gpu_grad; cudaMalloc(&gpu_grad, bytes*3); - //allocate space on the GPU for the Gradient magnitude - T* gpu_mag; - cudaMalloc(&gpu_mag, bytes); - //copy the Gradient data to the GPU cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); - //copy the gradient magnitude to the GPU - cudaMemcpy(gpu_mag, cpu_mag, bytes, cudaMemcpyHostToDevice); - + //call the GPU version of the vote calculation function - gpu_vote3(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x , y, z); + gpu_vote3(gpu_vote, gpu_grad, cos_phi, r, x , y, z); //copy the Vote Data back to the CPU cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ; @@ -125,7 +116,7 @@ //free allocated memory cudaFree(gpu_vote); cudaFree(gpu_grad); - cudaFree(gpu_mag); + } -- libgit2 0.21.4