#include <cmath>
#include <cstddef>
#include <cstdio>

#include "gaussian_blur3.cuh"
#include "gradient3.cuh"
#include "mag3.cuh"
#include "vote3.cuh"
#include "update_dir3.cuh"
#include "local_max3.cuh"

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise, so callers can
// chain calls and skip the remaining GPU work after the first failure.
static inline bool ivote3_cuda_ok(cudaError_t status, const char* what){
	if (status == cudaSuccess)
		return true;
	fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
	return false;
}

// Runs the 3D iterative-voting pipeline on the GPU: blur -> gradient ->
// repeated (vote, update direction), then copies the vote volume back to img.
//
//   img         host buffer of x*y*z floats; read as the input volume and
//               overwritten with the final vote volume on success
//   sigma       forwarded to gpu_gaussian_blur3 (presumably per-axis blur widths -- confirm in gaussian_blur3.cuh)
//   anisotropy  forwarded to gpu_gradient3
//   phi         starting angle; cos(phi) is handed to the vote/update steps
//   d_phi       amount subtracted from phi after each update step, for as long as phi >= d_phi
//   r           forwarded to gpu_vote3 and gpu_update_dir3 (presumably per-axis voting radii -- confirm)
//   iter        number of voting iterations
//   t, conn     not used by this function; kept so existing callers still compile
//   x, y, z     dimensions of the volume in voxels
//
// On any CUDA failure a message is written to stderr, all device buffers are
// released, and img is left as it was before the call.
// Local-maximum detection is not performed here; lmax() below runs
// gpu_local_max3 on a host volume.
void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, unsigned int r[],
			int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	(void)t;
	(void)conn;

	bool ok = ivote3_cuda_ok(cudaSetDevice(0), "cudaSetDevice");

	// Size in bytes of one scalar volume. Computed in size_t: the gradient
	// buffer needs 3x this amount and 32-bit arithmetic wraps for large volumes.
	const size_t bytes = (size_t)x * y * z * sizeof(float);
	if (!ok || bytes == 0)
		return;

	float* gpuI0 = NULL;		// input volume, blurred in place
	float* gpu_grad = NULL;		// gradient, three floats per voxel
	float* gpu_vote = NULL;		// vote volume returned to the caller

	// upload the image and blur it on the device
	ok = ok && ivote3_cuda_ok(cudaMalloc(&gpuI0, bytes), "cudaMalloc(input volume)");
	ok = ok && ivote3_cuda_ok(cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(input volume)");
	if (ok){
		gpu_gaussian_blur3(gpuI0, sigma, x, y, z);
		ok = ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_gaussian_blur3");
	}

	// gradient of the blurred image
	ok = ok && ivote3_cuda_ok(cudaMalloc(&gpu_grad, 3 * bytes), "cudaMalloc(gradient)");
	if (ok){
		gpu_gradient3(gpu_grad, gpuI0, anisotropy, x, y, z);
		ok = ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_gradient3");
	}

	// the blurred image is only needed to build the gradient
	cudaFree(gpuI0);
	gpuI0 = NULL;

	// Zero the vote buffer so that img receives defined values even when
	// iter <= 0 and the voting loop never runs.
	ok = ok && ivote3_cuda_ok(cudaMalloc(&gpu_vote, bytes), "cudaMalloc(vote volume)");
	ok = ok && ivote3_cuda_ok(cudaMemset(gpu_vote, 0, bytes), "cudaMemset(vote volume)");

	float cos_phi = cos(phi);

	// voting iterations; the loop stops at the first CUDA failure
	for (int i = 0; ok && i < iter; i++){

		gpu_vote3(gpu_vote, gpu_grad, cos_phi, r, x, y, z);
		ok = ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_vote3");

		// update the voting direction and shrink the angle while it is still >= d_phi
		if (ok && phi >= d_phi){
			gpu_update_dir3(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
			ok = ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_update_dir3");
			phi = phi - d_phi;
			cos_phi = cos(phi);
		}
	}

	cudaFree(gpu_grad);

	// hand the vote volume back to the caller only if every step succeeded
	if (ok)
		ivote3_cuda_ok(cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(vote volume)");

	cudaFree(gpu_vote);
}

// Runs gpu_local_max3 on a host volume.
//
//   out      host buffer of x*y*z floats that receives the result
//   in       host buffer of x*y*z floats holding the input volume
//   t        forwarded to gpu_local_max3 (presumably a threshold -- confirm in local_max3.cuh)
//   conn     forwarded to gpu_local_max3 (presumably the neighbourhood size per axis -- confirm)
//   x, y, z  dimensions of the volume in voxels
//
// On any CUDA failure a message is written to stderr, the device buffers are
// released, and out is left untouched.
void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	// size_t avoids 32-bit wrap-around for large volumes
	const size_t bytes = (size_t)x * y * z * sizeof(float);
	if (bytes == 0)
		return;

	float* gpuV = NULL;		// device copy of the input volume
	float* gpuOut = NULL;	// device buffer for the result

	// upload the input volume
	bool ok = ivote3_cuda_ok(cudaMalloc(&gpuV, bytes), "cudaMalloc(lmax input)");
	ok = ok && ivote3_cuda_ok(cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(lmax input)");
	ok = ok && ivote3_cuda_ok(cudaMalloc(&gpuOut, bytes), "cudaMalloc(lmax output)");

	if (ok){
		// call the local max function
		gpu_local_max3(gpuOut, gpuV, t, conn, x, y, z);

		// copy the final result to the cpu; this blocking copy also reports
		// errors raised by the preceding device work
		ivote3_cuda_ok(cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost), "gpu_local_max3 / cudaMemcpy(lmax output)");
	}

	cudaFree(gpuV);
	cudaFree(gpuOut);
}