//#include "cuda_fp16.h"
//#include "float_to_half.cuh"
//#include "half_to_float.cuh"
#include <cstddef>
#include <cstdio>

#include "gaussian_blur3.cuh"
#include "gradient3.cuh"
#include "mag3.cuh"
#include "vote3.cuh"
#include "update_dir3.cuh"
#include "local_max3.cuh"

// Returns true when `status` is cudaSuccess; otherwise reports the failed step on stderr.
static bool ivote3_cuda_ok(cudaError_t status, const char* step){
	if (status == cudaSuccess) return true;
	fprintf(stderr, "ivote3: %s failed: %s\n", step, cudaGetErrorString(status));
	return false;
}

// Owns a single float device allocation; frees it on destruction or on an explicit release().
struct ivote3_device_buffer{
	float* ptr;

	ivote3_device_buffer() : ptr(NULL) {}
	~ivote3_device_buffer(){ release(); }

	// Allocates `bytes` on the current device; returns false (and keeps ptr NULL) on failure.
	bool allocate(size_t bytes, const char* step){
		if (ivote3_cuda_ok(cudaMalloc(&ptr, bytes), step)) return true;
		ptr = NULL;
		return false;
	}

	// Frees the allocation now (safe to call repeatedly).
	void release(){
		if (ptr != NULL){
			cudaFree(ptr);
			ptr = NULL;
		}
	}

private:
	// Non-copyable: a copy would lead to a double cudaFree.
	ivote3_device_buffer(const ivote3_device_buffer&);
	ivote3_device_buffer& operator=(const ivote3_device_buffer&);
};

/// Runs the 3D voting pipeline on device 0: blur -> gradient -> iterated vote/update-direction
/// -> local maxima, and copies the local-maximum volume back to the host.
///
/// If any CUDA runtime call fails, the failure is reported on stderr, all device buffers are
/// released and the function returns early; `center` is then left unwritten.
///
/// @param center      host buffer receiving x*y*z floats produced by gpu_local_max3
/// @param img         host buffer holding x*y*z input floats; NOTE: it is overwritten with the
///                    vote volume after the 8th voting pass (i == 7), i.e. whenever iter >= 8
/// @param sigma       forwarded unchanged to gpu_gaussian_blur3
/// @param anisotropy  forwarded unchanged to gpu_gradient3
/// @param phi         starting angle in radians; cos(phi) is handed to the vote/update stages
/// @param d_phi       amount subtracted from phi after each pass while phi >= d_phi
/// @param r           forwarded unchanged to gpu_vote3 and gpu_update_dir3
/// @param iter        number of voting passes
/// @param t           forwarded unchanged to gpu_local_max3
/// @param conn        forwarded unchanged to gpu_local_max3
/// @param x,y,z       volume dimensions in voxels
void ivote3(float* center, float* img, float sigma[], float anisotropy, float phi, float d_phi, unsigned int r[],
			int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	if (!ivote3_cuda_ok(cudaSetDevice(0), "cudaSetDevice(0)")) return;

	// Size of one scalar volume. Computed in size_t so that large volumes do not wrap
	// around in 32-bit arithmetic.
	const size_t bytes = (size_t)x * (size_t)y * (size_t)z * sizeof(float);
	if (bytes == 0) return;													// empty volume: nothing to process

	// Upload the image and blur it in place on the device.
	ivote3_device_buffer gpuI0;
	if (!gpuI0.allocate(bytes, "cudaMalloc(image)")) return;
	if (!ivote3_cuda_ok(cudaMemcpy(gpuI0.ptr, img, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(image -> device)")) return;
	gpu_gaussian_blur3(gpuI0.ptr, sigma, x, y, z);
	if (!ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_gaussian_blur3")) return;

	// Gradient buffer is three times the size of one scalar volume.
	ivote3_device_buffer gpu_grad;
	if (!gpu_grad.allocate(bytes * 3, "cudaMalloc(gradient)")) return;
	gpu_gradient3(gpu_grad.ptr, gpuI0.ptr, anisotropy, x, y, z);
	gpuI0.release();														// blurred image is not used past this point

	ivote3_device_buffer gpu_vote;
	if (!gpu_vote.allocate(bytes, "cudaMalloc(vote)")) return;

	float cos_phi = cos(phi);
	for (int i = 0; i < iter; i++){
		gpu_vote3(gpu_vote.ptr, gpu_grad.ptr, cos_phi, r, x, y, z);
		if (!ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_vote3")) return;

		// NOTE(review): hard-coded pass index; this overwrites the caller's input image with
		// the vote volume of the 8th pass. Kept because callers may read it - confirm intent.
		if (i == 7){
			if (!ivote3_cuda_ok(cudaMemcpy(img, gpu_vote.ptr, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(vote -> img)")) return;
		}

		// Refine the voting directions and narrow the angle until phi drops below d_phi.
		if (phi >= d_phi){
			gpu_update_dir3(gpu_grad.ptr, gpu_vote.ptr, cos_phi, r, x, y, z);
			if (!ivote3_cuda_ok(cudaDeviceSynchronize(), "gpu_update_dir3")) return;
			phi = phi - d_phi;
			cos_phi = cos(phi);
		}
	}
	gpu_grad.release();														// free before allocating the output volume

	// Detect local maxima of the vote volume and return them to the host.
	ivote3_device_buffer gpu_output;
	if (!gpu_output.allocate(bytes, "cudaMalloc(output)")) return;
	gpu_local_max3(gpu_output.ptr, gpu_vote.ptr, t, conn, x, y, z);
	ivote3_cuda_ok(cudaMemcpy(center, gpu_output.ptr, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(output -> center)");

	// gpu_vote and gpu_output are released by their destructors.
}