/*#include "circle_check.cuh"

void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z){

	gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z);
}
*/

#include <cmath>
#include <cstddef>
#include <cstdio>

#include "gaussian_blur3.cuh"
#include "gradient3.cuh"
#include "mag3.cuh"
#include "vote3_atomic_aabb.cuh"
#include "update_dir3_aabb.cuh"
#include "local_max3.cuh"

// GPU ordinal that both entry points request (value kept from the original code).
static const int IVOTE3_DEVICE = 1;

namespace {

	// Owns a single cudaMalloc'd float buffer and frees it when it leaves scope,
	// so every early-return path below releases its device memory.
	struct ivote3_device_buffer{
		float* ptr;

		ivote3_device_buffer() : ptr(NULL) {}
		~ivote3_device_buffer(){ release(); }

		// Frees the buffer now (used to lower peak memory before the next allocation).
		void release(){
			if (ptr != NULL){
				cudaFree(ptr);
				ptr = NULL;
			}
		}

	private:
		ivote3_device_buffer(const ivote3_device_buffer&);				// non-copyable
		ivote3_device_buffer& operator=(const ivote3_device_buffer&);	// non-copyable
	};
}

// Returns the size in bytes of an x*y*z float volume, computed in size_t so that
// large volumes do not wrap around a 32-bit unsigned int.
static size_t ivote3_volume_bytes(unsigned int x, unsigned int y, unsigned int z){
	return (size_t)x * (size_t)y * (size_t)z * sizeof(float);
}

// Returns true when status is cudaSuccess; otherwise reports the failure on stderr.
static bool ivote3_cuda_ok(cudaError_t status, const char* what){
	if (status == cudaSuccess)
		return true;
	fprintf(stderr, "ivote3: %s failed: %s\n", what, cudaGetErrorString(status));
	return false;
}

// Checks a GPU stage that returns void: first any pending launch error, then any
// error raised while the device finishes the queued work.
static bool ivote3_stage_ok(const char* stage){
	cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess)
		status = cudaDeviceSynchronize();
	return ivote3_cuda_ok(status, stage);
}

// Requests IVOTE3_DEVICE. A failure (e.g. a host with a single GPU) is only
// reported: the current device stays selected and processing continues, which is
// what the unchecked original call did.
static void ivote3_select_device(){
	cudaError_t status = cudaSetDevice(IVOTE3_DEVICE);
	if (status != cudaSuccess)
		fprintf(stderr, "ivote3: cudaSetDevice(%d) failed (%s); continuing on the current device\n",
				IVOTE3_DEVICE, cudaGetErrorString(status));
	(void)cudaGetLastError();		//clear any stale error so it is not blamed on a later stage
}

/// Runs the GPU voting pipeline on a 3D float volume and overwrites img with the vote image.
///
/// Stages, in order: gpu_gaussian_blur3, gpu_gradient3, then iter rounds of
/// gpu_vote3 followed by gpu_update_dir3, with phi reduced by d_phi after each round.
/// On any CUDA failure a message is printed to stderr, all device memory is
/// released and img is left unmodified.
///
/// @param img			host volume of x*y*z floats; input image on entry, vote image on return
/// @param sigma		forwarded to gpu_gaussian_blur3 (presumably per-axis blur widths - confirm against that header)
/// @param anisotropy	forwarded to gpu_gradient3
/// @param phi			starting angle forwarded, together with cos(phi), to the vote/update stages
/// @param d_phi		amount subtracted from phi after every iteration
/// @param r			forwarded to gpu_vote3 and gpu_update_dir3 (presumably the voting extent per axis - confirm)
/// @param iter			number of vote/update iterations; iter <= 0 yields an all-zero vote image
/// @param t			unused here (only the disabled local-maximum step referenced it)
/// @param conn			unused here (only the disabled local-maximum step referenced it)
/// @param x, y, z		volume dimensions in voxels
void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, unsigned int r[],
			int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	const size_t bytes = ivote3_volume_bytes(x, y, z);		//size of one scalar volume
	if (bytes == 0)
		return;												//empty volume: nothing to process

	ivote3_select_device();

	//upload the image and blur it in place on the device
	ivote3_device_buffer image;
	if (!ivote3_cuda_ok(cudaMalloc(&image.ptr, bytes), "cudaMalloc(image)")) return;
	if (!ivote3_cuda_ok(cudaMemcpy(image.ptr, img, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(image -> device)")) return;
	gpu_gaussian_blur3(image.ptr, sigma, x, y, z);
	if (!ivote3_stage_ok("gpu_gaussian_blur3")) return;

	//the gradient holds three components per voxel, hence three times the volume size
	ivote3_device_buffer grad;
	if (!ivote3_cuda_ok(cudaMalloc(&grad.ptr, bytes * 3), "cudaMalloc(gradient)")) return;
	gpu_gradient3(grad.ptr, image.ptr, anisotropy, x, y, z);
	if (!ivote3_stage_ok("gpu_gradient3")) return;
	image.release();										//the blurred image is not needed past this point

	ivote3_device_buffer vote;
	if (!ivote3_cuda_ok(cudaMalloc(&vote.ptr, bytes), "cudaMalloc(vote)")) return;
	//zero once up front so that iter <= 0 returns a defined (all-zero) vote image
	if (!ivote3_cuda_ok(cudaMemset(vote.ptr, 0, bytes), "cudaMemset(vote)")) return;

	float cos_phi = cos(phi);
	for (int i = 0; i < iter; i++){
		//each round accumulates votes from scratch
		if (!ivote3_cuda_ok(cudaMemset(vote.ptr, 0, bytes), "cudaMemset(vote)")) return;
		gpu_vote3(vote.ptr, grad.ptr, phi, cos_phi, r, x, y, z);
		if (!ivote3_stage_ok("gpu_vote3")) return;

		gpu_update_dir3(grad.ptr, vote.ptr, phi, cos_phi, r, x, y, z);
		if (!ivote3_stage_ok("gpu_update_dir3")) return;

		//NOTE: phi is reduced unconditionally; the original "phi >= d_phi" guard is disabled
		phi = phi - d_phi;
		cos_phi = cos(phi);
	}

	grad.release();
	ivote3_cuda_ok(cudaMemcpy(img, vote.ptr, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(vote -> host)");

	//The local-maximum step (gpu_local_max3 with t and conn) is not run here;
	//lmax() below applies it to a vote image.
	//The vote buffer is released when `vote` goes out of scope.
}

/// Applies gpu_local_max3 to a host volume and writes the result to out.
///
/// On any CUDA failure a message is printed to stderr, all device memory is
/// released and out is left unmodified.
///
/// @param out			host buffer of x*y*z floats that receives the result
/// @param in			host buffer of x*y*z floats holding the input volume
/// @param t			forwarded to gpu_local_max3 (presumably a threshold - confirm against that header)
/// @param conn			forwarded to gpu_local_max3 (presumably the neighbourhood size per axis - confirm)
/// @param x, y, z		volume dimensions in voxels
void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	const size_t bytes = ivote3_volume_bytes(x, y, z);		//size of one scalar volume
	if (bytes == 0)
		return;												//empty volume: nothing to process

	ivote3_select_device();

	//upload the input volume
	ivote3_device_buffer source;
	if (!ivote3_cuda_ok(cudaMalloc(&source.ptr, bytes), "cudaMalloc(input)")) return;
	if (!ivote3_cuda_ok(cudaMemcpy(source.ptr, in, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(input -> device)")) return;

	ivote3_device_buffer result;
	if (!ivote3_cuda_ok(cudaMalloc(&result.ptr, bytes), "cudaMalloc(output)")) return;

	gpu_local_max3(result.ptr, source.ptr, t, conn, x, y, z);
	if (!ivote3_stage_ok("gpu_local_max3")) return;

	//copy the final result to the cpu; both device buffers are freed on scope exit
	ivote3_cuda_ok(cudaMemcpy(out, result.ptr, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(output -> host)");
}