// cudafunc.cu 2.26 KB
#include <cstdio>

#include "gaussian_blur3.cuh"
#include "gradient3.cuh"
#include "mag3.cuh"
#include "vote3.cuh"
#include "update_dir3.cuh"
#include "local_max3.cuh"


// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool ivote3_cuda_ok(cudaError_t err, const char* what){
	if (err == cudaSuccess)
		return true;
	fprintf(stderr, "ivote3: %s failed: %s\n", what, cudaGetErrorString(err));
	return false;
}

// Frees *buf and nulls it so the shared cleanup path cannot free it a second time.
// Returns the status reported by cudaFree (freeing a null pointer is a no-op).
static cudaError_t ivote3_release(float** buf){
	cudaError_t err = cudaFree(*buf);
	*buf = NULL;
	return err;
}

// Evaluates a CUDA runtime call inside ivote3; on failure the error is reported and
// control jumps to the cleanup label so every device buffer is still released.
#define IVOTE3_TRY(call) do { if (!ivote3_cuda_ok((call), #call)) goto cleanup; } while (0)

// Host-side driver that chains the GPU stages from the included headers:
// gpu_gaussian_blur3 -> gpu_gradient3 -> gpu_mag3 -> iter x (gpu_vote3 [+ gpu_update_dir3])
// -> gpu_local_max3. The stage implementations are not visible in this file; the
// descriptions below only cover what this function itself does with its arguments.
//
//   center  host buffer of x*y*z floats; receives the output of gpu_local_max3.
//   img     host buffer of x*y*z floats; uploaded as the input volume. When iter >= 1
//           it is overwritten with the vote buffer produced by the first iteration.
//   sigma   forwarded to gpu_gaussian_blur3 (presumably per-axis blur widths - TODO confirm).
//   phi     starting angle; cos(phi) is forwarded to gpu_vote3 and gpu_update_dir3.
//   d_phi   amount subtracted from phi after an iteration, as long as phi >= d_phi.
//   r       forwarded to gpu_vote3 and gpu_update_dir3.
//   iter    number of voting iterations; iter <= 0 skips voting entirely.
//   t       forwarded to gpu_local_max3 (presumably a threshold - TODO confirm).
//   conn    forwarded to gpu_local_max3.
//   x,y,z   volume dimensions in elements.
//
// On any CUDA failure a message is written to stderr, all device buffers are released
// and the function returns early; center is then left unmodified.
void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[],
			int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){

	// All locals are declared before the first IVOTE3_TRY so that no goto jumps over an
	// initialization; device pointers start null so cleanup can free them unconditionally.
	float* gpuI0 = NULL;
	float* gpu_grad = NULL;
	float* gpu_mag = NULL;
	float* gpu_vote = NULL;
	float* gpu_output = NULL;
	float cos_phi = cos(phi);

	// compute the number of bytes in the input data; widen before multiplying so that
	// large volumes do not wrap around in 32-bit arithmetic.
	size_t bytes = (size_t)x * y * z * sizeof(float);

	// an empty volume has nothing to process.
	if (bytes == 0)
		return;

	//assign memory on gpu for the input data and copy the image data to the GPU.
	IVOTE3_TRY(cudaMalloc(&gpuI0, bytes));
	IVOTE3_TRY(cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice));

	//call the blurring function from the gpu.
	gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z);
	IVOTE3_TRY(cudaGetLastError());
	IVOTE3_TRY(cudaDeviceSynchronize());

	//assign memory on the gpu for the gradient along the X, y, z (three volumes).
	IVOTE3_TRY(cudaMalloc(&gpu_grad, bytes * 3));

	//call the gradient function from the gpu.
	gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z);
	IVOTE3_TRY(cudaGetLastError());

	// the blurred image is not used past this point; release it early to lower peak usage.
	IVOTE3_TRY(ivote3_release(&gpuI0));

	//assign memory on the gpu for the gradient magnitude
	IVOTE3_TRY(cudaMalloc(&gpu_mag, bytes));

	//call the magnitude function
	gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z);
	IVOTE3_TRY(cudaGetLastError());

	//assign memory on the gpu for the vote.
	IVOTE3_TRY(cudaMalloc(&gpu_vote, bytes));

	// give the vote buffer defined contents: with iter <= 0 the loop below never writes
	// it, yet gpu_local_max3 still reads it.
	IVOTE3_TRY(cudaMemset(gpu_vote, 0, bytes));

	//call the vote function.
	for (int i = 0; i < iter; i++){

		gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z);
		IVOTE3_TRY(cudaGetLastError());
		IVOTE3_TRY(cudaDeviceSynchronize());

		// the vote result of the first iteration is handed back through img.
		if (i == 0)
			IVOTE3_TRY(cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost));

		// shrink the angle by d_phi after each pass until it would drop below zero.
		if (phi >= d_phi){
			gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
			IVOTE3_TRY(cudaGetLastError());
			IVOTE3_TRY(cudaDeviceSynchronize());
			phi = phi - d_phi;
			cos_phi = cos(phi);
		}

	}

	// gradient and magnitude are only needed by the voting loop.
	IVOTE3_TRY(ivote3_release(&gpu_grad));
	IVOTE3_TRY(ivote3_release(&gpu_mag));

	//allocate space on the gpu for the final detected cells.
	IVOTE3_TRY(cudaMalloc(&gpu_output, bytes));

	//call the local max function
	gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z);
	IVOTE3_TRY(cudaGetLastError());

	//copy the final result to the cpu.
	IVOTE3_TRY(cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost));

cleanup:
	// shared exit path for success and failure; buffers already released are null here.
	// Statuses are not re-reported: after a failure the original error was already printed.
	ivote3_release(&gpuI0);
	ivote3_release(&gpu_grad);
	ivote3_release(&gpu_mag);
	ivote3_release(&gpu_vote);
	ivote3_release(&gpu_output);

}

#undef IVOTE3_TRY