stim/cuda/branch_detection.cuh

#include <iostream>
#include <fstream>
#include <cuda_runtime.h>
#include <stim/math/vector.h>
//#include <math.h>
#include <stim/visualization/colormap.h>
#include <stim/cuda/cuda_texture.cuh>
#include <stim/cuda/templates/gradient.cuh>
#include <stim/cuda/templates/gaussian_blur.cuh>
#include <stim/cuda/arraymath.cuh>
#include <stim/cuda/ivote.cuh>
#include <stim/cuda/testKernel.cuh>
typedef unsigned int uint;
typedef unsigned int uchar;
stim::cuda::cuda_texture t;	
float*		gpuTable;
float*		gpuGrad;
float*		gpuVote;	
float*		gpuI;
float*		gpuCenters;
void atan_2d(float* cpuTable, unsigned int rmax)
{
	//initialize the width and height of the window which atan2 are computed in.
	int xsize = 2*rmax +1;
	int ysize = 2*rmax +1;
	
	// assign the center coordinates of the atan2 window to yi and xi
	int yi = rmax;
	int xi = rmax;
	
	for (int xt = 0; xt < xsize; xt++){
		for(int yt = 0; yt < ysize; yt++){
			//convert the current 2D coordinates to 1D
			int id = yt * xsize + xt;
			// calculate the distance between the pixel and the center of the atan2 window
			float xd = xi - xt;
			float yd = yi - yt;
			// calculate the angle between the pixel and the center of the atan2 window and store the result.
			float atan_2d_vote = atan2(yd, xd);
			cpuTable[id] = atan_2d_vote;
		}
	}
}
void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
{
	HANDLE_ERROR(
		cudaMalloc((void**) &gpuTable, bytes_table)
		);
	HANDLE_ERROR(
		cudaMalloc((void**) &gpuI, bytes_ds)
		);
	HANDLE_ERROR(
		cudaMalloc((void**) &gpuGrad,  bytes_ds*2)
		);
	HANDLE_ERROR(
		cudaMalloc((void**) &gpuVote,  bytes_ds)
		);
	HANDLE_ERROR(
		cudaMalloc((void**) &gpuCenters, bytes_ds)
		);
}
void cleanCuda()
{
	HANDLE_ERROR(
		cudaFree(gpuTable)
	);
	HANDLE_ERROR(
		cudaFree(gpuGrad)
	);
	HANDLE_ERROR(
		cudaFree(gpuVote)
	);
	HANDLE_ERROR(
		cudaFree(gpuCenters)
	);
	HANDLE_ERROR(
		cudaFree(gpuI)
	);
}
std::vector< stim::vec<float> >
find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
{
	float 		phi	 	= 15.1*M_PI/180;
	int		iter		= 5;
	float 		dphi		= phi/iter;
	float 		rmax 		= 10;
	float		sigma		= 4;
	unsigned int 	pixels 		= x * y;
	unsigned int 	bytes  		= sizeof(float) * pixels;
	unsigned int 	bytes_table	= sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
	unsigned int 	x_ds		= (x + (x % 1 == 0 ? 0:1));
	unsigned int 	y_ds		= (y + (x % 1 == 0 ? 0:1));
	unsigned int	bytes_ds	= sizeof(float) * x_ds * y_ds;
	unsigned int	conn		= 5;
	float		final_t		= 200.0;
	float*		cpuTable	= (float*) malloc(bytes_table);
	float*		cpuCenters	= (float*) malloc(bytes_ds);
	stringstream name;
	std::vector<stim::vec<float> >  output;
	initCuda(bytes_table, bytes_ds); 
	atan_2d(cpuTable, rmax);
	cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
	test(texbufferID, texType);
	t.MapCudaTexture(texbufferID, texType);
	cudaDeviceSynchronize();
	stim::cuda::tex_gaussian_blur2<float>(
		gpuI, sigma, x, y, t.getTexture(), t.getArray()
		);
	cudaDeviceSynchronize();
	stim::gpu2image<float>(gpuI, "Blur.jpg", 16, 8*27, 0, 255);	
	stim::cuda::gpu_gradient_2d<float>(
		gpuGrad, gpuI, x, y
		);
	cudaDeviceSynchronize();
//	stim::gpu2image<float>(gpuGrad, "Grad.jpg", 16, 8*27, 0, 1);	
	
	stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, M_PI);
	cudaDeviceSynchronize();
//	stim::gpu2image<float>(gpuGrad, "Cart_2_polar.jpg", 16, 8*27, 0, 1);	
	cudaDeviceSynchronize();
	for (int i = 0; i < iter; i++)
	{
		stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
		name << "Vote" << i << ".bmp";
		stim::gpu2image<float>(gpuVote, name.str(), 16, 8*27, 0, 255);
		name.str("");
		name.clear();	
	cudaDeviceSynchronize();
//		std::cout << "got here7_a_" << i << std::endl;
		stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
	cudaDeviceSynchronize();
//		std::cout << "got here7_b_" << i << std::endl;
		phi = phi - dphi;
	}
	
	cudaDeviceSynchronize();
	stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
//	std::cout << "got here_sdkfj" << std::endl;
	cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
	stim::gpu2image<float>(gpuCenters, "Centers.jpg", 16, 8*27, 0, 255);	
	for(int i = 0; i < pixels; i++)
	{
		int ix = (i % x);
		int iy = (i / x);
//		std::cout << i << " : " << ix <<" : " <<  iy << std::endl;
		if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4))
		{
			std::cout << ix << " : " << iy << std::endl; 
			float x_v = (float) ix;
			float y_v = (float) iy;
			output.push_back(stim::vec<float>((x_v/(float)x),
							  (y_v/(float)y), 0.0));	
			std::cout << x_v/16.0 << " : " << y_v/216.0 << std::endl; 
		}
	}
	t.UnmapCudaTexture();
	cleanCuda();
	free(cpuTable);
	free(cpuCenters);
	return output;
}