// tira/cuda/testKernel.cuh

#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stim/visualization/colormap.h>
#include <sstream>
#include <stim/math/vector.h>
#include <stim/cuda/cudatools/devices.h>
#include <stim/cuda/cudatools/threads.h>
#include <stim/cuda/cuda_texture.cuh>
	
	///Pointer to the scratch float buffer shared by the helpers in this file:
	///initArray() fills it in through cudaMalloc, the test() overloads hand it to
	///the kernels and to stim::gpu2image, and cleanUP() passes it to cudaFree.
	///NOTE(review): this is a definition (not an extern declaration) in a header,
	///so including the header from more than one translation unit would presumably
	///produce duplicate symbols -- confirm how the header is used.
	float* print;
	
	///Initialization function: allocates x*y floats of device memory with cudaMalloc
	///and stores the resulting pointer in the global `print`.
	///On failure (non-positive size or a cudaMalloc error) a message is written to
	///stderr and `print` is set to NULL, so callers can detect the problem.
	///A cudaMalloc error is deliberately left pending so that cudaGetLastError()
	///still reports it to the caller.
	///@param x			--number of elements along the first dimension (must be > 0).
	///@param y			--number of elements along the second dimension (must be > 0).
	void initArray(int x, int y)
	{
		if (x <= 0 || y <= 0)
		{
			fprintf(stderr, "initArray: invalid size %d x %d\n", x, y);
			print = NULL;
			return;
		}

		//Widen before multiplying: x*y evaluated in int can overflow for large images.
		size_t bytes = (size_t)x * (size_t)y * sizeof(float);

		cudaError_t err = cudaMalloc((void**) &print, bytes);     ///temporary
		if (err != cudaSuccess)
		{
			fprintf(stderr, "initArray: cudaMalloc of %llu bytes failed: %s\n",
				(unsigned long long)bytes, cudaGetErrorString(err));
			print = NULL;
		}
	}
	///Deinit function: frees the device buffer referenced by the global `print`
	///and resets the pointer to NULL.
	///Resetting the pointer makes a repeated call harmless (cudaFree on a null
	///pointer performs no operation) and keeps a stale address from being reused.
	///An error returned by cudaFree is reported on stderr.
	void cleanUP()
	{
		cudaError_t err = cudaFree(print);         ///temporary
		print = NULL;
		if (err != cudaSuccess)
		{
			fprintf(stderr, "cleanUP: cudaFree failed: %s\n", cudaGetErrorString(err));
		}
	}
	 ///Three-band step template evaluated at position x of a span of length max_x.
	 ///Returns 1 when x lies before max_x/6, after max_x*5/6, or strictly between
	 ///max_x*2/6 and max_x*4/6; returns 0 everywhere else. All band edges are
	 ///computed with truncating integer division.
	 ///@param x		--position to evaluate.
	 ///@param max_x		--length of the span the bands are scaled to.
	 __device__
	float templ(int x, int max_x)
	{
		//Leading band.
		if (x < max_x / 6)
			return 1.0f;

		//Trailing band.
		if (x > max_x * 5 / 6)
			return 1.0f;

		//Central band, both edges exclusive.
		if (x > max_x * 2 / 6 && x < max_x * 4 / 6)
			return 1.0f;

		return 0.0f;
	}
		
	///Debug kernel: reads one texel per thread from a 2D texture object and stores
	///its absolute value, as a float, in a row-major output buffer.
	///The comparison against the template that gives the kernel its name is
	///currently disabled; to re-enable it, store
	///fabsf(valIn - templ(x, 32) * 255.0f) instead of fabsf(valIn).
	///
	///Launch layout: a 2D grid in which each thread handles the pixel
	///(x, y) = (threadIdx + blockIdx*blockDim) and writes element y*dx + x.
	///Threads whose x is not below dx return without writing, so a grid that is
	///wider than one row cannot spill into the next row or past the buffer.
	///No limit is applied to y (the row count is not a parameter): the launch must
	///not cover more rows than the buffer holds.
	///@param texIn		--texture object sampled with tex2D<unsigned char> at (x, y).
	///			  NOTE(review): assumes element-type reads and unnormalized
	///			  coordinates -- confirm against how the texture is created.
	///@param print		--output buffer, written at index y*dx + x.
	///@param dx		--number of elements per output row.
	__global__
	void get_diff (cudaTextureObject_t texIn, float *print, int dx)
	{
		int x   = threadIdx.x + blockIdx.x * blockDim.x;
		int y   = threadIdx.y + blockIdx.y * blockDim.y;

		//Columns at or beyond the row length have no slot in the output row.
		if (x >= dx)
			return;

		int idx = y*dx+x;
		float valIn             = tex2D<unsigned char>(texIn, x, y);
		print[idx]		= fabsf(valIn);
	}
	///Debug kernel: reads one texel per thread from a 2D texture object and stores
	///its absolute value, as a float, in a row-major output buffer. No template
	///is involved.
	///
	///Launch layout: a 2D grid in which each thread handles the pixel
	///(x, y) = (threadIdx + blockIdx*blockDim) and writes element y*dx + x.
	///Threads whose x is not below dx return without writing, so a grid that is
	///wider than one row cannot spill into the next row or past the buffer.
	///No limit is applied to y (the row count is not a parameter): the launch must
	///not cover more rows than the buffer holds.
	///@param texIn		--texture object sampled with tex2D<unsigned char> at (x, y).
	///			  NOTE(review): assumes element-type reads and unnormalized
	///			  coordinates -- confirm against how the texture is created.
	///@param print		--output buffer, written at index y*dx + x.
	///@param dx		--number of elements per output row.
	__global__
	void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
	{
		int x   = threadIdx.x + blockIdx.x * blockDim.x;
		int y   = threadIdx.y + blockIdx.y * blockDim.y;

		//Columns at or beyond the row length have no slot in the output row.
		if (x >= dx)
			return;

		int idx = y*dx+x;
		float valIn             = tex2D<unsigned char>(texIn, x, y);
		print[idx]              = fabsf(valIn);             ///temporary
	}
	///Debug helper: runs get_diff over an x-by-y texture and hands the resulting
	///float buffer to stim::gpu2image under the name `nam`.
	///The buffer is allocated with initArray() and released with cleanUP().
	///Every CUDA step is checked; on failure a message is written to stderr and
	///no image is produced (previously an unchecked failure produced an image of
	///uninitialized memory).
	///
	///Launch layout: one grid row per image row. The block width is the largest
	///divisor of x that does not exceed stim::maxThreadsPerBlock(), so the grid
	///covers each row with exactly x threads even when x is larger than the
	///per-block thread limit. The grid's y extent equals y, so y is still bounded
	///by the device's grid-size limit; exceeding it is reported as a launch error.
	///@param tObj		--texture object passed to the kernel.
	///@param x		--image width in pixels (must be > 0).
	///@param y		--image height in pixels (must be > 0).
	///@param nam		--name forwarded to stim::gpu2image (presumably the output
	///			  file name -- confirm against stim::gpu2image).
	void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
	{
		if (x <= 0 || y <= 0)
		{
			fprintf(stderr, "test: invalid image size %d x %d\n", x, y);
			return;
		}

		//initialize the return array.
		initArray(x,y);
		cudaError_t err = cudaGetLastError();
		if (err != cudaSuccess)
		{
			fprintf(stderr, "test: CUDA error after allocation: %s\n", cudaGetErrorString(err));
			return;
		}

		//Pick a block width that divides x, so no thread falls outside a row.
		int max_threads = stim::maxThreadsPerBlock();
		int threads_x = (x < max_threads) ? x : max_threads;
		if (threads_x < 1)
			threads_x = 1;
		while (x % threads_x != 0)
			--threads_x;

		dim3 threadsPerBlock(threads_x, 1);
		dim3 numBlocks(x / threads_x, y);

		get_diff <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
		err = cudaGetLastError();			//launch-configuration errors
		if (err == cudaSuccess)
			err = cudaDeviceSynchronize();		//errors raised while the kernel ran

		if (err != cudaSuccess)
			fprintf(stderr, "test: get_diff failed: %s\n", cudaGetErrorString(err));
		else
			stim::gpu2image<float>(print, nam,x,y,0,255);

		cleanUP();
	}
	///Debug helper: runs get_diff2 over an x-by-y texture and hands the resulting
	///float buffer to stim::gpu2image under the name `nam`.
	///The buffer is allocated with initArray() and released with cleanUP().
	///Every CUDA step is checked; on failure a message is written to stderr and
	///no image is produced (previously an unchecked failure produced an image of
	///uninitialized memory).
	///
	///Launch layout: one grid row per image row. The block width is the largest
	///divisor of x that does not exceed stim::maxThreadsPerBlock(), so the grid
	///covers each row with exactly x threads even when x is larger than the
	///per-block thread limit. The grid's y extent equals y, so y is still bounded
	///by the device's grid-size limit; exceeding it is reported as a launch error.
	///@param tObj		--texture object passed to the kernel.
	///@param x		--image width in pixels (must be > 0).
	///@param y		--image height in pixels (must be > 0).
	///@param nam		--name forwarded to stim::gpu2image (presumably the output
	///			  file name -- confirm against stim::gpu2image).
	///@param iter		--not used by this function; it only selects this overload.
	void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
	{
		(void)iter;

		if (x <= 0 || y <= 0)
		{
			fprintf(stderr, "test: invalid image size %d x %d\n", x, y);
			return;
		}

		//initialize the return array.
		initArray(x,y);
		cudaError_t err = cudaGetLastError();
		if (err != cudaSuccess)
		{
			fprintf(stderr, "test: CUDA error after allocation: %s\n", cudaGetErrorString(err));
			return;
		}

		//Pick a block width that divides x, so no thread falls outside a row.
		int max_threads = stim::maxThreadsPerBlock();
		int threads_x = (x < max_threads) ? x : max_threads;
		if (threads_x < 1)
			threads_x = 1;
		while (x % threads_x != 0)
			--threads_x;

		dim3 threadsPerBlock(threads_x, 1);
		dim3 numBlocks(x / threads_x, y);

		get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
		err = cudaGetLastError();			//launch-configuration errors
		if (err == cudaSuccess)
			err = cudaDeviceSynchronize();		//errors raised while the kernel ran

		if (err != cudaSuccess)
			fprintf(stderr, "test: get_diff2 failed: %s\n", cudaGetErrorString(err));
		else
			stim::gpu2image<float>(print, nam,x,y,0,255);

		cleanUP();
	}