Blame view

tira/cuda/testKernel.cuh 3.95 KB
ce6381d7   David Mayerich   updating to TIRA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  #include <stim/math/vector.h>
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
  	
  	///Device buffer shared by the host helpers in this file: initArray() points it
  	///at memory obtained from cudaMalloc, cleanUP() passes it to cudaFree, and
  	///test() hands it to the kernels and to stim::gpu2image.
  	///The kernels take a parameter with the same name, which hides this global
  	///inside their bodies.
  	float* print;
  	
  	///Allocates the device buffer `print` with room for x*y floats.
  	///On failure an error is written to stderr and `print` is left as NULL, so
  	///callers can detect the problem instead of using a stale pointer.
  	///@param x	--number of elements per row.
  	///@param y	--number of rows.
  	void initArray(int x, int y)
  	{
  		print = NULL;

  		//a negative extent would turn into an enormous size_t below.
  		if(x < 0 || y < 0)
  		{
  			fprintf(stderr, "initArray: invalid dimensions %d x %d\n", x, y);
  			return;
  		}

  		//widen before multiplying so that large images cannot overflow int.
  		const size_t bytes = (size_t)x * (size_t)y * sizeof(float);

  		cudaError_t err = cudaMalloc( (void**) &print, bytes);
  		if(err != cudaSuccess)
  		{
  			fprintf(stderr, "initArray: cudaMalloc of %lu bytes failed: %s\n",
  				(unsigned long)bytes, cudaGetErrorString(err));
  			print = NULL;
  		}
  	}
  
  	///Releases the device buffer referenced by `print`.
  	///The pointer is reset afterwards, so calling this function again (or calling
  	///it when nothing was allocated) frees nothing.
  	void cleanUP()
  	{
  		cudaError_t err = cudaFree(print);
  		if(err != cudaSuccess)
  			fprintf(stderr, "cleanUP: cudaFree failed: %s\n", cudaGetErrorString(err));

  		//forget the pointer: cudaFree(NULL) is a no-op, a second free of the
  		//same address is an error.
  		print = NULL;
  	}
  
  	 ///Binary stripe template evaluated at column x of a row that is max_x wide.
  	 ///Returns 1 inside three bands and 0 elsewhere; the band edges are computed
  	 ///with integer division and the comparisons are strict:
  	 ///  left band:	x < max_x/6
  	 ///  centre band:	max_x*2/6 < x < max_x*4/6
  	 ///  right band:	x > max_x*5/6
  	 ///For max_x = 32 this selects x in 0..4, 11..20 and 27 and above.
  	 ///Callable from both host and device code so that a CPU reference can be
  	 ///computed with the same function.
  	 ///@param x		--column to evaluate.
  	 ///@param max_x	--width of the row the template is stretched over.
  	__host__ __device__
  	inline float templ(int x, int max_x)
  	{
  		const bool leftBand   = x < max_x/6;
  		const bool rightBand  = x > max_x*5/6;
  		const bool centreBand = x > max_x*2/6 && x < max_x*4/6;

  		return (leftBand || rightBand || centreBand) ? 1.0f : 0.0f;
  	}
  		
  	///Debug kernel: writes the magnitude of every texel of a 2D texture into a
  	///row-major float buffer. Despite its name, the subtraction of the stripe
  	///template is currently disabled, so the stored value is the texel itself.
  	///
  	///Launch layout: one thread per texel; the global x/y thread coordinates are
  	///used directly as texel coordinates. Threads with x >= dx do nothing. The
  	///kernel is not told the number of rows, so the grid must not extend past
  	///the last row of the buffer in y.
  	///@param texIn	--texture object read with tex2D<unsigned char>.
  	///@param print	--device buffer receiving the result, indexed as y*dx+x.
  	///@param dx		--row width of the buffer in elements.
  	__global__
  	void get_diff (cudaTextureObject_t texIn, float *print, int dx)
  	{
  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;

  		//a thread to the right of the row would alias the next row (or run off
  		//the end of the buffer), so it must not write.
  		if(x >= dx)
  			return;

  		//widen before multiplying so large buffers cannot overflow int.
  		size_t idx = (size_t)y * dx + x;

  		float valIn = tex2D<unsigned char>(texIn, x, y);

  		//to visualise the template difference instead, store
  		//fabsf(valIn - templ(x, 32)*255.0f) here.
  		print[idx] = fabsf(valIn);
  	}
  
  	///Debug kernel: writes the magnitude of every texel of a 2D texture into a
  	///row-major float buffer. No template is involved; the stored value is the
  	///texel itself.
  	///
  	///Launch layout: one thread per texel; the global x/y thread coordinates are
  	///used directly as texel coordinates. Threads with x >= dx do nothing. The
  	///kernel is not told the number of rows, so the grid must not extend past
  	///the last row of the buffer in y.
  	///@param texIn	--texture object read with tex2D<unsigned char>.
  	///@param print	--device buffer receiving the result, indexed as y*dx+x.
  	///@param dx		--row width of the buffer in elements.
  	__global__
  	void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  	{
  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;

  		//a thread to the right of the row would alias the next row (or run off
  		//the end of the buffer), so it must not write.
  		if(x >= dx)
  			return;

  		//widen before multiplying so large buffers cannot overflow int.
  		size_t idx = (size_t)y * dx + x;

  		float valIn = tex2D<unsigned char>(texIn, x, y);
  		print[idx]  = fabsf(valIn);
  	}
  
  	///Shared implementation of the two test() overloads: runs one of the debug
  	///kernels over a texture and hands the resulting device buffer to
  	///stim::gpu2image. If the allocation, the launch or the kernel execution
  	///reports a CUDA error, the error is written to stderr and no image is
  	///produced. The device buffer is released in every case.
  	///@param kernel	--kernel to launch (get_diff or get_diff2).
  	///@param tObj	--texture object handed to the kernel.
  	///@param x		--texture width; also used as the number of threads per block.
  	///@param y		--texture height; also used as the number of blocks.
  	///@param nam	--name forwarded to stim::gpu2image.
  	static void runTestKernel(void (*kernel)(cudaTextureObject_t, float*, int),
  				  cudaTextureObject_t tObj, int x, int y, const std::string &nam)
  	{
  		//a launch with an empty grid or block is invalid, so reject it up front.
  		if(x <= 0 || y <= 0)
  		{
  			fprintf(stderr, "test(\"%s\"): invalid texture size %d x %d\n", nam.c_str(), x, y);
  			return;
  		}

  		//allocate the device buffer that receives the kernel output.
  		initArray(x,y);
  		cudaError_t err = cudaGetLastError();		//picks up a failed allocation

  		if(err == cudaSuccess)
  		{
  			//one block per row, one thread per texel of that row.
  			dim3 numBlocks(1, y);
  			dim3 threadsPerBlock(x, 1);

  			kernel <<< numBlocks, threadsPerBlock >>> (tObj, print, x);

  			//a launch never returns an error directly; an invalid configuration
  			//(for example x above the per-block thread limit) shows up here.
  			err = cudaGetLastError();
  		}

  		//errors raised while the kernel was running surface at the next sync.
  		if(err == cudaSuccess)
  			err = cudaDeviceSynchronize();

  		if(err == cudaSuccess)
  			stim::gpu2image<float>(print, nam,x,y,0,255);
  		else
  			fprintf(stderr, "test(\"%s\"): CUDA error: %s\n", nam.c_str(), cudaGetErrorString(err));

  		cleanUP();
  	}

  	///Runs get_diff over the texture and passes the result to stim::gpu2image.
  	///@param tObj	--texture object to read.
  	///@param x		--texture width.
  	///@param y		--texture height.
  	///@param nam	--name forwarded to stim::gpu2image.
  	void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
  	{
  		runTestKernel(get_diff, tObj, x, y, nam);
  	}


  	///Runs get_diff2 over the texture and passes the result to stim::gpu2image.
  	///@param tObj	--texture object to read.
  	///@param x		--texture width.
  	///@param y		--texture height.
  	///@param nam	--name forwarded to stim::gpu2image.
  	///@param iter	--currently unused; it only selects this overload.
  	void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  	{
  		(void)iter;
  		runTestKernel(get_diff2, tObj, x, y, nam);
  	}