Blame view

stim/cuda/testKernel.cuh 3.95 KB
84eff8b1   Pavel Govyadinov   Merged only the n...
1
2
3
4
5
6
7
8
9
10
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  #include <stim/math/vector.h>
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
4166e973   Pavel Govyadinov   Added debug tags ...
11
  	
84eff8b1   Pavel Govyadinov   Merged only the n...
12
13
14
15
16
  	///File-level pointer to the scratch buffer in device memory: allocated by
  	///initArray(), handed to the kernels by test() and released by cleanUP().
  	float* print;
  	
  	///Initialization function, allocates the device buffer `print` with room
  	///for x*y floats.
  	///A failure reported by cudaMalloc is printed to stderr and `print` is reset
  	///to NULL, so later code never works with an indeterminate pointer.
  	///@param x	--first dimension of the buffer, in elements.
  	///@param y	--second dimension of the buffer, in elements.
  	void initArray(int x, int y)
  	{
  		//widen before multiplying so that a large x*y product cannot overflow int
  		size_t bytes = (size_t) x * (size_t) y * sizeof(float);

  		cudaError_t err = cudaMalloc( (void**) &print, bytes);     ///temporary
  		if(err != cudaSuccess)
  		{
  			fprintf(stderr, "initArray: cudaMalloc of %llu bytes failed: %s\n",
  				(unsigned long long) bytes, cudaGetErrorString(err));
  			print = NULL;
  		}
  	}
  
  	///Deinit function that frees the device memory referenced by `print`.
  	///The pointer is cleared afterwards, so calling cleanUP() again does not free
  	///the same allocation twice (cudaFree on a null pointer performs no operation).
  	///A failure reported by cudaFree is printed to stderr.
  	void cleanUP()
  	{
  		cudaError_t err = cudaFree(print);         ///temporary
  		if(err != cudaSuccess)
  		{
  			fprintf(stderr, "cleanUP: cudaFree failed: %s\n", cudaGetErrorString(err));
  		}
  		print = NULL;
  	}
5de3a9c2   Pavel Govyadinov   CHECKPOINT: befo...
28
  
4166e973   Pavel Govyadinov   Added debug tags ...
29
30
31
32
33
34
35
36
37
38
  	 ///Evaluates a two-level stripe pattern at position x of a line of length max_x.
  	 ///Returns 1 when x lies strictly before max_x/6, strictly after max_x*5/6, or
  	 ///strictly between max_x*2/6 and max_x*4/6; returns 0 everywhere else.
  	 ///All limits are computed with integer arithmetic.
  	 ///@param x		--position to evaluate.
  	 ///@param max_x	--length of the line the pattern is stretched over.
  	 __device__
  	float templ(int x, int max_x)
  	{
  		//leading stripe
  		if(x < max_x/6)
  			return 1.0f;

  		//trailing stripe
  		if(x > max_x*5/6)
  			return 1.0f;

  		//central stripe
  		if(x > max_x*2/6 && x < max_x*4/6)
  			return 1.0f;

  		//the gaps between the stripes
  		return 0.0f;
  	}
84eff8b1   Pavel Govyadinov   Merged only the n...
39
40
41
42
43
44
45
46
  		
  	///Debug kernel that copies the texel below every thread into the output buffer.
  	///The comparison against the template is currently disabled; the disabled
  	///variants are kept as comments inside the body.
  	///Every thread handles the position (x, y) given by its global 2D thread index
  	///and writes to print[y*dx + x]. The number of rows is not known to the kernel,
  	///so the launch must not create more rows of threads than the buffer holds.
  	///@param texIn	--texture object that is read with tex2D<unsigned char>.
  	///@param print	--buffer receiving one float per thread (this parameter hides
  	///		  the file-level pointer of the same name).
  	///@param dx		--number of elements per row of print; threads with x >= dx
  	///		  return without writing.
  	__global__
  	void get_diff (cudaTextureObject_t texIn, float *print, int dx)
  	{
  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;

  		//a column at or beyond the row length would land in the following row
  		if(x >= dx)
  			return;

  		int idx = y*dx+x;

  		float valIn             = tex2D<unsigned char>(texIn, x, y);

  		//print[idx]              = fabsf(valIn - templ(x, 32)*255.0f);   ///disabled: difference to the template
  		//print[idx]              = fabsf(templ(x, 32)*255.0f);           ///disabled: template only
  		print[idx]              = fabsf(valIn);             ///temporary
  	}
  
2eefb035   Pavel Govyadinov   added debugging c...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
  	///Debug kernel that copies the texel below every thread into the output buffer.
  	///Every thread handles the position (x, y) given by its global 2D thread index
  	///and writes the absolute texel value to print[y*dx + x]. The number of rows is
  	///not known to the kernel, so the launch must not create more rows of threads
  	///than the buffer holds.
  	///@param texIn	--texture object that is read with tex2D<unsigned char>.
  	///@param print	--buffer receiving one float per thread (this parameter hides
  	///		  the file-level pointer of the same name).
  	///@param dx		--number of elements per row of print; threads with x >= dx
  	///		  return without writing.
  	__global__
  	void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  	{
  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;

  		//a column at or beyond the row length would land in the following row
  		if(x >= dx)
  			return;

  		int idx = y*dx+x;

  		float valIn             = tex2D<unsigned char>(texIn, x, y);
  		print[idx]              = fabsf(valIn);             ///temporary
  	}
  
4166e973   Pavel Govyadinov   Added debug tags ...
81
  	///Debug helper that runs get_diff on a texture and stores the result as an image.
  	///The buffer is allocated with initArray(), the kernel is launched with y blocks
  	///of x threads each, and after the kernel has finished the buffer is handed to
  	///stim::gpu2image. The buffer is released with cleanUP() before returning.
  	///Unsupported sizes and CUDA errors are reported on stderr; no image is written
  	///in those cases.
  	///@param tObj	--texture object passed on to the kernel.
  	///@param x	--threads per block, also used as the row length of the buffer.
  	///@param y	--number of blocks, i.e. the number of rows of the buffer.
  	///@param nam	--name handed to stim::gpu2image for the output.
  	void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
  	{
  		//a complete row is processed by a single block, so x is limited by the block size
  		int max_threads = stim::maxThreadsPerBlock();
  		if(x <= 0 || y <= 0 || x > max_threads)
  		{
  			fprintf(stderr, "test: unsupported size %d x %d (at most %d threads per block)\n",
  				x, y, max_threads);
  			return;
  		}

  		//initialize the return arrays.
  		initArray(x,y);
  		dim3 numBlocks(1, y);
  		dim3 threadsPerBlock(x, 1);

  		get_diff <<< numBlocks, threadsPerBlock >>> (tObj, print, x);

  		//a rejected launch is only visible through cudaGetLastError; errors raised
  		//while the kernel runs are returned by the synchronization
  		cudaError_t err = cudaGetLastError();
  		if(err == cudaSuccess)
  			err = cudaDeviceSynchronize();

  		if(err == cudaSuccess)
  			stim::gpu2image<float>(print, nam,x,y,0,255);
  		else
  			fprintf(stderr, "test: get_diff failed: %s\n", cudaGetErrorString(err));

  		cleanUP();
  	}
  
2eefb035   Pavel Govyadinov   added debugging c...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  
  	///Debug helper that runs get_diff2 on a texture and stores the result as an image.
  	///The buffer is allocated with initArray(), the kernel is launched with y blocks
  	///of x threads each, and after the kernel has finished the buffer is handed to
  	///stim::gpu2image. The buffer is released with cleanUP() before returning.
  	///Unsupported sizes and CUDA errors are reported on stderr; no image is written
  	///in those cases.
  	///@param tObj	--texture object passed on to the kernel.
  	///@param x	--threads per block, also used as the row length of the buffer.
  	///@param y	--number of blocks, i.e. the number of rows of the buffer.
  	///@param nam	--name handed to stim::gpu2image for the output.
  	///@param iter	--not used by this function at the moment.
  	void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  	{
  		//a complete row is processed by a single block, so x is limited by the block size
  		int max_threads = stim::maxThreadsPerBlock();
  		if(x <= 0 || y <= 0 || x > max_threads)
  		{
  			fprintf(stderr, "test: unsupported size %d x %d (at most %d threads per block)\n",
  				x, y, max_threads);
  			return;
  		}

  		//initialize the return arrays.
  		initArray(x,y);
  		dim3 numBlocks(1, y);
  		dim3 threadsPerBlock(x, 1);

  		get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);

  		//a rejected launch is only visible through cudaGetLastError; errors raised
  		//while the kernel runs are returned by the synchronization
  		cudaError_t err = cudaGetLastError();
  		if(err == cudaSuccess)
  			err = cudaDeviceSynchronize();

  		if(err == cudaSuccess)
  			stim::gpu2image<float>(print, nam,x,y,0,255);
  		else
  			fprintf(stderr, "test: get_diff2 failed: %s\n", cudaGetErrorString(err));

  		cleanUP();
  	}