  // stim/cuda/branch_detection.cuh (3.91 KB)
  // Blame: 84eff8b1   Pavel Govyadinov   Merged only the n...
  #include <iostream>
  #include <fstream>
  #include <cuda_runtime.h>
  #include <stim/math/vector.h>
  //#include <math.h>
  #include <stim/visualization/colormap.h>
  #include <stim/cuda/cuda_texture.cuh>
  #include <stim/cuda/templates/gradient.cuh>
  #include <stim/cuda/templates/gaussian_blur.cuh>
  #include <stim/cuda/arraymath.cuh>
  #include <stim/cuda/ivote.cuh>
  #include <stim/cuda/testKernel.cuh>
  // Short aliases for unsigned int.
  typedef unsigned int uint;
  // NOTE(review): uchar aliases unsigned int, not unsigned char -- confirm this is intended.
  typedef unsigned int uchar;
  
  // File-level state shared by initCuda(), cleanCuda() and find_branch().

  // Texture wrapper; find_branch() maps the OpenGL texture into it and unmaps it before returning.
  stim::cuda::cuda_texture t;	
  // Device buffer of bytes_table bytes; find_branch() copies the atan_2d() table into it.
  float*		gpuTable;
  // Device buffer of 2*bytes_ds bytes; first argument of gpu_gradient_2d and gpu_cart2polar.
  float*		gpuGrad;
  // Device buffer of bytes_ds bytes; first argument of gpu_vote / gpu_update_dir.
  float*		gpuVote;	
  // Device buffer of bytes_ds bytes; first argument of tex_gaussian_blur2.
  float*		gpuI;
  // Device buffer of bytes_ds bytes; first argument of gpu_local_max, copied back to the host.
  float*		gpuCenters;
  
  // Fill a square lookup table with atan2 angles measured from the window center.
  //
  // cpuTable : host buffer with room for (2*rmax+1)*(2*rmax+1) floats, written row by row.
  // rmax     : half-width of the window; the center pixel sits at (rmax, rmax).
  //
  // Entry [row*side + col] holds atan2(rmax - row, rmax - col).
  void atan_2d(float* cpuTable, unsigned int rmax)
  {
  	// side length of the square window and the coordinate of its center
  	const int side   = 2*rmax +1;
  	const int center = rmax;

  	for (int row = 0; row < side; ++row) {

  		// vertical offset of this row from the center, shared by every column in the row
  		const float dy = center - row;
  		// start of the current row inside the flattened table
  		float* line = cpuTable + row * side;

  		for (int col = 0; col < side; ++col) {

  			// horizontal offset of this column from the center
  			const float dx = center - col;
  			// angle of the (dx, dy) offset
  			line[col] = atan2(dy, dx);
  		}
  	}
  }
  
  // Allocate the file-level device buffers.
  //
  // bytes_table : size in bytes of gpuTable.
  // bytes_ds    : size in bytes of gpuI, gpuVote and gpuCenters; gpuGrad gets twice this.
  //
  // Every allocation is passed through HANDLE_ERROR.
  void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
  {
  	// lookup table
  	HANDLE_ERROR( cudaMalloc((void**) &gpuTable, bytes_table) );

  	// image-sized buffers; the gradient buffer is twice the image size
  	HANDLE_ERROR( cudaMalloc((void**) &gpuI, bytes_ds) );
  	HANDLE_ERROR( cudaMalloc((void**) &gpuGrad,  bytes_ds*2) );
  	HANDLE_ERROR( cudaMalloc((void**) &gpuVote,  bytes_ds) );
  	HANDLE_ERROR( cudaMalloc((void**) &gpuCenters, bytes_ds) );
  }
  
  // Release the file-level device buffers that initCuda() allocates.
  // Every cudaFree result is passed through HANDLE_ERROR.
  void cleanCuda()
  {
  	HANDLE_ERROR( cudaFree(gpuTable) );
  	HANDLE_ERROR( cudaFree(gpuGrad) );
  	HANDLE_ERROR( cudaFree(gpuVote) );
  	HANDLE_ERROR( cudaFree(gpuCenters) );
  	HANDLE_ERROR( cudaFree(gpuI) );
  }
  
  std::vector< stim::vec<float> >
  find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  {
  	float 		phi	 	= 15.1*M_PI/180;
  	int		iter		= 5;
  	float 		dphi		= phi/iter;
  	float 		rmax 		= 10;
  	float		sigma		= 4;
  	unsigned int 	pixels 		= x * y;
  	unsigned int 	bytes  		= sizeof(float) * pixels;
  	unsigned int 	bytes_table	= sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
  	unsigned int 	x_ds		= (x + (x % 1 == 0 ? 0:1));
  	unsigned int 	y_ds		= (y + (x % 1 == 0 ? 0:1));
  	unsigned int	bytes_ds	= sizeof(float) * x_ds * y_ds;
  	unsigned int	conn		= 5;
  	float		final_t		= 200.0;
  	float*		cpuTable	= (float*) malloc(bytes_table);
  	float*		cpuCenters	= (float*) malloc(bytes_ds);
  
  	stringstream name;
  
  
  
  
  	std::vector<stim::vec<float> >  output;
  	initCuda(bytes_table, bytes_ds); 
  
  	atan_2d(cpuTable, rmax);
  	cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  
84eff8b1   Pavel Govyadinov   Merged only the n...
120
121
122
123
124
125
126
  
  	t.MapCudaTexture(texbufferID, texType);
  	cudaDeviceSynchronize();
  	stim::cuda::tex_gaussian_blur2<float>(
  		gpuI, sigma, x, y, t.getTexture(), t.getArray()
  		);
  	cudaDeviceSynchronize();
84eff8b1   Pavel Govyadinov   Merged only the n...
127
128
129
130
131
132
  
  
  	stim::cuda::gpu_gradient_2d<float>(
  		gpuGrad, gpuI, x, y
  		);
  	cudaDeviceSynchronize();
84eff8b1   Pavel Govyadinov   Merged only the n...
133
  	
59781ee3   Pavel Govyadinov   fixed a stask bug...
134
  	stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
84eff8b1   Pavel Govyadinov   Merged only the n...
135
  	cudaDeviceSynchronize();
84eff8b1   Pavel Govyadinov   Merged only the n...
136
137
138
139
140
  
  	cudaDeviceSynchronize();
  	for (int i = 0; i < iter; i++)
  	{
  		stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
84eff8b1   Pavel Govyadinov   Merged only the n...
141
  	cudaDeviceSynchronize();
84eff8b1   Pavel Govyadinov   Merged only the n...
142
143
  		stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  	cudaDeviceSynchronize();
84eff8b1   Pavel Govyadinov   Merged only the n...
144
145
146
147
148
  		phi = phi - dphi;
  	}
  	
  	cudaDeviceSynchronize();
  	stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
84eff8b1   Pavel Govyadinov   Merged only the n...
149
  	cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
84eff8b1   Pavel Govyadinov   Merged only the n...
150
151
152
153
  	for(int i = 0; i < pixels; i++)
  	{
  		int ix = (i % x);
  		int iy = (i / x);
1306fd96   Pavel Govyadinov   minor bug fixes i...
154
  		if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4))
84eff8b1   Pavel Govyadinov   Merged only the n...
155
  		{
84eff8b1   Pavel Govyadinov   Merged only the n...
156
157
158
159
160
161
  
  			float x_v = (float) ix;
  			float y_v = (float) iy;
  			output.push_back(stim::vec<float>((x_v/(float)x),
  							  (y_v/(float)y), 0.0));	
  
84eff8b1   Pavel Govyadinov   Merged only the n...
162
163
164
165
166
167
168
169
170
171
  		}
  	}
  
  
  	t.UnmapCudaTexture();
  	cleanCuda();
  	free(cpuTable);
  	free(cpuCenters);
  	return output;
  }