Blame view

stim/iVote/ivote2.cuh 6.74 KB
3f2d0f9f   Laila Saadatifard   changing the ivot...
1
2
3
4
5
6
7
8
  #ifndef STIM_IVOTE2_CUH
  #define STIM_IVOTE2_CUH
  
  #include <iostream>
  #include <fstream>
  #include <stim/cuda/cudatools/error.h>
  #include <stim/cuda/templates/gradient.cuh>
  #include <stim/cuda/arraymath.cuh>
6316fca0   Laila Saadatifard   fix the 'unresolv...
9
10
  #include <stim/iVote/ivote2/iter_vote2.cuh>
  #include <stim/iVote/ivote2/local_max.cuh>
3f2d0f9f   Laila Saadatifard   changing the ivot...
11
12
13
14
  #include <stim/math/constants.h>
  #include <stim/math/vector.h>
  #include <stim/visualization/colormap.h>
  
3f2d0f9f   Laila Saadatifard   changing the ivot...
15
  
6316fca0   Laila Saadatifard   fix the 'unresolv...
16
  namespace stim {
3f2d0f9f   Laila Saadatifard   changing the ivot...
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
  	// this function precomputes the atan2 values
  	template<typename T>
  	void atan_2(T* cpuTable, unsigned int rmax) {
  		int xsize = 2 * rmax + 1;						//initialize the width and height of the window which atan2 are computed in.
  		int ysize = 2 * rmax + 1;
  		int yi = rmax;									// assign the center coordinates of the atan2 window to yi and xi
  		int xi = rmax;
  		for (int xt = 0; xt < xsize; xt++) {			//for each element in the atan2 table
  			for (int yt = 0; yt < ysize; yt++) {
  				int id = yt * xsize + xt;				//convert the current 2D coordinates to 1D
  				int xd = xi - xt;						// calculate the distance between the pixel and the center of the atan2 window
  				int yd = yi - yt;
  				T atan_2d = atan2((T)yd, (T)xd);	// calculate the angle between the pixel and the center of the atan2 window and store the result.
  				cpuTable[id] = atan_2d;
  			}
  		}
  	}
  
  	//this kernel invert the 2D image
  	template<typename T>
  	__global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
  		// calculate the 2D coordinates for this current thread.
  		size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
  		size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  
  		if (xi >= x || yi >= y) return;
  		size_t i = yi * x + xi;					// convert 2D coordinates to 1D
  		gpuI[i] = 255 - gpuI[i];				//invert the pixel intensity
  	}
  
  
  
  	//this function calculate the threshold using OTSU method
  	template<typename T>
  	T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
  		T Imax = pts[0];				//initialize the maximum value to the first one
  		T Imin = pts[0];				//initialize the maximum value to the first on
  
  		for (size_t n = 0; n < pixels; n++) {		//for every value
  			if (pts[n] > Imax) {			//if the value is higher than the current max
  				Imax = pts[n];
  			}
  		}
  		for (size_t n = 0; n< pixels; n++) {		//for every value
  			if (pts[n] < Imin) {			//if the value is higher than the current max
  				Imin = pts[n];
  			}
  		}
  
  		T th_step = ((Imax - Imin) / th_num);
  		vector<T> var_b;
  		for (unsigned int t0 = 0; t0 < th_num; t0++) {
  			T th = t0 * th_step + Imin;
  			unsigned int n_b(0), n_o(0);		//these variables save the number of elements that are below and over the threshold
  			T m_b(0), m_o(0);				//these variables save the mean value for each cluster
  			for (unsigned int idx = 0; idx < pixels; idx++) {
  				if (pts[idx] <= th) {
  					m_b += pts[idx];
  					n_b += 1;
  				}
  				else {
  					m_o += pts[idx];
  					n_o += 1;
  				}
  			}
  
  			m_b = m_b / n_b;		//calculate the mean value for the below threshold cluster
  			m_o = m_o / n_o;		//calculate the mean value for the over threshold cluster
  
  			var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
  		}
  
  		vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end());	//finding maximum elements in the vector
  		size_t th_idx = std::distance(var_b.begin(), max_var);
  		T threshold = Imin + (T)(th_idx * th_step);
  		return threshold;
  	}
  
  	//this function performs the 2D iterative voting algorithm on the image stored in the gpu 
  	template<typename T>
f861ad6c   Laila Saadatifard   do not threshold ...
97
  	void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
3f2d0f9f   Laila Saadatifard   changing the ivot...
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  
  		size_t pixels = x * y;				//compute the size of input image
  		//
  		if (invert) {						//if inversion is required call the kernel to invert the image
  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
  			dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
  			cuda_invert << <blocks, threads >> > (gpuI, x, y);
  		}
  		//
  		size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T));				// create the atan2 table
  		T* cpuTable = (T*)malloc(table_bytes);											//assign memory on the cpu for atan2 table
  		atan_2<T>(cpuTable, rmax);														//call the function to precompute the atan2 table
  		T* gpuTable;  HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
  		HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice));	//copy atan2 table to the gpu
  
  		size_t bytes = pixels* sizeof(T);													//calculate the bytes of the input
  		float dphi = phi / iter;															//change in phi for each iteration
  
  		float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2));									//allocate space to store the 2D gradient
  		float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes));										//allocate space to store the vote image
  
  		stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);			//calculate the 2D gradient
6316fca0   Laila Saadatifard   fix the 'unresolv...
121
  		stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);					//convert cartesian coordinate of gradient to the polar
3f2d0f9f   Laila Saadatifard   changing the ivot...
122
123
124
  
  		for (int i = 0; i < iter; i++) {														//for each iteration
  			cudaMemset(gpuVote, 0, bytes);													//reset the vote image to 0
6316fca0   Laila Saadatifard   fix the 'unresolv...
125
126
  			stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);		//perform voting
  			stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);	//update the voter directions
3f2d0f9f   Laila Saadatifard   changing the ivot...
127
128
129
130
  			phi = phi - dphi;																//decrement phi
  		}
  		stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y);				//calculate the local maxima
  
f861ad6c   Laila Saadatifard   do not threshold ...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  		if (t > 0) {
  			T* pts = (T*)malloc(bytes);													//allocate memory on the cpu to store the output of iterative voting
  			HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost));			//copy the output from gpu to the cpu memory
  
  			T threshold;
  			threshold = t;
  
  			size_t ind;
  			for (size_t ix = 0; ix < x; ix++) {
  				for (size_t iy = 0; iy < y; iy++) {
  					ind = iy * x + ix;
  					if (pts[ind] > threshold) {
  						pts[ind] = 1;
  					}
  					else pts[ind] = 0;
3f2d0f9f   Laila Saadatifard   changing the ivot...
146
  				}
3f2d0f9f   Laila Saadatifard   changing the ivot...
147
  			}
f861ad6c   Laila Saadatifard   do not threshold ...
148
  			HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice));		//copy the points to the gpu
3f2d0f9f   Laila Saadatifard   changing the ivot...
149
  		}
f861ad6c   Laila Saadatifard   do not threshold ...
150
  				
3f2d0f9f   Laila Saadatifard   changing the ivot...
151
152
153
154
  	}
  
  
  	template<typename T>
f861ad6c   Laila Saadatifard   do not threshold ...
155
  	void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
3f2d0f9f   Laila Saadatifard   changing the ivot...
156
157
158
159
  		size_t bytes = x*y * sizeof(T);
  		T* gpuI;						//allocate space on the gpu to save the input image
  		HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
  		HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice));		//copy the image to the gpu
f861ad6c   Laila Saadatifard   do not threshold ...
160
  		stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, iter, phi, conn, debug);				//call the gpu version of the ivote
3f2d0f9f   Laila Saadatifard   changing the ivot...
161
162
163
164
  		HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost));		//copy the output to the cpu
  	}
  }
  #endif