Blame view

stim/iVote/ivote2.cuh 7.33 KB
3f2d0f9f   Laila Saadatifard   changing the ivot...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
  #ifndef STIM_IVOTE2_CUH
  #define STIM_IVOTE2_CUH
  
  #include <iostream>
  #include <fstream>
  #include <stim/cuda/cudatools/error.h>
  #include <stim/cuda/templates/gradient.cuh>
  #include <stim/cuda/arraymath.cuh>
  #include <stim/iVote/ivote2/ivote2.cuh>
  #include <stim/math/constants.h>
  #include <stim/math/vector.h>
  #include <stim/visualization/colormap.h>
  
  namespace stim {
  
  	// this function precomputes the atan2 values
  	template<typename T>
  	void atan_2(T* cpuTable, unsigned int rmax) {
  		int xsize = 2 * rmax + 1;						//initialize the width and height of the window which atan2 are computed in.
  		int ysize = 2 * rmax + 1;
  		int yi = rmax;									// assign the center coordinates of the atan2 window to yi and xi
  		int xi = rmax;
  		for (int xt = 0; xt < xsize; xt++) {			//for each element in the atan2 table
  			for (int yt = 0; yt < ysize; yt++) {
  				int id = yt * xsize + xt;				//convert the current 2D coordinates to 1D
  				int xd = xi - xt;						// calculate the distance between the pixel and the center of the atan2 window
  				int yd = yi - yt;
  				T atan_2d = atan2((T)yd, (T)xd);	// calculate the angle between the pixel and the center of the atan2 window and store the result.
  				cpuTable[id] = atan_2d;
  			}
  		}
  	}
  
  	//this kernel invert the 2D image
  	template<typename T>
  	__global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
  		// calculate the 2D coordinates for this current thread.
  		size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
  		size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  
  		if (xi >= x || yi >= y) return;
  		size_t i = yi * x + xi;					// convert 2D coordinates to 1D
  		gpuI[i] = 255 - gpuI[i];				//invert the pixel intensity
  	}
  
  
  
  	//this function calculate the threshold using OTSU method
  	template<typename T>
  	T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
  		T Imax = pts[0];				//initialize the maximum value to the first one
  		T Imin = pts[0];				//initialize the maximum value to the first on
  
  		for (size_t n = 0; n < pixels; n++) {		//for every value
  			if (pts[n] > Imax) {			//if the value is higher than the current max
  				Imax = pts[n];
  			}
  		}
  		for (size_t n = 0; n< pixels; n++) {		//for every value
  			if (pts[n] < Imin) {			//if the value is higher than the current max
  				Imin = pts[n];
  			}
  		}
  
  		T th_step = ((Imax - Imin) / th_num);
  		vector<T> var_b;
  		for (unsigned int t0 = 0; t0 < th_num; t0++) {
  			T th = t0 * th_step + Imin;
  			unsigned int n_b(0), n_o(0);		//these variables save the number of elements that are below and over the threshold
  			T m_b(0), m_o(0);				//these variables save the mean value for each cluster
  			for (unsigned int idx = 0; idx < pixels; idx++) {
  				if (pts[idx] <= th) {
  					m_b += pts[idx];
  					n_b += 1;
  				}
  				else {
  					m_o += pts[idx];
  					n_o += 1;
  				}
  			}
  
  			m_b = m_b / n_b;		//calculate the mean value for the below threshold cluster
  			m_o = m_o / n_o;		//calculate the mean value for the over threshold cluster
  
  			var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
  		}
  
  		vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end());	//finding maximum elements in the vector
  		size_t th_idx = std::distance(var_b.begin(), max_var);
  		T threshold = Imin + (T)(th_idx * th_step);
  		return threshold;
  	}
  
  	//this function performs the 2D iterative voting algorithm on the image stored in the gpu 
  	template<typename T>
  	void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  					int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  
  		size_t pixels = x * y;				//compute the size of input image
  		//
  		if (invert) {						//if inversion is required call the kernel to invert the image
  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
  			dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
  			cuda_invert << <blocks, threads >> > (gpuI, x, y);
  		}
  		//
  		size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T));				// create the atan2 table
  		T* cpuTable = (T*)malloc(table_bytes);											//assign memory on the cpu for atan2 table
  		atan_2<T>(cpuTable, rmax);														//call the function to precompute the atan2 table
  		T* gpuTable;  HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
  		HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice));	//copy atan2 table to the gpu
  
  		size_t bytes = pixels* sizeof(T);													//calculate the bytes of the input
  		float dphi = phi / iter;															//change in phi for each iteration
  
  		float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2));									//allocate space to store the 2D gradient
  		float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes));										//allocate space to store the vote image
  
  		stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);			//calculate the 2D gradient
  		//if (invert)  stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI);
  		//else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  		stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);		//convert cartesian coordinate of gradient to the polar
  
  		for (int i = 0; i < iter; i++) {														//for each iteration
  			cudaMemset(gpuVote, 0, bytes);													//reset the vote image to 0
  			stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);		//perform voting
  			stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);	//update the voter directions
  			phi = phi - dphi;																//decrement phi
  		}
  		stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y);				//calculate the local maxima
  
  		T* pts = (T*)malloc(bytes);													//allocate memory on the cpu to store the output of iterative voting
  		HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost));			//copy the output from gpu to the cpu memory
  		
  		T threshold;
  		if (t == 0) threshold = stim::th_otsu<T>(pts, pixels);	//if threshold value is not set call the function to compute the threshold
  		else threshold = t;
  		
  		std::ofstream output;		//save the thresholded detected seeds in a text file
  		output.open(outname_txt);
  		output << "X" << " " << "Y" << " " << "threshold" << "\n";
  		size_t ind;
  		for (size_t ix = 0; ix < x; ix++) {
  			for (size_t iy = 0; iy < y; iy++) {
  				ind = iy * x + ix;
  				if (pts[ind] > threshold) {
  					output << ix << " " << iy << " " << pts[ind] << "\n";
  					pts[ind] = 1;
  				}
  				else pts[ind] = 0;
  			}
  		}
  		output.close();
  
  		HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice));		//copy the points to the gpu
  		stim::cpu2image(pts, outname_img, x, y); //output the image
  		
  	}
  
  
  	template<typename T>
  	void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  					int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  		size_t bytes = x*y * sizeof(T);
  		T* gpuI;						//allocate space on the gpu to save the input image
  		HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
  		HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice));		//copy the image to the gpu
  		stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn);				//call the gpu version of the ivote
  		HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost));		//copy the output to the cpu
  	}
  }
  #endif