Commit 9d087c90863fb562c9838d25f73620aa0b84027e
Merge branch 'ivote2' into 'master'
changing the ivote directory from stim/cuda/ to stim/ and add a cuda header file… … to include ivote2 function See merge request !33
Showing
17 changed files
with
179 additions
and
229 deletions
Show diff stats
stim/cuda/ivote.cuh deleted
1 | -#ifndef STIM_CUDA_IVOTE_H | |
2 | -#define STIM_CUDA_IVOTE_H | |
3 | - | |
4 | -#include <stim/cuda/ivote/down_sample.cuh> | |
5 | -#include <stim/cuda/ivote/local_max.cuh> | |
6 | -#include <stim/cuda/ivote/update_dir.cuh> | |
7 | -#include <stim/cuda/ivote/vote.cuh> | |
8 | - | |
9 | -namespace stim{ | |
10 | - namespace cuda{ | |
11 | - | |
12 | - } | |
13 | -} | |
14 | - | |
15 | - | |
16 | - | |
17 | -#endif | |
18 | 0 | \ No newline at end of file |
stim/cuda/ivote/down_sample.cuh deleted
1 | -#ifndef STIM_CUDA_DOWN_SAMPLE_H | |
2 | -#define STIM_CUDA_DOWN_SAMPLE_H | |
3 | - | |
4 | -#include <iostream> | |
5 | -#include <cuda.h> | |
6 | -#include <stim/cuda/cudatools.h> | |
7 | -#include <stim/cuda/templates/gaussian_blur.cuh> | |
8 | - | |
9 | -namespace stim{ | |
10 | - namespace cuda{ | |
11 | - | |
12 | - template<typename T> | |
13 | - __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | |
14 | - | |
15 | - unsigned int sigma_ds = 1/resize; | |
16 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
17 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
18 | - | |
19 | - | |
20 | - // calculate the 2D coordinates for this current thread. | |
21 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | |
22 | - int yi = blockIdx.y; | |
23 | - // convert 2D coordinates to 1D | |
24 | - int i = yi * x_ds + xi; | |
25 | - | |
26 | - if(xi< x_ds && yi< y_ds){ | |
27 | - | |
28 | - int x_org = xi * sigma_ds ; | |
29 | - int y_org = yi * sigma_ds ; | |
30 | - int i_org = y_org * x + x_org; | |
31 | - gpuI[i] = gpuI0[i_org]; | |
32 | - } | |
33 | - | |
34 | - } | |
35 | - | |
36 | - | |
37 | - /// Applies a Gaussian blur to a 2D image stored on the GPU | |
38 | - template<typename T> | |
39 | - void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){ | |
40 | - | |
41 | - | |
42 | - unsigned int sigma_ds = (unsigned int)(1.0f/resize); | |
43 | - size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
44 | - size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
45 | - | |
46 | - //get the number of pixels in the image | |
47 | -// unsigned int pixels_ds = x_ds * y_ds; | |
48 | - | |
49 | - unsigned int max_threads = stim::maxThreadsPerBlock(); | |
50 | - dim3 threads(max_threads, 1); | |
51 | - dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds); | |
52 | - | |
53 | - stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); | |
54 | - | |
55 | - //resample the image | |
56 | - down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); | |
57 | - | |
58 | - } | |
59 | - | |
60 | - /// Applies a Gaussian blur to a 2D image stored on the CPU | |
61 | - template<typename T> | |
62 | - void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){ | |
63 | - | |
64 | - //get the number of pixels in the image | |
65 | - unsigned int pixels = x * y; | |
66 | - unsigned int bytes = sizeof(T) * pixels; | |
67 | - | |
68 | - unsigned int sigma_ds = 1/resize; | |
69 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
70 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
71 | - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds; | |
72 | - | |
73 | - | |
74 | - | |
75 | - //allocate space on the GPU for the original image | |
76 | - T* gpuI0; | |
77 | - cudaMalloc(&gpuI0, bytes); | |
78 | - | |
79 | - | |
80 | - //copy the image data to the GPU | |
81 | - cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); | |
82 | - | |
83 | - //allocate space on the GPU for the down sampled image | |
84 | - T* gpuI; | |
85 | - cudaMalloc(&gpuI, bytes_ds); | |
86 | - | |
87 | - //run the GPU-based version of the algorithm | |
88 | - gpu_down_sample<T>(gpuI, gpuI0, resize, x, y); | |
89 | - | |
90 | - //copy the image data to the GPU | |
91 | - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice); | |
92 | - | |
93 | - cudaFree(gpuI0); | |
94 | - cudeFree(gpuI); | |
95 | - } | |
96 | - | |
97 | - } | |
98 | -} | |
99 | - | |
100 | -#endif |
stim/cuda/ivote/re_sample.cuh deleted
1 | -#ifndef STIM_CUDA_RE_SAMPLE_H | |
2 | -#define STIM_CUDA_RE_SAMPLE_H | |
3 | - | |
4 | -#include <iostream> | |
5 | -#include <cuda.h> | |
6 | -#include <stim/cuda/cudatools.h> | |
7 | -#include <stim/cuda/templates/gaussian_blur.cuh> | |
8 | - | |
9 | -namespace stim{ | |
10 | - namespace cuda{ | |
11 | - | |
12 | - template<typename T> | |
13 | - __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | |
14 | - | |
15 | - unsigned int sigma_ds = 1/resize; | |
16 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
17 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
18 | - | |
19 | - | |
20 | - // calculate the 2D coordinates for this current thread. | |
21 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | |
22 | - int yi = blockIdx.y; | |
23 | - // convert 2D coordinates to 1D | |
24 | - int i = yi * x + xi; | |
25 | - | |
26 | - if(xi< x && yi< y){ | |
27 | - if(xi%sigma_ds==0){ | |
28 | - if(yi%sigma_ds==0){ | |
29 | - gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds]; | |
30 | - } | |
31 | - } | |
32 | - else gpuI[i] = 0; | |
33 | - | |
34 | - //int x_org = xi * sigma_ds ; | |
35 | - //int y_org = yi * sigma_ds ; | |
36 | - //int i_org = y_org * x + x_org; | |
37 | - //gpuI[i] = gpuI0[i_org]; | |
38 | - } | |
39 | - | |
40 | - } | |
41 | - | |
42 | - | |
43 | - /// Applies a Gaussian blur to a 2D image stored on the GPU | |
44 | - template<typename T> | |
45 | - void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | |
46 | - | |
47 | - | |
48 | - //unsigned int sigma_ds = 1/resize; | |
49 | - //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
50 | - //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
51 | - | |
52 | - //get the number of pixels in the image | |
53 | - //unsigned int pixels_ds = x_ds * y_ds; | |
54 | - | |
55 | - unsigned int max_threads = stim::maxThreadsPerBlock(); | |
56 | - dim3 threads(max_threads, 1); | |
57 | - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y); | |
58 | - | |
59 | - //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); | |
60 | - | |
61 | - //resample the image | |
62 | - cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); | |
63 | - | |
64 | - } | |
65 | - | |
66 | - /// Applies a Gaussian blur to a 2D image stored on the CPU | |
67 | - template<typename T> | |
68 | - void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){ | |
69 | - | |
70 | - //get the number of pixels in the image | |
71 | - unsigned int pixels = x*y; | |
72 | - unsigned int bytes = sizeof(T) * pixels; | |
73 | - | |
74 | - unsigned int sigma_ds = 1/resize; | |
75 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | |
76 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | |
77 | - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds; | |
78 | - | |
79 | - | |
80 | - | |
81 | - //allocate space on the GPU for the original image | |
82 | - T* gpuI0; | |
83 | - cudaMalloc(&gpuI0, bytes_ds); | |
84 | - | |
85 | - | |
86 | - //copy the image data to the GPU | |
87 | - cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice); | |
88 | - | |
89 | - //allocate space on the GPU for the down sampled image | |
90 | - T* gpuI; | |
91 | - cudaMalloc(&gpuI, bytes); | |
92 | - | |
93 | - //run the GPU-based version of the algorithm | |
94 | - gpu_re_sample<T>(gpuI, gpuI0, resize, x, y); | |
95 | - | |
96 | - //copy the image data to the GPU | |
97 | - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice); | |
98 | - | |
99 | - cudaFree(gpuI0); | |
100 | - cudeFree(gpuI); | |
101 | - } | |
102 | - | |
103 | - } | |
104 | -} | |
105 | - | |
106 | -#endif | |
107 | 0 | \ No newline at end of file |
1 | +#ifndef STIM_IVOTE2_CUH | |
2 | +#define STIM_IVOTE2_CUH | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <fstream> | |
6 | +#include <stim/cuda/cudatools/error.h> | |
7 | +#include <stim/cuda/templates/gradient.cuh> | |
8 | +#include <stim/cuda/arraymath.cuh> | |
9 | +#include <stim/iVote/ivote2/ivote2.cuh> | |
10 | +#include <stim/math/constants.h> | |
11 | +#include <stim/math/vector.h> | |
12 | +#include <stim/visualization/colormap.h> | |
13 | + | |
14 | +namespace stim { | |
15 | + | |
16 | + // this function precomputes the atan2 values | |
17 | + template<typename T> | |
18 | + void atan_2(T* cpuTable, unsigned int rmax) { | |
19 | + int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in. | |
20 | + int ysize = 2 * rmax + 1; | |
21 | + int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi | |
22 | + int xi = rmax; | |
23 | + for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table | |
24 | + for (int yt = 0; yt < ysize; yt++) { | |
25 | + int id = yt * xsize + xt; //convert the current 2D coordinates to 1D | |
26 | + int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window | |
27 | + int yd = yi - yt; | |
28 | + T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result. | |
29 | + cpuTable[id] = atan_2d; | |
30 | + } | |
31 | + } | |
32 | + } | |
33 | + | |
34 | + //this kernel invert the 2D image | |
35 | + template<typename T> | |
36 | + __global__ void cuda_invert(T* gpuI, size_t x, size_t y) { | |
37 | + // calculate the 2D coordinates for this current thread. | |
38 | + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; | |
39 | + size_t yi = blockIdx.y * blockDim.y + threadIdx.y; | |
40 | + | |
41 | + if (xi >= x || yi >= y) return; | |
42 | + size_t i = yi * x + xi; // convert 2D coordinates to 1D | |
43 | + gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity | |
44 | + } | |
45 | + | |
46 | + | |
47 | + | |
48 | + //this function calculate the threshold using OTSU method | |
49 | + template<typename T> | |
50 | + T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) { | |
51 | + T Imax = pts[0]; //initialize the maximum value to the first one | |
52 | + T Imin = pts[0]; //initialize the maximum value to the first on | |
53 | + | |
54 | + for (size_t n = 0; n < pixels; n++) { //for every value | |
55 | + if (pts[n] > Imax) { //if the value is higher than the current max | |
56 | + Imax = pts[n]; | |
57 | + } | |
58 | + } | |
59 | + for (size_t n = 0; n< pixels; n++) { //for every value | |
60 | + if (pts[n] < Imin) { //if the value is higher than the current max | |
61 | + Imin = pts[n]; | |
62 | + } | |
63 | + } | |
64 | + | |
65 | + T th_step = ((Imax - Imin) / th_num); | |
66 | + vector<T> var_b; | |
67 | + for (unsigned int t0 = 0; t0 < th_num; t0++) { | |
68 | + T th = t0 * th_step + Imin; | |
69 | + unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold | |
70 | + T m_b(0), m_o(0); //these variables save the mean value for each cluster | |
71 | + for (unsigned int idx = 0; idx < pixels; idx++) { | |
72 | + if (pts[idx] <= th) { | |
73 | + m_b += pts[idx]; | |
74 | + n_b += 1; | |
75 | + } | |
76 | + else { | |
77 | + m_o += pts[idx]; | |
78 | + n_o += 1; | |
79 | + } | |
80 | + } | |
81 | + | |
82 | + m_b = m_b / n_b; //calculate the mean value for the below threshold cluster | |
83 | + m_o = m_o / n_o; //calculate the mean value for the over threshold cluster | |
84 | + | |
85 | + var_b.push_back(n_b * n_o * pow((m_b - m_o), 2)); | |
86 | + } | |
87 | + | |
88 | + vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector | |
89 | + size_t th_idx = std::distance(var_b.begin(), max_var); | |
90 | + T threshold = Imin + (T)(th_idx * th_step); | |
91 | + return threshold; | |
92 | + } | |
93 | + | |
94 | + //this function performs the 2D iterative voting algorithm on the image stored in the gpu | |
95 | + template<typename T> | |
96 | + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
97 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | |
98 | + | |
99 | + size_t pixels = x * y; //compute the size of input image | |
100 | + // | |
101 | + if (invert) { //if inversion is required call the kernel to invert the image | |
102 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | |
103 | + dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads)); | |
104 | + dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1); | |
105 | + cuda_invert << <blocks, threads >> > (gpuI, x, y); | |
106 | + } | |
107 | + // | |
108 | + size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table | |
109 | + T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table | |
110 | + atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table | |
111 | + T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes)); | |
112 | + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu | |
113 | + | |
114 | + size_t bytes = pixels* sizeof(T); //calculate the bytes of the input | |
115 | + float dphi = phi / iter; //change in phi for each iteration | |
116 | + | |
117 | + float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient | |
118 | + float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image | |
119 | + | |
120 | + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient | |
121 | + //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI); | |
122 | + //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | |
123 | + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar | |
124 | + | |
125 | + for (int i = 0; i < iter; i++) { //for each iteration | |
126 | + cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 | |
127 | + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting | |
128 | + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions | |
129 | + phi = phi - dphi; //decrement phi | |
130 | + } | |
131 | + stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima | |
132 | + | |
133 | + T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting | |
134 | + HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory | |
135 | + | |
136 | + T threshold; | |
137 | + if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold | |
138 | + else threshold = t; | |
139 | + | |
140 | + std::ofstream output; //save the thresholded detected seeds in a text file | |
141 | + output.open(outname_txt); | |
142 | + output << "X" << " " << "Y" << " " << "threshold" << "\n"; | |
143 | + size_t ind; | |
144 | + for (size_t ix = 0; ix < x; ix++) { | |
145 | + for (size_t iy = 0; iy < y; iy++) { | |
146 | + ind = iy * x + ix; | |
147 | + if (pts[ind] > threshold) { | |
148 | + output << ix << " " << iy << " " << pts[ind] << "\n"; | |
149 | + pts[ind] = 1; | |
150 | + } | |
151 | + else pts[ind] = 0; | |
152 | + } | |
153 | + } | |
154 | + output.close(); | |
155 | + | |
156 | + HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu | |
157 | + stim::cpu2image(pts, outname_img, x, y); //output the image | |
158 | + | |
159 | + } | |
160 | + | |
161 | + | |
162 | + template<typename T> | |
163 | + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
164 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | |
165 | + size_t bytes = x*y * sizeof(T); | |
166 | + T* gpuI; //allocate space on the gpu to save the input image | |
167 | + HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); | |
168 | + HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu | |
169 | + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote | |
170 | + HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu | |
171 | + } | |
172 | +} | |
173 | +#endif | |
0 | 174 | \ No newline at end of file | ... | ... |
stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh
1 | -#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H | |
2 | -#define STIM_CUDA_IVOTE_ATOMIC_BB_H | |
1 | +#ifndef STIM_CUDA_ITER_VOTE2_H | |
2 | +#define STIM_CUDA_ITER_VOTE2_H | |
3 | 3 | |
4 | 4 | extern bool DEBUG; |
5 | -#include <stim/cuda/ivote/down_sample.cuh> | |
6 | -#include <stim/cuda/ivote/local_max.cuh> | |
7 | -#include <stim/cuda/ivote/update_dir_bb.cuh> | |
8 | -#include <stim/cuda/ivote/vote_atomic_bb.cuh> | |
5 | + | |
6 | +#include "local_max.cuh" | |
7 | +#include "update_dir_bb.cuh" | |
8 | +#include "vote_atomic_bb.cuh" | |
9 | 9 | |
10 | 10 | namespace stim{ |
11 | 11 | namespace cuda{ | ... | ... |
stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh
stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh
stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh
stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh
stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh
stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh
stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh
stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh
stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh
stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh
stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh
stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh