Commit 9d087c90863fb562c9838d25f73620aa0b84027e

Authored by David Mayerich
2 parents 3c2d1625 3f2d0f9f

Merge branch 'ivote2' into 'master'

changing the ivote directory from stim/cuda/ to stim/ and add a cuda header file…

… to include ivote2 function

See merge request !33
stim/cuda/ivote.cuh deleted
1 -#ifndef STIM_CUDA_IVOTE_H  
2 -#define STIM_CUDA_IVOTE_H  
3 -  
4 -#include <stim/cuda/ivote/down_sample.cuh>  
5 -#include <stim/cuda/ivote/local_max.cuh>  
6 -#include <stim/cuda/ivote/update_dir.cuh>  
7 -#include <stim/cuda/ivote/vote.cuh>  
8 -  
9 -namespace stim{  
10 - namespace cuda{  
11 -  
12 - }  
13 -}  
14 -  
15 -  
16 -  
17 -#endif  
18 \ No newline at end of file 0 \ No newline at end of file
stim/cuda/ivote/down_sample.cuh deleted
1 -#ifndef STIM_CUDA_DOWN_SAMPLE_H  
2 -#define STIM_CUDA_DOWN_SAMPLE_H  
3 -  
4 -#include <iostream>  
5 -#include <cuda.h>  
6 -#include <stim/cuda/cudatools.h>  
7 -#include <stim/cuda/templates/gaussian_blur.cuh>  
8 -  
9 -namespace stim{  
10 - namespace cuda{  
11 -  
12 - template<typename T>  
13 - __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){  
14 -  
15 - unsigned int sigma_ds = 1/resize;  
16 - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
17 - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
18 -  
19 -  
20 - // calculate the 2D coordinates for this current thread.  
21 - int xi = blockIdx.x * blockDim.x + threadIdx.x;  
22 - int yi = blockIdx.y;  
23 - // convert 2D coordinates to 1D  
24 - int i = yi * x_ds + xi;  
25 -  
26 - if(xi< x_ds && yi< y_ds){  
27 -  
28 - int x_org = xi * sigma_ds ;  
29 - int y_org = yi * sigma_ds ;  
30 - int i_org = y_org * x + x_org;  
31 - gpuI[i] = gpuI0[i_org];  
32 - }  
33 -  
34 - }  
35 -  
36 -  
37 - /// Applies a Gaussian blur to a 2D image stored on the GPU  
38 - template<typename T>  
39 - void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){  
40 -  
41 -  
42 - unsigned int sigma_ds = (unsigned int)(1.0f/resize);  
43 - size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
44 - size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
45 -  
46 - //get the number of pixels in the image  
47 -// unsigned int pixels_ds = x_ds * y_ds;  
48 -  
49 - unsigned int max_threads = stim::maxThreadsPerBlock();  
50 - dim3 threads(max_threads, 1);  
51 - dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);  
52 -  
53 - stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);  
54 -  
55 - //resample the image  
56 - down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);  
57 -  
58 - }  
59 -  
60 - /// Applies a Gaussian blur to a 2D image stored on the CPU  
61 - template<typename T>  
62 - void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){  
63 -  
64 - //get the number of pixels in the image  
65 - unsigned int pixels = x * y;  
66 - unsigned int bytes = sizeof(T) * pixels;  
67 -  
68 - unsigned int sigma_ds = 1/resize;  
69 - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
70 - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
71 - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;  
72 -  
73 -  
74 -  
75 - //allocate space on the GPU for the original image  
76 - T* gpuI0;  
77 - cudaMalloc(&gpuI0, bytes);  
78 -  
79 -  
80 - //copy the image data to the GPU  
81 - cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);  
82 -  
83 - //allocate space on the GPU for the down sampled image  
84 - T* gpuI;  
85 - cudaMalloc(&gpuI, bytes_ds);  
86 -  
87 - //run the GPU-based version of the algorithm  
88 - gpu_down_sample<T>(gpuI, gpuI0, resize, x, y);  
89 -  
90 - //copy the image data to the GPU  
91 - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);  
92 -  
93 - cudaFree(gpuI0);  
94 - cudeFree(gpuI);  
95 - }  
96 -  
97 - }  
98 -}  
99 -  
100 -#endif  
stim/cuda/ivote/re_sample.cuh deleted
1 -#ifndef STIM_CUDA_RE_SAMPLE_H  
2 -#define STIM_CUDA_RE_SAMPLE_H  
3 -  
4 -#include <iostream>  
5 -#include <cuda.h>  
6 -#include <stim/cuda/cudatools.h>  
7 -#include <stim/cuda/templates/gaussian_blur.cuh>  
8 -  
9 -namespace stim{  
10 - namespace cuda{  
11 -  
12 - template<typename T>  
13 - __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){  
14 -  
15 - unsigned int sigma_ds = 1/resize;  
16 - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
17 - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
18 -  
19 -  
20 - // calculate the 2D coordinates for this current thread.  
21 - int xi = blockIdx.x * blockDim.x + threadIdx.x;  
22 - int yi = blockIdx.y;  
23 - // convert 2D coordinates to 1D  
24 - int i = yi * x + xi;  
25 -  
26 - if(xi< x && yi< y){  
27 - if(xi%sigma_ds==0){  
28 - if(yi%sigma_ds==0){  
29 - gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];  
30 - }  
31 - }  
32 - else gpuI[i] = 0;  
33 -  
34 - //int x_org = xi * sigma_ds ;  
35 - //int y_org = yi * sigma_ds ;  
36 - //int i_org = y_org * x + x_org;  
37 - //gpuI[i] = gpuI0[i_org];  
38 - }  
39 -  
40 - }  
41 -  
42 -  
43 - /// Applies a Gaussian blur to a 2D image stored on the GPU  
44 - template<typename T>  
45 - void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){  
46 -  
47 -  
48 - //unsigned int sigma_ds = 1/resize;  
49 - //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
50 - //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
51 -  
52 - //get the number of pixels in the image  
53 - //unsigned int pixels_ds = x_ds * y_ds;  
54 -  
55 - unsigned int max_threads = stim::maxThreadsPerBlock();  
56 - dim3 threads(max_threads, 1);  
57 - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);  
58 -  
59 - //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);  
60 -  
61 - //resample the image  
62 - cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);  
63 -  
64 - }  
65 -  
66 - /// Applies a Gaussian blur to a 2D image stored on the CPU  
67 - template<typename T>  
68 - void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){  
69 -  
70 - //get the number of pixels in the image  
71 - unsigned int pixels = x*y;  
72 - unsigned int bytes = sizeof(T) * pixels;  
73 -  
74 - unsigned int sigma_ds = 1/resize;  
75 - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));  
76 - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));  
77 - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;  
78 -  
79 -  
80 -  
81 - //allocate space on the GPU for the original image  
82 - T* gpuI0;  
83 - cudaMalloc(&gpuI0, bytes_ds);  
84 -  
85 -  
86 - //copy the image data to the GPU  
87 - cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);  
88 -  
89 - //allocate space on the GPU for the down sampled image  
90 - T* gpuI;  
91 - cudaMalloc(&gpuI, bytes);  
92 -  
93 - //run the GPU-based version of the algorithm  
94 - gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);  
95 -  
96 - //copy the image data to the GPU  
97 - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);  
98 -  
99 - cudaFree(gpuI0);  
100 - cudeFree(gpuI);  
101 - }  
102 -  
103 - }  
104 -}  
105 -  
106 -#endif  
107 \ No newline at end of file 0 \ No newline at end of file
stim/iVote/ivote2.cuh 0 → 100644
  1 +#ifndef STIM_IVOTE2_CUH
  2 +#define STIM_IVOTE2_CUH
  3 +
  4 +#include <iostream>
  5 +#include <fstream>
  6 +#include <stim/cuda/cudatools/error.h>
  7 +#include <stim/cuda/templates/gradient.cuh>
  8 +#include <stim/cuda/arraymath.cuh>
  9 +#include <stim/iVote/ivote2/ivote2.cuh>
  10 +#include <stim/math/constants.h>
  11 +#include <stim/math/vector.h>
  12 +#include <stim/visualization/colormap.h>
  13 +
  14 +namespace stim {
  15 +
  16 + // this function precomputes the atan2 values
  17 + template<typename T>
  18 + void atan_2(T* cpuTable, unsigned int rmax) {
  19 + int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in.
  20 + int ysize = 2 * rmax + 1;
  21 + int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi
  22 + int xi = rmax;
  23 + for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table
  24 + for (int yt = 0; yt < ysize; yt++) {
  25 + int id = yt * xsize + xt; //convert the current 2D coordinates to 1D
  26 + int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window
  27 + int yd = yi - yt;
  28 + T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result.
  29 + cpuTable[id] = atan_2d;
  30 + }
  31 + }
  32 + }
  33 +
  34 + //this kernel invert the 2D image
  35 + template<typename T>
  36 + __global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
  37 + // calculate the 2D coordinates for this current thread.
  38 + size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
  39 + size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  40 +
  41 + if (xi >= x || yi >= y) return;
  42 + size_t i = yi * x + xi; // convert 2D coordinates to 1D
  43 + gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity
  44 + }
  45 +
  46 +
  47 +
  48 + //this function calculate the threshold using OTSU method
  49 + template<typename T>
  50 + T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
  51 + T Imax = pts[0]; //initialize the maximum value to the first one
  52 + T Imin = pts[0]; //initialize the maximum value to the first on
  53 +
  54 + for (size_t n = 0; n < pixels; n++) { //for every value
  55 + if (pts[n] > Imax) { //if the value is higher than the current max
  56 + Imax = pts[n];
  57 + }
  58 + }
  59 + for (size_t n = 0; n< pixels; n++) { //for every value
  60 + if (pts[n] < Imin) { //if the value is higher than the current max
  61 + Imin = pts[n];
  62 + }
  63 + }
  64 +
  65 + T th_step = ((Imax - Imin) / th_num);
  66 + vector<T> var_b;
  67 + for (unsigned int t0 = 0; t0 < th_num; t0++) {
  68 + T th = t0 * th_step + Imin;
  69 + unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold
  70 + T m_b(0), m_o(0); //these variables save the mean value for each cluster
  71 + for (unsigned int idx = 0; idx < pixels; idx++) {
  72 + if (pts[idx] <= th) {
  73 + m_b += pts[idx];
  74 + n_b += 1;
  75 + }
  76 + else {
  77 + m_o += pts[idx];
  78 + n_o += 1;
  79 + }
  80 + }
  81 +
  82 + m_b = m_b / n_b; //calculate the mean value for the below threshold cluster
  83 + m_o = m_o / n_o; //calculate the mean value for the over threshold cluster
  84 +
  85 + var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
  86 + }
  87 +
  88 + vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector
  89 + size_t th_idx = std::distance(var_b.begin(), max_var);
  90 + T threshold = Imin + (T)(th_idx * th_step);
  91 + return threshold;
  92 + }
  93 +
  94 + //this function performs the 2D iterative voting algorithm on the image stored in the gpu
  95 + template<typename T>
  96 + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  97 + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  98 +
  99 + size_t pixels = x * y; //compute the size of input image
  100 + //
  101 + if (invert) { //if inversion is required call the kernel to invert the image
  102 + unsigned int max_threads = stim::maxThreadsPerBlock();
  103 + dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
  104 + dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
  105 + cuda_invert << <blocks, threads >> > (gpuI, x, y);
  106 + }
  107 + //
  108 + size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table
  109 + T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table
  110 + atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table
  111 + T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
  112 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu
  113 +
  114 + size_t bytes = pixels* sizeof(T); //calculate the bytes of the input
  115 + float dphi = phi / iter; //change in phi for each iteration
  116 +
  117 + float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient
  118 + float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image
  119 +
  120 + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient
  121 + //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI);
  122 + //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  123 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar
  124 +
  125 + for (int i = 0; i < iter; i++) { //for each iteration
  126 + cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0
  127 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting
  128 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions
  129 + phi = phi - dphi; //decrement phi
  130 + }
  131 + stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima
  132 +
  133 + T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting
  134 + HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory
  135 +
  136 + T threshold;
  137 + if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold
  138 + else threshold = t;
  139 +
  140 + std::ofstream output; //save the thresholded detected seeds in a text file
  141 + output.open(outname_txt);
  142 + output << "X" << " " << "Y" << " " << "threshold" << "\n";
  143 + size_t ind;
  144 + for (size_t ix = 0; ix < x; ix++) {
  145 + for (size_t iy = 0; iy < y; iy++) {
  146 + ind = iy * x + ix;
  147 + if (pts[ind] > threshold) {
  148 + output << ix << " " << iy << " " << pts[ind] << "\n";
  149 + pts[ind] = 1;
  150 + }
  151 + else pts[ind] = 0;
  152 + }
  153 + }
  154 + output.close();
  155 +
  156 + HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu
  157 + stim::cpu2image(pts, outname_img, x, y); //output the image
  158 +
  159 + }
  160 +
  161 +
  162 + template<typename T>
  163 + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  164 + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  165 + size_t bytes = x*y * sizeof(T);
  166 + T* gpuI; //allocate space on the gpu to save the input image
  167 + HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
  168 + HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu
  169 + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote
  170 + HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu
  171 + }
  172 +}
  173 +#endif
0 \ No newline at end of file 174 \ No newline at end of file
stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh
1 -#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H  
2 -#define STIM_CUDA_IVOTE_ATOMIC_BB_H 1 +#ifndef STIM_CUDA_ITER_VOTE2_H
  2 +#define STIM_CUDA_ITER_VOTE2_H
3 3
4 extern bool DEBUG; 4 extern bool DEBUG;
5 -#include <stim/cuda/ivote/down_sample.cuh>  
6 -#include <stim/cuda/ivote/local_max.cuh>  
7 -#include <stim/cuda/ivote/update_dir_bb.cuh>  
8 -#include <stim/cuda/ivote/vote_atomic_bb.cuh> 5 +
  6 +#include "local_max.cuh"
  7 +#include "update_dir_bb.cuh"
  8 +#include "vote_atomic_bb.cuh"
9 9
10 namespace stim{ 10 namespace stim{
11 namespace cuda{ 11 namespace cuda{
stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh
stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh
stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh
stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh
stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh
stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh
stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh
stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh
stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh
stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh
stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh
stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh