Commit 035d968fd376e2ab1e0edc87de2c12dc9a944a6c

Authored by David Mayerich
2 parents 3e3f30a2 9d087c90

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/cuda/ivote.cuh deleted
1   -#ifndef STIM_CUDA_IVOTE_H
2   -#define STIM_CUDA_IVOTE_H
3   -
4   -#include <stim/cuda/ivote/down_sample.cuh>
5   -#include <stim/cuda/ivote/local_max.cuh>
6   -#include <stim/cuda/ivote/update_dir.cuh>
7   -#include <stim/cuda/ivote/vote.cuh>
8   -
9   -namespace stim{
10   - namespace cuda{
11   -
12   - }
13   -}
14   -
15   -
16   -
17   -#endif
18 0 \ No newline at end of file
stim/cuda/ivote/down_sample.cuh deleted
1   -#ifndef STIM_CUDA_DOWN_SAMPLE_H
2   -#define STIM_CUDA_DOWN_SAMPLE_H
3   -
4   -#include <iostream>
5   -#include <cuda.h>
6   -#include <stim/cuda/cudatools.h>
7   -#include <stim/cuda/templates/gaussian_blur.cuh>
8   -
9   -namespace stim{
10   - namespace cuda{
11   -
12   - template<typename T>
13   - __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14   -
15   - unsigned int sigma_ds = 1/resize;
16   - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17   - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18   -
19   -
20   - // calculate the 2D coordinates for this current thread.
21   - int xi = blockIdx.x * blockDim.x + threadIdx.x;
22   - int yi = blockIdx.y;
23   - // convert 2D coordinates to 1D
24   - int i = yi * x_ds + xi;
25   -
26   - if(xi< x_ds && yi< y_ds){
27   -
28   - int x_org = xi * sigma_ds ;
29   - int y_org = yi * sigma_ds ;
30   - int i_org = y_org * x + x_org;
31   - gpuI[i] = gpuI0[i_org];
32   - }
33   -
34   - }
35   -
36   -
37   - /// Applies a Gaussian blur to a 2D image stored on the GPU
38   - template<typename T>
39   - void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){
40   -
41   -
42   - unsigned int sigma_ds = (unsigned int)(1.0f/resize);
43   - size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
44   - size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
45   -
46   - //get the number of pixels in the image
47   -// unsigned int pixels_ds = x_ds * y_ds;
48   -
49   - unsigned int max_threads = stim::maxThreadsPerBlock();
50   - dim3 threads(max_threads, 1);
51   - dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);
52   -
53   - stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
54   -
55   - //resample the image
56   - down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
57   -
58   - }
59   -
60   - /// Applies a Gaussian blur to a 2D image stored on the CPU
61   - template<typename T>
62   - void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){
63   -
64   - //get the number of pixels in the image
65   - unsigned int pixels = x * y;
66   - unsigned int bytes = sizeof(T) * pixels;
67   -
68   - unsigned int sigma_ds = 1/resize;
69   - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
70   - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
71   - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
72   -
73   -
74   -
75   - //allocate space on the GPU for the original image
76   - T* gpuI0;
77   - cudaMalloc(&gpuI0, bytes);
78   -
79   -
80   - //copy the image data to the GPU
81   - cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);
82   -
83   - //allocate space on the GPU for the down sampled image
84   - T* gpuI;
85   - cudaMalloc(&gpuI, bytes_ds);
86   -
87   - //run the GPU-based version of the algorithm
88   - gpu_down_sample<T>(gpuI, gpuI0, resize, x, y);
89   -
90   - //copy the image data to the GPU
91   - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
92   -
93   - cudaFree(gpuI0);
94   - cudeFree(gpuI);
95   - }
96   -
97   - }
98   -}
99   -
100   -#endif
stim/cuda/ivote/re_sample.cuh deleted
1   -#ifndef STIM_CUDA_RE_SAMPLE_H
2   -#define STIM_CUDA_RE_SAMPLE_H
3   -
4   -#include <iostream>
5   -#include <cuda.h>
6   -#include <stim/cuda/cudatools.h>
7   -#include <stim/cuda/templates/gaussian_blur.cuh>
8   -
9   -namespace stim{
10   - namespace cuda{
11   -
12   - template<typename T>
13   - __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14   -
15   - unsigned int sigma_ds = 1/resize;
16   - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17   - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18   -
19   -
20   - // calculate the 2D coordinates for this current thread.
21   - int xi = blockIdx.x * blockDim.x + threadIdx.x;
22   - int yi = blockIdx.y;
23   - // convert 2D coordinates to 1D
24   - int i = yi * x + xi;
25   -
26   - if(xi< x && yi< y){
27   - if(xi%sigma_ds==0){
28   - if(yi%sigma_ds==0){
29   - gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
30   - }
31   - }
32   - else gpuI[i] = 0;
33   -
34   - //int x_org = xi * sigma_ds ;
35   - //int y_org = yi * sigma_ds ;
36   - //int i_org = y_org * x + x_org;
37   - //gpuI[i] = gpuI0[i_org];
38   - }
39   -
40   - }
41   -
42   -
43   - /// Applies a Gaussian blur to a 2D image stored on the GPU
44   - template<typename T>
45   - void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
46   -
47   -
48   - //unsigned int sigma_ds = 1/resize;
49   - //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
50   - //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
51   -
52   - //get the number of pixels in the image
53   - //unsigned int pixels_ds = x_ds * y_ds;
54   -
55   - unsigned int max_threads = stim::maxThreadsPerBlock();
56   - dim3 threads(max_threads, 1);
57   - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
58   -
59   - //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
60   -
61   - //resample the image
62   - cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
63   -
64   - }
65   -
66   - /// Applies a Gaussian blur to a 2D image stored on the CPU
67   - template<typename T>
68   - void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
69   -
70   - //get the number of pixels in the image
71   - unsigned int pixels = x*y;
72   - unsigned int bytes = sizeof(T) * pixels;
73   -
74   - unsigned int sigma_ds = 1/resize;
75   - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
76   - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
77   - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
78   -
79   -
80   -
81   - //allocate space on the GPU for the original image
82   - T* gpuI0;
83   - cudaMalloc(&gpuI0, bytes_ds);
84   -
85   -
86   - //copy the image data to the GPU
87   - cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
88   -
89   - //allocate space on the GPU for the down sampled image
90   - T* gpuI;
91   - cudaMalloc(&gpuI, bytes);
92   -
93   - //run the GPU-based version of the algorithm
94   - gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
95   -
96   - //copy the image data to the GPU
97   - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
98   -
99   - cudaFree(gpuI0);
100   - cudeFree(gpuI);
101   - }
102   -
103   - }
104   -}
105   -
106   -#endif
107 0 \ No newline at end of file
stim/iVote/ivote2.cuh 0 โ†’ 100644
  1 +#ifndef STIM_IVOTE2_CUH
  2 +#define STIM_IVOTE2_CUH
  3 +
  4 +#include <iostream>
  5 +#include <fstream>
  6 +#include <stim/cuda/cudatools/error.h>
  7 +#include <stim/cuda/templates/gradient.cuh>
  8 +#include <stim/cuda/arraymath.cuh>
  9 +#include <stim/iVote/ivote2/ivote2.cuh>
  10 +#include <stim/math/constants.h>
  11 +#include <stim/math/vector.h>
  12 +#include <stim/visualization/colormap.h>
  13 +
  14 +namespace stim {
  15 +
  16 + // this function precomputes the atan2 values
  17 + template<typename T>
  18 + void atan_2(T* cpuTable, unsigned int rmax) {
  19 + int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in.
  20 + int ysize = 2 * rmax + 1;
  21 + int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi
  22 + int xi = rmax;
  23 + for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table
  24 + for (int yt = 0; yt < ysize; yt++) {
  25 + int id = yt * xsize + xt; //convert the current 2D coordinates to 1D
  26 + int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window
  27 + int yd = yi - yt;
  28 + T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result.
  29 + cpuTable[id] = atan_2d;
  30 + }
  31 + }
  32 + }
  33 +
  34 + //this kernel invert the 2D image
  35 + template<typename T>
  36 + __global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
  37 + // calculate the 2D coordinates for this current thread.
  38 + size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
  39 + size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  40 +
  41 + if (xi >= x || yi >= y) return;
  42 + size_t i = yi * x + xi; // convert 2D coordinates to 1D
  43 + gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity
  44 + }
  45 +
  46 +
  47 +
  48 + //this function calculate the threshold using OTSU method
  49 + template<typename T>
  50 + T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
  51 + T Imax = pts[0]; //initialize the maximum value to the first one
  52 + T Imin = pts[0]; //initialize the maximum value to the first on
  53 +
  54 + for (size_t n = 0; n < pixels; n++) { //for every value
  55 + if (pts[n] > Imax) { //if the value is higher than the current max
  56 + Imax = pts[n];
  57 + }
  58 + }
  59 + for (size_t n = 0; n< pixels; n++) { //for every value
  60 + if (pts[n] < Imin) { //if the value is higher than the current max
  61 + Imin = pts[n];
  62 + }
  63 + }
  64 +
  65 + T th_step = ((Imax - Imin) / th_num);
  66 + vector<T> var_b;
  67 + for (unsigned int t0 = 0; t0 < th_num; t0++) {
  68 + T th = t0 * th_step + Imin;
  69 + unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold
  70 + T m_b(0), m_o(0); //these variables save the mean value for each cluster
  71 + for (unsigned int idx = 0; idx < pixels; idx++) {
  72 + if (pts[idx] <= th) {
  73 + m_b += pts[idx];
  74 + n_b += 1;
  75 + }
  76 + else {
  77 + m_o += pts[idx];
  78 + n_o += 1;
  79 + }
  80 + }
  81 +
  82 + m_b = m_b / n_b; //calculate the mean value for the below threshold cluster
  83 + m_o = m_o / n_o; //calculate the mean value for the over threshold cluster
  84 +
  85 + var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
  86 + }
  87 +
  88 + vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector
  89 + size_t th_idx = std::distance(var_b.begin(), max_var);
  90 + T threshold = Imin + (T)(th_idx * th_step);
  91 + return threshold;
  92 + }
  93 +
  94 + //this function performs the 2D iterative voting algorithm on the image stored in the gpu
  95 + template<typename T>
  96 + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  97 + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  98 +
  99 + size_t pixels = x * y; //compute the size of input image
  100 + //
  101 + if (invert) { //if inversion is required call the kernel to invert the image
  102 + unsigned int max_threads = stim::maxThreadsPerBlock();
  103 + dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
  104 + dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
  105 + cuda_invert << <blocks, threads >> > (gpuI, x, y);
  106 + }
  107 + //
  108 + size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table
  109 + T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table
  110 + atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table
  111 + T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
  112 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu
  113 +
  114 + size_t bytes = pixels* sizeof(T); //calculate the bytes of the input
  115 + float dphi = phi / iter; //change in phi for each iteration
  116 +
  117 + float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient
  118 + float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image
  119 +
  120 + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient
  121 + //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI);
  122 + //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  123 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar
  124 +
  125 + for (int i = 0; i < iter; i++) { //for each iteration
  126 + cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0
  127 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting
  128 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions
  129 + phi = phi - dphi; //decrement phi
  130 + }
  131 + stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima
  132 +
  133 + T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting
  134 + HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory
  135 +
  136 + T threshold;
  137 + if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold
  138 + else threshold = t;
  139 +
  140 + std::ofstream output; //save the thresholded detected seeds in a text file
  141 + output.open(outname_txt);
  142 + output << "X" << " " << "Y" << " " << "threshold" << "\n";
  143 + size_t ind;
  144 + for (size_t ix = 0; ix < x; ix++) {
  145 + for (size_t iy = 0; iy < y; iy++) {
  146 + ind = iy * x + ix;
  147 + if (pts[ind] > threshold) {
  148 + output << ix << " " << iy << " " << pts[ind] << "\n";
  149 + pts[ind] = 1;
  150 + }
  151 + else pts[ind] = 0;
  152 + }
  153 + }
  154 + output.close();
  155 +
  156 + HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu
  157 + stim::cpu2image(pts, outname_img, x, y); //output the image
  158 +
  159 + }
  160 +
  161 +
  162 + template<typename T>
  163 + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
  164 + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
  165 + size_t bytes = x*y * sizeof(T);
  166 + T* gpuI; //allocate space on the gpu to save the input image
  167 + HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
  168 + HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu
  169 + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote
  170 + HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu
  171 + }
  172 +}
  173 +#endif
0 174 \ No newline at end of file
... ...
stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh
1   -#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
2   -#define STIM_CUDA_IVOTE_ATOMIC_BB_H
  1 +#ifndef STIM_CUDA_ITER_VOTE2_H
  2 +#define STIM_CUDA_ITER_VOTE2_H
3 3  
4 4 extern bool DEBUG;
5   -#include <stim/cuda/ivote/down_sample.cuh>
6   -#include <stim/cuda/ivote/local_max.cuh>
7   -#include <stim/cuda/ivote/update_dir_bb.cuh>
8   -#include <stim/cuda/ivote/vote_atomic_bb.cuh>
  5 +
  6 +#include "local_max.cuh"
  7 +#include "update_dir_bb.cuh"
  8 +#include "vote_atomic_bb.cuh"
9 9  
10 10 namespace stim{
11 11 namespace cuda{
... ...
stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh
stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh
stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh
stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh
stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh
stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh
stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh
stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh
stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh
stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh
stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh
stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh