codebase / stimlib

Commit 9d087c90863fb562c9838d25f73620aa0b84027e

Authored by David Mayerich 2017-03-10 10:51:12 -0600

Merge branch 'ivote2' into 'master'

changing the ivote directory from stim/cuda/ to stim/ and add a cuda header file…

… to include ivote2 function

See merge request !33

Showing 17 changed files with 179 additions and 229 deletions Show diff stats

Inline Side-by-side

stim/cuda/ivote.cuh deleted

View file @3c2d162

1		-#ifndef STIM_CUDA_IVOTE_H
2		-#define STIM_CUDA_IVOTE_H
3		-
4		-#include <stim/cuda/ivote/down_sample.cuh>
5		-#include <stim/cuda/ivote/local_max.cuh>
6		-#include <stim/cuda/ivote/update_dir.cuh>
7		-#include <stim/cuda/ivote/vote.cuh>
8		-
9		-namespace stim{
10		- namespace cuda{
11		-
12		- }
13		-}
14		-
15		-
16		-
17		-#endif
18	0	\ No newline at end of file

stim/cuda/ivote/down_sample.cuh deleted

View file @3c2d162

1		-#ifndef STIM_CUDA_DOWN_SAMPLE_H
2		-#define STIM_CUDA_DOWN_SAMPLE_H
3		-
4		-#include <iostream>
5		-#include <cuda.h>
6		-#include <stim/cuda/cudatools.h>
7		-#include <stim/cuda/templates/gaussian_blur.cuh>
8		-
9		-namespace stim{
10		- namespace cuda{
11		-
12		- template<typename T>
13		- __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14		-
15		- unsigned int sigma_ds = 1/resize;
16		- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17		- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18		-
19		-
20		- // calculate the 2D coordinates for this current thread.
21		- int xi = blockIdx.x * blockDim.x + threadIdx.x;
22		- int yi = blockIdx.y;
23		- // convert 2D coordinates to 1D
24		- int i = yi * x_ds + xi;
25		-
26		- if(xi< x_ds && yi< y_ds){
27		-
28		- int x_org = xi * sigma_ds ;
29		- int y_org = yi * sigma_ds ;
30		- int i_org = y_org * x + x_org;
31		- gpuI[i] = gpuI0[i_org];
32		- }
33		-
34		- }
35		-
36		-
37		- /// Applies a Gaussian blur to a 2D image stored on the GPU
38		- template<typename T>
39		- void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){
40		-
41		-
42		- unsigned int sigma_ds = (unsigned int)(1.0f/resize);
43		- size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
44		- size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
45		-
46		- //get the number of pixels in the image
47		-// unsigned int pixels_ds = x_ds * y_ds;
48		-
49		- unsigned int max_threads = stim::maxThreadsPerBlock();
50		- dim3 threads(max_threads, 1);
51		- dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);
52		-
53		- stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
54		-
55		- //resample the image
56		- down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
57		-
58		- }
59		-
60		- /// Applies a Gaussian blur to a 2D image stored on the CPU
61		- template<typename T>
62		- void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){
63		-
64		- //get the number of pixels in the image
65		- unsigned int pixels = x * y;
66		- unsigned int bytes = sizeof(T) * pixels;
67		-
68		- unsigned int sigma_ds = 1/resize;
69		- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
70		- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
71		- unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
72		-
73		-
74		-
75		- //allocate space on the GPU for the original image
76		- T* gpuI0;
77		- cudaMalloc(&gpuI0, bytes);
78		-
79		-
80		- //copy the image data to the GPU
81		- cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);
82		-
83		- //allocate space on the GPU for the down sampled image
84		- T* gpuI;
85		- cudaMalloc(&gpuI, bytes_ds);
86		-
87		- //run the GPU-based version of the algorithm
88		- gpu_down_sample<T>(gpuI, gpuI0, resize, x, y);
89		-
90		- //copy the image data to the GPU
91		- cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
92		-
93		- cudaFree(gpuI0);
94		- cudeFree(gpuI);
95		- }
96		-
97		- }
98		-}
99		-
100		-#endif

stim/cuda/ivote/re_sample.cuh deleted

View file @3c2d162

1		-#ifndef STIM_CUDA_RE_SAMPLE_H
2		-#define STIM_CUDA_RE_SAMPLE_H
3		-
4		-#include <iostream>
5		-#include <cuda.h>
6		-#include <stim/cuda/cudatools.h>
7		-#include <stim/cuda/templates/gaussian_blur.cuh>
8		-
9		-namespace stim{
10		- namespace cuda{
11		-
12		- template<typename T>
13		- __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14		-
15		- unsigned int sigma_ds = 1/resize;
16		- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17		- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18		-
19		-
20		- // calculate the 2D coordinates for this current thread.
21		- int xi = blockIdx.x * blockDim.x + threadIdx.x;
22		- int yi = blockIdx.y;
23		- // convert 2D coordinates to 1D
24		- int i = yi * x + xi;
25		-
26		- if(xi< x && yi< y){
27		- if(xi%sigma_ds==0){
28		- if(yi%sigma_ds==0){
29		- gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
30		- }
31		- }
32		- else gpuI[i] = 0;
33		-
34		- //int x_org = xi * sigma_ds ;
35		- //int y_org = yi * sigma_ds ;
36		- //int i_org = y_org * x + x_org;
37		- //gpuI[i] = gpuI0[i_org];
38		- }
39		-
40		- }
41		-
42		-
43		- /// Applies a Gaussian blur to a 2D image stored on the GPU
44		- template<typename T>
45		- void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
46		-
47		-
48		- //unsigned int sigma_ds = 1/resize;
49		- //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
50		- //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
51		-
52		- //get the number of pixels in the image
53		- //unsigned int pixels_ds = x_ds * y_ds;
54		-
55		- unsigned int max_threads = stim::maxThreadsPerBlock();
56		- dim3 threads(max_threads, 1);
57		- dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
58		-
59		- //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
60		-
61		- //resample the image
62		- cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
63		-
64		- }
65		-
66		- /// Applies a Gaussian blur to a 2D image stored on the CPU
67		- template<typename T>
68		- void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
69		-
70		- //get the number of pixels in the image
71		- unsigned int pixels = x*y;
72		- unsigned int bytes = sizeof(T) * pixels;
73		-
74		- unsigned int sigma_ds = 1/resize;
75		- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
76		- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
77		- unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
78		-
79		-
80		-
81		- //allocate space on the GPU for the original image
82		- T* gpuI0;
83		- cudaMalloc(&gpuI0, bytes_ds);
84		-
85		-
86		- //copy the image data to the GPU
87		- cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
88		-
89		- //allocate space on the GPU for the down sampled image
90		- T* gpuI;
91		- cudaMalloc(&gpuI, bytes);
92		-
93		- //run the GPU-based version of the algorithm
94		- gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
95		-
96		- //copy the image data to the GPU
97		- cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
98		-
99		- cudaFree(gpuI0);
100		- cudeFree(gpuI);
101		- }
102		-
103		- }
104		-}
105		-
106		-#endif
107	0	\ No newline at end of file

stim/iVote/ivote2.cuh 0 → 100644

Wrap text Show/Hide comments View file @9d087c9

	1	+#ifndef STIM_IVOTE2_CUH
	2	+#define STIM_IVOTE2_CUH
	3	+
	4	+#include <iostream>
	5	+#include <fstream>
	6	+#include <stim/cuda/cudatools/error.h>
	7	+#include <stim/cuda/templates/gradient.cuh>
	8	+#include <stim/cuda/arraymath.cuh>
	9	+#include <stim/iVote/ivote2/ivote2.cuh>
	10	+#include <stim/math/constants.h>
	11	+#include <stim/math/vector.h>
	12	+#include <stim/visualization/colormap.h>
	13	+
	14	+namespace stim {
	15	+
	16	+ // this function precomputes the atan2 values
	17	+ template<typename T>
	18	+ void atan_2(T* cpuTable, unsigned int rmax) {
	19	+ int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in.
	20	+ int ysize = 2 * rmax + 1;
	21	+ int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi
	22	+ int xi = rmax;
	23	+ for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table
	24	+ for (int yt = 0; yt < ysize; yt++) {
	25	+ int id = yt * xsize + xt; //convert the current 2D coordinates to 1D
	26	+ int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window
	27	+ int yd = yi - yt;
	28	+ T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result.
	29	+ cpuTable[id] = atan_2d;
	30	+ }
	31	+ }
	32	+ }
	33	+
	34	+ //this kernel invert the 2D image
	35	+ template<typename T>
	36	+ __global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
	37	+ // calculate the 2D coordinates for this current thread.
	38	+ size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
	39	+ size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
	40	+
	41	+ if (xi >= x \|\| yi >= y) return;
	42	+ size_t i = yi * x + xi; // convert 2D coordinates to 1D
	43	+ gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity
	44	+ }
	45	+
	46	+
	47	+
	48	+ //this function calculate the threshold using OTSU method
	49	+ template<typename T>
	50	+ T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
	51	+ T Imax = pts[0]; //initialize the maximum value to the first one
	52	+ T Imin = pts[0]; //initialize the maximum value to the first on
	53	+
	54	+ for (size_t n = 0; n < pixels; n++) { //for every value
	55	+ if (pts[n] > Imax) { //if the value is higher than the current max
	56	+ Imax = pts[n];
	57	+ }
	58	+ }
	59	+ for (size_t n = 0; n< pixels; n++) { //for every value
	60	+ if (pts[n] < Imin) { //if the value is higher than the current max
	61	+ Imin = pts[n];
	62	+ }
	63	+ }
	64	+
	65	+ T th_step = ((Imax - Imin) / th_num);
	66	+ vector<T> var_b;
	67	+ for (unsigned int t0 = 0; t0 < th_num; t0++) {
	68	+ T th = t0 * th_step + Imin;
	69	+ unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold
	70	+ T m_b(0), m_o(0); //these variables save the mean value for each cluster
	71	+ for (unsigned int idx = 0; idx < pixels; idx++) {
	72	+ if (pts[idx] <= th) {
	73	+ m_b += pts[idx];
	74	+ n_b += 1;
	75	+ }
	76	+ else {
	77	+ m_o += pts[idx];
	78	+ n_o += 1;
	79	+ }
	80	+ }
	81	+
	82	+ m_b = m_b / n_b; //calculate the mean value for the below threshold cluster
	83	+ m_o = m_o / n_o; //calculate the mean value for the over threshold cluster
	84	+
	85	+ var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
	86	+ }
	87	+
	88	+ vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector
	89	+ size_t th_idx = std::distance(var_b.begin(), max_var);
	90	+ T threshold = Imin + (T)(th_idx * th_step);
	91	+ return threshold;
	92	+ }
	93	+
	94	+ //this function performs the 2D iterative voting algorithm on the image stored in the gpu
	95	+ template<typename T>
	96	+ void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
	97	+ int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
	98	+
	99	+ size_t pixels = x * y; //compute the size of input image
	100	+ //
	101	+ if (invert) { //if inversion is required call the kernel to invert the image
	102	+ unsigned int max_threads = stim::maxThreadsPerBlock();
	103	+ dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
	104	+ dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
	105	+ cuda_invert << <blocks, threads >> > (gpuI, x, y);
	106	+ }
	107	+ //
	108	+ size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table
	109	+ T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table
	110	+ atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table
	111	+ T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
	112	+ HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu
	113	+
	114	+ size_t bytes = pixels* sizeof(T); //calculate the bytes of the input
	115	+ float dphi = phi / iter; //change in phi for each iteration
	116	+
	117	+ float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient
	118	+ float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image
	119	+
	120	+ stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient
	121	+ //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI);
	122	+ //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
	123	+ stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar
	124	+
	125	+ for (int i = 0; i < iter; i++) { //for each iteration
	126	+ cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0
	127	+ stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting
	128	+ stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions
	129	+ phi = phi - dphi; //decrement phi
	130	+ }
	131	+ stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima
	132	+
	133	+ T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting
	134	+ HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory
	135	+
	136	+ T threshold;
	137	+ if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold
	138	+ else threshold = t;
	139	+
	140	+ std::ofstream output; //save the thresholded detected seeds in a text file
	141	+ output.open(outname_txt);
	142	+ output << "X" << " " << "Y" << " " << "threshold" << "\n";
	143	+ size_t ind;
	144	+ for (size_t ix = 0; ix < x; ix++) {
	145	+ for (size_t iy = 0; iy < y; iy++) {
	146	+ ind = iy * x + ix;
	147	+ if (pts[ind] > threshold) {
	148	+ output << ix << " " << iy << " " << pts[ind] << "\n";
	149	+ pts[ind] = 1;
	150	+ }
	151	+ else pts[ind] = 0;
	152	+ }
	153	+ }
	154	+ output.close();
	155	+
	156	+ HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu
	157	+ stim::cpu2image(pts, outname_img, x, y); //output the image
	158	+
	159	+ }
	160	+
	161	+
	162	+ template<typename T>
	163	+ void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
	164	+ int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
	165	+ size_t bytes = xy sizeof(T);
	166	+ T* gpuI; //allocate space on the gpu to save the input image
	167	+ HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
	168	+ HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu
	169	+ stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote
	170	+ HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu
	171	+ }
	172	+}
	173	+#endif
0	174	\ No newline at end of file
...	...

stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh

Wrap text Show/Hide comments View file @9d087c9

1		-#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
2		-#define STIM_CUDA_IVOTE_ATOMIC_BB_H
	1	+#ifndef STIM_CUDA_ITER_VOTE2_H
	2	+#define STIM_CUDA_ITER_VOTE2_H
3	3
4	4	extern bool DEBUG;
5		-#include <stim/cuda/ivote/down_sample.cuh>
6		-#include <stim/cuda/ivote/local_max.cuh>
7		-#include <stim/cuda/ivote/update_dir_bb.cuh>
8		-#include <stim/cuda/ivote/vote_atomic_bb.cuh>
	5	+
	6	+#include "local_max.cuh"
	7	+#include "update_dir_bb.cuh"
	8	+#include "vote_atomic_bb.cuh"
9	9
10	10	namespace stim{
11	11	namespace cuda{
...	...

stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh

Wrap text Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh

Wrap text Show/Hide comments View file @9d087c9