codebase / stimlib

Commit 9d087c90863fb562c9838d25f73620aa0b84027e

Authored by David Mayerich 2017-03-10 10:51:12 -0600

Merge branch 'ivote2' into 'master'

changing the ivote directory from stim/cuda/ to stim/ and add a cuda header file…

… to include ivote2 function

See merge request !33

Showing 17 changed files with 179 additions and 229 deletions Show diff stats

Inline Side-by-side

stim/cuda/ivote.cuh deleted

View file @3c2d162

1	-#ifndef STIM_CUDA_IVOTE_H
2	-#define STIM_CUDA_IVOTE_H
3	-
4	-#include <stim/cuda/ivote/down_sample.cuh>
5	-#include <stim/cuda/ivote/local_max.cuh>
6	-#include <stim/cuda/ivote/update_dir.cuh>
7	-#include <stim/cuda/ivote/vote.cuh>
8	-
9	-namespace stim{
10	- namespace cuda{
11	-
12	- }
13	-}
14	-
15	-
16	-
17	-#endif
18	\ No newline at end of file	0	\ No newline at end of file

stim/cuda/ivote/down_sample.cuh deleted

View file @3c2d162

1	-#ifndef STIM_CUDA_DOWN_SAMPLE_H
2	-#define STIM_CUDA_DOWN_SAMPLE_H
3	-
4	-#include <iostream>
5	-#include <cuda.h>
6	-#include <stim/cuda/cudatools.h>
7	-#include <stim/cuda/templates/gaussian_blur.cuh>
8	-
9	-namespace stim{
10	- namespace cuda{
11	-
12	- template<typename T>
13	- __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14	-
15	- unsigned int sigma_ds = 1/resize;
16	- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17	- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18	-
19	-
20	- // calculate the 2D coordinates for this current thread.
21	- int xi = blockIdx.x * blockDim.x + threadIdx.x;
22	- int yi = blockIdx.y;
23	- // convert 2D coordinates to 1D
24	- int i = yi * x_ds + xi;
25	-
26	- if(xi< x_ds && yi< y_ds){
27	-
28	- int x_org = xi * sigma_ds ;
29	- int y_org = yi * sigma_ds ;
30	- int i_org = y_org * x + x_org;
31	- gpuI[i] = gpuI0[i_org];
32	- }
33	-
34	- }
35	-
36	-
37	- /// Applies a Gaussian blur to a 2D image stored on the GPU
38	- template<typename T>
39	- void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){
40	-
41	-
42	- unsigned int sigma_ds = (unsigned int)(1.0f/resize);
43	- size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
44	- size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
45	-
46	- //get the number of pixels in the image
47	-// unsigned int pixels_ds = x_ds * y_ds;
48	-
49	- unsigned int max_threads = stim::maxThreadsPerBlock();
50	- dim3 threads(max_threads, 1);
51	- dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);
52	-
53	- stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
54	-
55	- //resample the image
56	- down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
57	-
58	- }
59	-
60	- /// Applies a Gaussian blur to a 2D image stored on the CPU
61	- template<typename T>
62	- void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){
63	-
64	- //get the number of pixels in the image
65	- unsigned int pixels = x * y;
66	- unsigned int bytes = sizeof(T) * pixels;
67	-
68	- unsigned int sigma_ds = 1/resize;
69	- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
70	- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
71	- unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
72	-
73	-
74	-
75	- //allocate space on the GPU for the original image
76	- T* gpuI0;
77	- cudaMalloc(&gpuI0, bytes);
78	-
79	-
80	- //copy the image data to the GPU
81	- cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);
82	-
83	- //allocate space on the GPU for the down sampled image
84	- T* gpuI;
85	- cudaMalloc(&gpuI, bytes_ds);
86	-
87	- //run the GPU-based version of the algorithm
88	- gpu_down_sample<T>(gpuI, gpuI0, resize, x, y);
89	-
90	- //copy the image data to the GPU
91	- cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
92	-
93	- cudaFree(gpuI0);
94	- cudeFree(gpuI);
95	- }
96	-
97	- }
98	-}
99	-
100	-#endif

stim/cuda/ivote/re_sample.cuh deleted

View file @3c2d162

1	-#ifndef STIM_CUDA_RE_SAMPLE_H
2	-#define STIM_CUDA_RE_SAMPLE_H
3	-
4	-#include <iostream>
5	-#include <cuda.h>
6	-#include <stim/cuda/cudatools.h>
7	-#include <stim/cuda/templates/gaussian_blur.cuh>
8	-
9	-namespace stim{
10	- namespace cuda{
11	-
12	- template<typename T>
13	- __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
14	-
15	- unsigned int sigma_ds = 1/resize;
16	- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
17	- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
18	-
19	-
20	- // calculate the 2D coordinates for this current thread.
21	- int xi = blockIdx.x * blockDim.x + threadIdx.x;
22	- int yi = blockIdx.y;
23	- // convert 2D coordinates to 1D
24	- int i = yi * x + xi;
25	-
26	- if(xi< x && yi< y){
27	- if(xi%sigma_ds==0){
28	- if(yi%sigma_ds==0){
29	- gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
30	- }
31	- }
32	- else gpuI[i] = 0;
33	-
34	- //int x_org = xi * sigma_ds ;
35	- //int y_org = yi * sigma_ds ;
36	- //int i_org = y_org * x + x_org;
37	- //gpuI[i] = gpuI0[i_org];
38	- }
39	-
40	- }
41	-
42	-
43	- /// Applies a Gaussian blur to a 2D image stored on the GPU
44	- template<typename T>
45	- void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
46	-
47	-
48	- //unsigned int sigma_ds = 1/resize;
49	- //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
50	- //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
51	-
52	- //get the number of pixels in the image
53	- //unsigned int pixels_ds = x_ds * y_ds;
54	-
55	- unsigned int max_threads = stim::maxThreadsPerBlock();
56	- dim3 threads(max_threads, 1);
57	- dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
58	-
59	- //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
60	-
61	- //resample the image
62	- cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
63	-
64	- }
65	-
66	- /// Applies a Gaussian blur to a 2D image stored on the CPU
67	- template<typename T>
68	- void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
69	-
70	- //get the number of pixels in the image
71	- unsigned int pixels = x*y;
72	- unsigned int bytes = sizeof(T) * pixels;
73	-
74	- unsigned int sigma_ds = 1/resize;
75	- unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
76	- unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
77	- unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
78	-
79	-
80	-
81	- //allocate space on the GPU for the original image
82	- T* gpuI0;
83	- cudaMalloc(&gpuI0, bytes_ds);
84	-
85	-
86	- //copy the image data to the GPU
87	- cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
88	-
89	- //allocate space on the GPU for the down sampled image
90	- T* gpuI;
91	- cudaMalloc(&gpuI, bytes);
92	-
93	- //run the GPU-based version of the algorithm
94	- gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
95	-
96	- //copy the image data to the GPU
97	- cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
98	-
99	- cudaFree(gpuI0);
100	- cudeFree(gpuI);
101	- }
102	-
103	- }
104	-}
105	-
106	-#endif
107	\ No newline at end of file	0	\ No newline at end of file

stim/iVote/ivote2.cuh 0 → 100644

Show/Hide comments View file @9d087c9

		1	+#ifndef STIM_IVOTE2_CUH
		2	+#define STIM_IVOTE2_CUH
		3	+
		4	+#include <iostream>
		5	+#include <fstream>
		6	+#include <stim/cuda/cudatools/error.h>
		7	+#include <stim/cuda/templates/gradient.cuh>
		8	+#include <stim/cuda/arraymath.cuh>
		9	+#include <stim/iVote/ivote2/ivote2.cuh>
		10	+#include <stim/math/constants.h>
		11	+#include <stim/math/vector.h>
		12	+#include <stim/visualization/colormap.h>
		13	+
		14	+namespace stim {
		15	+
		16	+ // this function precomputes the atan2 values
		17	+ template<typename T>
		18	+ void atan_2(T* cpuTable, unsigned int rmax) {
		19	+ int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in.
		20	+ int ysize = 2 * rmax + 1;
		21	+ int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi
		22	+ int xi = rmax;
		23	+ for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table
		24	+ for (int yt = 0; yt < ysize; yt++) {
		25	+ int id = yt * xsize + xt; //convert the current 2D coordinates to 1D
		26	+ int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window
		27	+ int yd = yi - yt;
		28	+ T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result.
		29	+ cpuTable[id] = atan_2d;
		30	+ }
		31	+ }
		32	+ }
		33	+
		34	+ //this kernel invert the 2D image
		35	+ template<typename T>
		36	+ __global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
		37	+ // calculate the 2D coordinates for this current thread.
		38	+ size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
		39	+ size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
		40	+
		41	+ if (xi >= x \|\| yi >= y) return;
		42	+ size_t i = yi * x + xi; // convert 2D coordinates to 1D
		43	+ gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity
		44	+ }
		45	+
		46	+
		47	+
		48	+ //this function calculate the threshold using OTSU method
		49	+ template<typename T>
		50	+ T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
		51	+ T Imax = pts[0]; //initialize the maximum value to the first one
		52	+ T Imin = pts[0]; //initialize the maximum value to the first on
		53	+
		54	+ for (size_t n = 0; n < pixels; n++) { //for every value
		55	+ if (pts[n] > Imax) { //if the value is higher than the current max
		56	+ Imax = pts[n];
		57	+ }
		58	+ }
		59	+ for (size_t n = 0; n< pixels; n++) { //for every value
		60	+ if (pts[n] < Imin) { //if the value is higher than the current max
		61	+ Imin = pts[n];
		62	+ }
		63	+ }
		64	+
		65	+ T th_step = ((Imax - Imin) / th_num);
		66	+ vector<T> var_b;
		67	+ for (unsigned int t0 = 0; t0 < th_num; t0++) {
		68	+ T th = t0 * th_step + Imin;
		69	+ unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold
		70	+ T m_b(0), m_o(0); //these variables save the mean value for each cluster
		71	+ for (unsigned int idx = 0; idx < pixels; idx++) {
		72	+ if (pts[idx] <= th) {
		73	+ m_b += pts[idx];
		74	+ n_b += 1;
		75	+ }
		76	+ else {
		77	+ m_o += pts[idx];
		78	+ n_o += 1;
		79	+ }
		80	+ }
		81	+
		82	+ m_b = m_b / n_b; //calculate the mean value for the below threshold cluster
		83	+ m_o = m_o / n_o; //calculate the mean value for the over threshold cluster
		84	+
		85	+ var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
		86	+ }
		87	+
		88	+ vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector
		89	+ size_t th_idx = std::distance(var_b.begin(), max_var);
		90	+ T threshold = Imin + (T)(th_idx * th_step);
		91	+ return threshold;
		92	+ }
		93	+
		94	+ //this function performs the 2D iterative voting algorithm on the image stored in the gpu
		95	+ template<typename T>
		96	+ void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
		97	+ int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
		98	+
		99	+ size_t pixels = x * y; //compute the size of input image
		100	+ //
		101	+ if (invert) { //if inversion is required call the kernel to invert the image
		102	+ unsigned int max_threads = stim::maxThreadsPerBlock();
		103	+ dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
		104	+ dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
		105	+ cuda_invert << <blocks, threads >> > (gpuI, x, y);
		106	+ }
		107	+ //
		108	+ size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table
		109	+ T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table
		110	+ atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table
		111	+ T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
		112	+ HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu
		113	+
		114	+ size_t bytes = pixels* sizeof(T); //calculate the bytes of the input
		115	+ float dphi = phi / iter; //change in phi for each iteration
		116	+
		117	+ float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient
		118	+ float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image
		119	+
		120	+ stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient
		121	+ //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI);
		122	+ //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
		123	+ stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar
		124	+
		125	+ for (int i = 0; i < iter; i++) { //for each iteration
		126	+ cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0
		127	+ stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting
		128	+ stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions
		129	+ phi = phi - dphi; //decrement phi
		130	+ }
		131	+ stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima
		132	+
		133	+ T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting
		134	+ HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory
		135	+
		136	+ T threshold;
		137	+ if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold
		138	+ else threshold = t;
		139	+
		140	+ std::ofstream output; //save the thresholded detected seeds in a text file
		141	+ output.open(outname_txt);
		142	+ output << "X" << " " << "Y" << " " << "threshold" << "\n";
		143	+ size_t ind;
		144	+ for (size_t ix = 0; ix < x; ix++) {
		145	+ for (size_t iy = 0; iy < y; iy++) {
		146	+ ind = iy * x + ix;
		147	+ if (pts[ind] > threshold) {
		148	+ output << ix << " " << iy << " " << pts[ind] << "\n";
		149	+ pts[ind] = 1;
		150	+ }
		151	+ else pts[ind] = 0;
		152	+ }
		153	+ }
		154	+ output.close();
		155	+
		156	+ HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu
		157	+ stim::cpu2image(pts, outname_img, x, y); //output the image
		158	+
		159	+ }
		160	+
		161	+
		162	+ template<typename T>
		163	+ void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
		164	+ int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) {
		165	+ size_t bytes = xy sizeof(T);
		166	+ T* gpuI; //allocate space on the gpu to save the input image
		167	+ HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
		168	+ HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu
		169	+ stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote
		170	+ HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu
		171	+ }
		172	+}
		173	+#endif
0	\ No newline at end of file	174	\ No newline at end of file

stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh

Show/Hide comments View file @9d087c9

--#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
--#define STIM_CUDA_IVOTE_ATOMIC_BB_H
++#ifndef STIM_CUDA_ITER_VOTE2_H
++#define STIM_CUDA_ITER_VOTE2_H
  extern bool DEBUG;
--#include <stim/cuda/ivote/down_sample.cuh>
--#include <stim/cuda/ivote/local_max.cuh>
--#include <stim/cuda/ivote/update_dir_bb.cuh>
--#include <stim/cuda/ivote/vote_atomic_bb.cuh>
++
++#include "local_max.cuh"
++#include "update_dir_bb.cuh"
++#include "vote_atomic_bb.cuh"
  namespace stim{
  	namespace cuda{

stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh

Show/Hide comments View file @9d087c9

stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh

Show/Hide comments View file @9d087c9