Commit 3f2d0f9f5da4d1055f25345e3e54ef5d004f0e7c
1 parent
66304f4f
changing the ivote directory from stim/cuda/ to stim/ and add a cuda header file…
… to include ivote2 function
Showing
17 changed files
with
179 additions
and
229 deletions
Show diff stats
stim/cuda/ivote.cuh deleted
1 | -#ifndef STIM_CUDA_IVOTE_H | ||
2 | -#define STIM_CUDA_IVOTE_H | ||
3 | - | ||
4 | -#include <stim/cuda/ivote/down_sample.cuh> | ||
5 | -#include <stim/cuda/ivote/local_max.cuh> | ||
6 | -#include <stim/cuda/ivote/update_dir.cuh> | ||
7 | -#include <stim/cuda/ivote/vote.cuh> | ||
8 | - | ||
9 | -namespace stim{ | ||
10 | - namespace cuda{ | ||
11 | - | ||
12 | - } | ||
13 | -} | ||
14 | - | ||
15 | - | ||
16 | - | ||
17 | -#endif | ||
18 | \ No newline at end of file | 0 | \ No newline at end of file |
stim/cuda/ivote/down_sample.cuh deleted
1 | -#ifndef STIM_CUDA_DOWN_SAMPLE_H | ||
2 | -#define STIM_CUDA_DOWN_SAMPLE_H | ||
3 | - | ||
4 | -#include <iostream> | ||
5 | -#include <cuda.h> | ||
6 | -#include <stim/cuda/cudatools.h> | ||
7 | -#include <stim/cuda/templates/gaussian_blur.cuh> | ||
8 | - | ||
9 | -namespace stim{ | ||
10 | - namespace cuda{ | ||
11 | - | ||
12 | - template<typename T> | ||
13 | - __global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | ||
14 | - | ||
15 | - unsigned int sigma_ds = 1/resize; | ||
16 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
17 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
18 | - | ||
19 | - | ||
20 | - // calculate the 2D coordinates for this current thread. | ||
21 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
22 | - int yi = blockIdx.y; | ||
23 | - // convert 2D coordinates to 1D | ||
24 | - int i = yi * x_ds + xi; | ||
25 | - | ||
26 | - if(xi< x_ds && yi< y_ds){ | ||
27 | - | ||
28 | - int x_org = xi * sigma_ds ; | ||
29 | - int y_org = yi * sigma_ds ; | ||
30 | - int i_org = y_org * x + x_org; | ||
31 | - gpuI[i] = gpuI0[i_org]; | ||
32 | - } | ||
33 | - | ||
34 | - } | ||
35 | - | ||
36 | - | ||
37 | - /// Applies a Gaussian blur to a 2D image stored on the GPU | ||
38 | - template<typename T> | ||
39 | - void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){ | ||
40 | - | ||
41 | - | ||
42 | - unsigned int sigma_ds = (unsigned int)(1.0f/resize); | ||
43 | - size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
44 | - size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
45 | - | ||
46 | - //get the number of pixels in the image | ||
47 | -// unsigned int pixels_ds = x_ds * y_ds; | ||
48 | - | ||
49 | - unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
50 | - dim3 threads(max_threads, 1); | ||
51 | - dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds); | ||
52 | - | ||
53 | - stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); | ||
54 | - | ||
55 | - //resample the image | ||
56 | - down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); | ||
57 | - | ||
58 | - } | ||
59 | - | ||
60 | - /// Applies a Gaussian blur to a 2D image stored on the CPU | ||
61 | - template<typename T> | ||
62 | - void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){ | ||
63 | - | ||
64 | - //get the number of pixels in the image | ||
65 | - unsigned int pixels = x * y; | ||
66 | - unsigned int bytes = sizeof(T) * pixels; | ||
67 | - | ||
68 | - unsigned int sigma_ds = 1/resize; | ||
69 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
70 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
71 | - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds; | ||
72 | - | ||
73 | - | ||
74 | - | ||
75 | - //allocate space on the GPU for the original image | ||
76 | - T* gpuI0; | ||
77 | - cudaMalloc(&gpuI0, bytes); | ||
78 | - | ||
79 | - | ||
80 | - //copy the image data to the GPU | ||
81 | - cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); | ||
82 | - | ||
83 | - //allocate space on the GPU for the down sampled image | ||
84 | - T* gpuI; | ||
85 | - cudaMalloc(&gpuI, bytes_ds); | ||
86 | - | ||
87 | - //run the GPU-based version of the algorithm | ||
88 | - gpu_down_sample<T>(gpuI, gpuI0, resize, x, y); | ||
89 | - | ||
90 | - //copy the image data to the GPU | ||
91 | - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice); | ||
92 | - | ||
93 | - cudaFree(gpuI0); | ||
94 | - cudeFree(gpuI); | ||
95 | - } | ||
96 | - | ||
97 | - } | ||
98 | -} | ||
99 | - | ||
100 | -#endif |
stim/cuda/ivote/re_sample.cuh deleted
1 | -#ifndef STIM_CUDA_RE_SAMPLE_H | ||
2 | -#define STIM_CUDA_RE_SAMPLE_H | ||
3 | - | ||
4 | -#include <iostream> | ||
5 | -#include <cuda.h> | ||
6 | -#include <stim/cuda/cudatools.h> | ||
7 | -#include <stim/cuda/templates/gaussian_blur.cuh> | ||
8 | - | ||
9 | -namespace stim{ | ||
10 | - namespace cuda{ | ||
11 | - | ||
12 | - template<typename T> | ||
13 | - __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | ||
14 | - | ||
15 | - unsigned int sigma_ds = 1/resize; | ||
16 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
17 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
18 | - | ||
19 | - | ||
20 | - // calculate the 2D coordinates for this current thread. | ||
21 | - int xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
22 | - int yi = blockIdx.y; | ||
23 | - // convert 2D coordinates to 1D | ||
24 | - int i = yi * x + xi; | ||
25 | - | ||
26 | - if(xi< x && yi< y){ | ||
27 | - if(xi%sigma_ds==0){ | ||
28 | - if(yi%sigma_ds==0){ | ||
29 | - gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds]; | ||
30 | - } | ||
31 | - } | ||
32 | - else gpuI[i] = 0; | ||
33 | - | ||
34 | - //int x_org = xi * sigma_ds ; | ||
35 | - //int y_org = yi * sigma_ds ; | ||
36 | - //int i_org = y_org * x + x_org; | ||
37 | - //gpuI[i] = gpuI0[i_org]; | ||
38 | - } | ||
39 | - | ||
40 | - } | ||
41 | - | ||
42 | - | ||
43 | - /// Applies a Gaussian blur to a 2D image stored on the GPU | ||
44 | - template<typename T> | ||
45 | - void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){ | ||
46 | - | ||
47 | - | ||
48 | - //unsigned int sigma_ds = 1/resize; | ||
49 | - //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
50 | - //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
51 | - | ||
52 | - //get the number of pixels in the image | ||
53 | - //unsigned int pixels_ds = x_ds * y_ds; | ||
54 | - | ||
55 | - unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
56 | - dim3 threads(max_threads, 1); | ||
57 | - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y); | ||
58 | - | ||
59 | - //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); | ||
60 | - | ||
61 | - //resample the image | ||
62 | - cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); | ||
63 | - | ||
64 | - } | ||
65 | - | ||
66 | - /// Applies a Gaussian blur to a 2D image stored on the CPU | ||
67 | - template<typename T> | ||
68 | - void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){ | ||
69 | - | ||
70 | - //get the number of pixels in the image | ||
71 | - unsigned int pixels = x*y; | ||
72 | - unsigned int bytes = sizeof(T) * pixels; | ||
73 | - | ||
74 | - unsigned int sigma_ds = 1/resize; | ||
75 | - unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); | ||
76 | - unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); | ||
77 | - unsigned int bytes_ds = sizeof(T) * x_ds * y_ds; | ||
78 | - | ||
79 | - | ||
80 | - | ||
81 | - //allocate space on the GPU for the original image | ||
82 | - T* gpuI0; | ||
83 | - cudaMalloc(&gpuI0, bytes_ds); | ||
84 | - | ||
85 | - | ||
86 | - //copy the image data to the GPU | ||
87 | - cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice); | ||
88 | - | ||
89 | - //allocate space on the GPU for the down sampled image | ||
90 | - T* gpuI; | ||
91 | - cudaMalloc(&gpuI, bytes); | ||
92 | - | ||
93 | - //run the GPU-based version of the algorithm | ||
94 | - gpu_re_sample<T>(gpuI, gpuI0, resize, x, y); | ||
95 | - | ||
96 | - //copy the image data to the GPU | ||
97 | - cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice); | ||
98 | - | ||
99 | - cudaFree(gpuI0); | ||
100 | - cudeFree(gpuI); | ||
101 | - } | ||
102 | - | ||
103 | - } | ||
104 | -} | ||
105 | - | ||
106 | -#endif | ||
107 | \ No newline at end of file | 0 | \ No newline at end of file |
1 | +#ifndef STIM_IVOTE2_CUH | ||
2 | +#define STIM_IVOTE2_CUH | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <fstream> | ||
6 | +#include <stim/cuda/cudatools/error.h> | ||
7 | +#include <stim/cuda/templates/gradient.cuh> | ||
8 | +#include <stim/cuda/arraymath.cuh> | ||
9 | +#include <stim/iVote/ivote2/ivote2.cuh> | ||
10 | +#include <stim/math/constants.h> | ||
11 | +#include <stim/math/vector.h> | ||
12 | +#include <stim/visualization/colormap.h> | ||
13 | + | ||
14 | +namespace stim { | ||
15 | + | ||
16 | + // this function precomputes the atan2 values | ||
17 | + template<typename T> | ||
18 | + void atan_2(T* cpuTable, unsigned int rmax) { | ||
19 | + int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in. | ||
20 | + int ysize = 2 * rmax + 1; | ||
21 | + int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi | ||
22 | + int xi = rmax; | ||
23 | + for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table | ||
24 | + for (int yt = 0; yt < ysize; yt++) { | ||
25 | + int id = yt * xsize + xt; //convert the current 2D coordinates to 1D | ||
26 | + int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window | ||
27 | + int yd = yi - yt; | ||
28 | + T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result. | ||
29 | + cpuTable[id] = atan_2d; | ||
30 | + } | ||
31 | + } | ||
32 | + } | ||
33 | + | ||
34 | + //this kernel invert the 2D image | ||
35 | + template<typename T> | ||
36 | + __global__ void cuda_invert(T* gpuI, size_t x, size_t y) { | ||
37 | + // calculate the 2D coordinates for this current thread. | ||
38 | + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
39 | + size_t yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
40 | + | ||
41 | + if (xi >= x || yi >= y) return; | ||
42 | + size_t i = yi * x + xi; // convert 2D coordinates to 1D | ||
43 | + gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity | ||
44 | + } | ||
45 | + | ||
46 | + | ||
47 | + | ||
48 | + //this function calculate the threshold using OTSU method | ||
49 | + template<typename T> | ||
50 | + T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) { | ||
51 | + T Imax = pts[0]; //initialize the maximum value to the first one | ||
52 | + T Imin = pts[0]; //initialize the maximum value to the first on | ||
53 | + | ||
54 | + for (size_t n = 0; n < pixels; n++) { //for every value | ||
55 | + if (pts[n] > Imax) { //if the value is higher than the current max | ||
56 | + Imax = pts[n]; | ||
57 | + } | ||
58 | + } | ||
59 | + for (size_t n = 0; n< pixels; n++) { //for every value | ||
60 | + if (pts[n] < Imin) { //if the value is higher than the current max | ||
61 | + Imin = pts[n]; | ||
62 | + } | ||
63 | + } | ||
64 | + | ||
65 | + T th_step = ((Imax - Imin) / th_num); | ||
66 | + vector<T> var_b; | ||
67 | + for (unsigned int t0 = 0; t0 < th_num; t0++) { | ||
68 | + T th = t0 * th_step + Imin; | ||
69 | + unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold | ||
70 | + T m_b(0), m_o(0); //these variables save the mean value for each cluster | ||
71 | + for (unsigned int idx = 0; idx < pixels; idx++) { | ||
72 | + if (pts[idx] <= th) { | ||
73 | + m_b += pts[idx]; | ||
74 | + n_b += 1; | ||
75 | + } | ||
76 | + else { | ||
77 | + m_o += pts[idx]; | ||
78 | + n_o += 1; | ||
79 | + } | ||
80 | + } | ||
81 | + | ||
82 | + m_b = m_b / n_b; //calculate the mean value for the below threshold cluster | ||
83 | + m_o = m_o / n_o; //calculate the mean value for the over threshold cluster | ||
84 | + | ||
85 | + var_b.push_back(n_b * n_o * pow((m_b - m_o), 2)); | ||
86 | + } | ||
87 | + | ||
88 | + vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector | ||
89 | + size_t th_idx = std::distance(var_b.begin(), max_var); | ||
90 | + T threshold = Imin + (T)(th_idx * th_step); | ||
91 | + return threshold; | ||
92 | + } | ||
93 | + | ||
94 | + //this function performs the 2D iterative voting algorithm on the image stored in the gpu | ||
95 | + template<typename T> | ||
96 | + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | ||
97 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | ||
98 | + | ||
99 | + size_t pixels = x * y; //compute the size of input image | ||
100 | + // | ||
101 | + if (invert) { //if inversion is required call the kernel to invert the image | ||
102 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
103 | + dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads)); | ||
104 | + dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1); | ||
105 | + cuda_invert << <blocks, threads >> > (gpuI, x, y); | ||
106 | + } | ||
107 | + // | ||
108 | + size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table | ||
109 | + T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table | ||
110 | + atan_2<T>(cpuTable, rmax); //call the function to precompute the atan2 table | ||
111 | + T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes)); | ||
112 | + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu | ||
113 | + | ||
114 | + size_t bytes = pixels* sizeof(T); //calculate the bytes of the input | ||
115 | + float dphi = phi / iter; //change in phi for each iteration | ||
116 | + | ||
117 | + float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient | ||
118 | + float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image | ||
119 | + | ||
120 | + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient | ||
121 | + //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI); | ||
122 | + //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | ||
123 | + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar | ||
124 | + | ||
125 | + for (int i = 0; i < iter; i++) { //for each iteration | ||
126 | + cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 | ||
127 | + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting | ||
128 | + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions | ||
129 | + phi = phi - dphi; //decrement phi | ||
130 | + } | ||
131 | + stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima | ||
132 | + | ||
133 | + T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting | ||
134 | + HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory | ||
135 | + | ||
136 | + T threshold; | ||
137 | + if (t == 0) threshold = stim::th_otsu<T>(pts, pixels); //if threshold value is not set call the function to compute the threshold | ||
138 | + else threshold = t; | ||
139 | + | ||
140 | + std::ofstream output; //save the thresholded detected seeds in a text file | ||
141 | + output.open(outname_txt); | ||
142 | + output << "X" << " " << "Y" << " " << "threshold" << "\n"; | ||
143 | + size_t ind; | ||
144 | + for (size_t ix = 0; ix < x; ix++) { | ||
145 | + for (size_t iy = 0; iy < y; iy++) { | ||
146 | + ind = iy * x + ix; | ||
147 | + if (pts[ind] > threshold) { | ||
148 | + output << ix << " " << iy << " " << pts[ind] << "\n"; | ||
149 | + pts[ind] = 1; | ||
150 | + } | ||
151 | + else pts[ind] = 0; | ||
152 | + } | ||
153 | + } | ||
154 | + output.close(); | ||
155 | + | ||
156 | + HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu | ||
157 | + stim::cpu2image(pts, outname_img, x, y); //output the image | ||
158 | + | ||
159 | + } | ||
160 | + | ||
161 | + | ||
162 | + template<typename T> | ||
163 | + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | ||
164 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | ||
165 | + size_t bytes = x*y * sizeof(T); | ||
166 | + T* gpuI; //allocate space on the gpu to save the input image | ||
167 | + HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); | ||
168 | + HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu | ||
169 | + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote | ||
170 | + HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu | ||
171 | + } | ||
172 | +} | ||
173 | +#endif | ||
0 | \ No newline at end of file | 174 | \ No newline at end of file |
stim/cuda/ivote_atomic_bb.cuh renamed to stim/iVote/ivote2/iter_vote2.cuh
1 | -#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H | ||
2 | -#define STIM_CUDA_IVOTE_ATOMIC_BB_H | 1 | +#ifndef STIM_CUDA_ITER_VOTE2_H |
2 | +#define STIM_CUDA_ITER_VOTE2_H | ||
3 | 3 | ||
4 | extern bool DEBUG; | 4 | extern bool DEBUG; |
5 | -#include <stim/cuda/ivote/down_sample.cuh> | ||
6 | -#include <stim/cuda/ivote/local_max.cuh> | ||
7 | -#include <stim/cuda/ivote/update_dir_bb.cuh> | ||
8 | -#include <stim/cuda/ivote/vote_atomic_bb.cuh> | 5 | + |
6 | +#include "local_max.cuh" | ||
7 | +#include "update_dir_bb.cuh" | ||
8 | +#include "vote_atomic_bb.cuh" | ||
9 | 9 | ||
10 | namespace stim{ | 10 | namespace stim{ |
11 | namespace cuda{ | 11 | namespace cuda{ |
stim/cuda/ivote/local_max.cuh renamed to stim/iVote/ivote2/local_max.cuh
stim/cuda/ivote/update_dir.cuh renamed to stim/iVote/ivote2/update_dir.cuh
stim/cuda/ivote/update_dir_bb.cuh renamed to stim/iVote/ivote2/update_dir_bb.cuh
stim/cuda/ivote/update_dir_shared.cuh renamed to stim/iVote/ivote2/update_dir_shared.cuh
stim/cuda/ivote/update_dir_threshold_global.cuh renamed to stim/iVote/ivote2/update_dir_threshold_global.cuh
stim/cuda/ivote/vote.cuh renamed to stim/iVote/ivote2/vote.cuh
stim/cuda/ivote/vote_atomic.cuh renamed to stim/iVote/ivote2/vote_atomic.cuh
stim/cuda/ivote/vote_atomic_bb.cuh renamed to stim/iVote/ivote2/vote_atomic_bb.cuh
stim/cuda/ivote/vote_atomic_shared.cuh renamed to stim/iVote/ivote2/vote_atomic_shared.cuh
stim/cuda/ivote/vote_shared.cuh renamed to stim/iVote/ivote2/vote_shared.cuh
stim/cuda/ivote/vote_shared_32-32.cuh renamed to stim/iVote/ivote2/vote_shared_32-32.cuh
stim/cuda/ivote/vote_threshold_global.cuh renamed to stim/iVote/ivote2/vote_threshold_global.cuh