Commit f12505fb9c8dd14e19310250c06f1665f111a21a
1 parent
c9aba18a
upload the ivote code that works well on the small size of image ( not working f…
…or 512 cube) and float precision
Showing
5 changed files
with
242 additions
and
164 deletions
Show diff stats
cpp/CMakeLists.txt
@@ -46,4 +46,5 @@ target_link_libraries(ivote3 | @@ -46,4 +46,5 @@ target_link_libraries(ivote3 | ||
46 | ) | 46 | ) |
47 | 47 | ||
48 | #copy an image test case | 48 | #copy an image test case |
49 | -configure_file(data/nissl/nissl-rat.vol nissl-rat.vol COPYONLY) | 49 | +configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) |
50 | +configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) |
cpp/cudafunc.cu
1 | #include "gaussian_blur3.cuh" | 1 | #include "gaussian_blur3.cuh" |
2 | #include "gradient3.cuh" | 2 | #include "gradient3.cuh" |
3 | +#include "mag3.cuh" | ||
4 | +#include "vote3.cuh" | ||
5 | +#include "update_dir3.cuh" | ||
6 | +#include "local_max3.cuh" | ||
3 | 7 | ||
4 | 8 | ||
5 | -void ivote3(float* out, float* img, float sigma, unsigned int x, unsigned int y, unsigned int z){ | ||
6 | - //cpu_gaussian_blur_conv3<float>(Ib, img, rhs, x, y, z); | ||
7 | - | 9 | +void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[], |
10 | + int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | ||
11 | + | ||
12 | + | ||
13 | + // compute the number of bytes in the input data | ||
8 | unsigned int bytes = x * y * z * sizeof(float); | 14 | unsigned int bytes = x * y * z * sizeof(float); |
15 | + | ||
16 | + //assign memory on gpu for the input data. | ||
9 | float* gpuI0; | 17 | float* gpuI0; |
10 | - cudaMalloc(&gpuI0, bytes); | ||
11 | - //copy the image data to the GPU | 18 | + cudaMalloc(&gpuI0, bytes); |
19 | + | ||
20 | + //copy the image data to the GPU. | ||
12 | cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); | 21 | cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); |
13 | 22 | ||
23 | + //call the blurring function from the gpu. | ||
14 | gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); | 24 | gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); |
15 | 25 | ||
26 | + cudaDeviceSynchronize(); | ||
27 | + | ||
28 | + //copy the blur data back to the cpu | ||
29 | + //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); | ||
30 | + | ||
31 | + //assign memory on the gpu for the gradient along the X, y, z. | ||
16 | float* gpu_grad; | 32 | float* gpu_grad; |
17 | cudaMalloc(&gpu_grad, bytes*3); | 33 | cudaMalloc(&gpu_grad, bytes*3); |
18 | 34 | ||
19 | - gpu_gradient3(gpu_grad, gpuI0, x, y, z); | 35 | + //call the gradient function from the gpu. |
36 | + gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z); | ||
37 | + cudaFree(gpuI0); | ||
38 | + | ||
39 | + //assign memory on the gpu for the gradient magnitude | ||
40 | + float* gpu_mag; | ||
41 | + cudaMalloc(&gpu_mag, bytes); | ||
42 | + | ||
43 | + //call the magnitude function | ||
44 | + gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z); | ||
45 | + //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost); | ||
46 | + //assign memory on the gpu for the vote. | ||
47 | + float* gpu_vote; | ||
48 | + cudaMalloc(&gpu_vote, bytes); | ||
20 | 49 | ||
21 | - //copy the image data back to the cpu | ||
22 | - cudaMemcpy(img, gpuI0, x*y*z*sizeof(float), cudaMemcpyDeviceToHost); | ||
23 | - cudaMemcpy(out, gpu_grad, bytes*3, cudaMemcpyDeviceToHost); | 50 | + float cos_phi = cos(phi); |
51 | + | ||
52 | + //call the vote function. | ||
53 | + for (int i = 0; i < iter; i++){ | ||
54 | + | ||
55 | + gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z); | ||
56 | + cudaDeviceSynchronize(); | ||
57 | + if (i==0) | ||
58 | + cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost); | ||
59 | + | ||
60 | + if (phi >= d_phi){ | ||
61 | + gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); | ||
62 | + cudaDeviceSynchronize(); | ||
63 | + phi = phi - d_phi; | ||
64 | + cos_phi = cos(phi); | ||
65 | + } | ||
66 | + | ||
67 | + } | ||
24 | 68 | ||
25 | - cudaFree(gpuI0); | ||
26 | cudaFree(gpu_grad); | 69 | cudaFree(gpu_grad); |
70 | + cudaFree(gpu_mag); | ||
71 | + //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); | ||
72 | + | ||
73 | + //allocate space on the gpu for the final detected cells. | ||
74 | + float* gpu_output; | ||
75 | + cudaMalloc(&gpu_output, bytes); | ||
76 | + | ||
77 | + //call the local max function | ||
78 | + gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z); | ||
79 | + | ||
80 | + //copy the final result to the cpu. | ||
81 | + cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); | ||
82 | + | ||
83 | + | ||
84 | + cudaFree(gpu_vote); | ||
85 | + cudaFree(gpu_output); | ||
86 | + | ||
27 | } | 87 | } |
28 | \ No newline at end of file | 88 | \ No newline at end of file |
cpp/gaussian_blur3.cuh
@@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
6 | #include <stim/cuda/cudatools.h> | 6 | #include <stim/cuda/cudatools.h> |
7 | #include <stim/cuda/sharedmem.cuh> | 7 | #include <stim/cuda/sharedmem.cuh> |
8 | #include <stim/cuda/cudatools/error.h> | 8 | #include <stim/cuda/cudatools/error.h> |
9 | +#include "cuda_fp16.h" | ||
9 | 10 | ||
10 | #define pi 3.14159 | 11 | #define pi 3.14159 |
11 | 12 | ||
@@ -13,14 +14,16 @@ | @@ -13,14 +14,16 @@ | ||
13 | template<typename T> | 14 | template<typename T> |
14 | __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ | 15 | __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ |
15 | 16 | ||
16 | - | ||
17 | //calculate x,y,z coordinates for this thread | 17 | //calculate x,y,z coordinates for this thread |
18 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 18 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
19 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
20 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | 19 | + //find the grid size along y |
20 | + int grid_y = y / blockDim.y; | ||
21 | + int blockidx_y = blockIdx.y % grid_y; | ||
22 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
23 | + int zi = blockIdx.y / grid_y; | ||
21 | int i = zi * x * y + yi * x + xi; | 24 | int i = zi * x * y + yi * x + xi; |
22 | 25 | ||
23 | - // calculate the kernel size | 26 | + // calculate the kernel size |
24 | int k_size = sigma * 4; | 27 | int k_size = sigma * 4; |
25 | 28 | ||
26 | //if the current pixel is outside of the image | 29 | //if the current pixel is outside of the image |
@@ -28,37 +31,26 @@ | @@ -28,37 +31,26 @@ | ||
28 | return; | 31 | return; |
29 | 32 | ||
30 | 33 | ||
31 | - int gx, gy, gz, gi; | 34 | + int gx, gi; |
32 | T G; | 35 | T G; |
33 | T sum = 0; //running weighted sum across the kernel | 36 | T sum = 0; //running weighted sum across the kernel |
34 | out[i] = 0; | 37 | out[i] = 0; |
35 | - | 38 | + T sigma_sq = 2 * sigma * sigma; |
39 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | ||
36 | 40 | ||
37 | //for each element of the kernel | 41 | //for each element of the kernel |
38 | for(int ki = -k_size; ki <= k_size; ki++){ | 42 | for(int ki = -k_size; ki <= k_size; ki++){ |
39 | 43 | ||
40 | //calculate the gaussian value | 44 | //calculate the gaussian value |
41 | - G = (1.0 / (sigma * sqrt(2 * pi))) * exp(-(ki*ki) / (2*sigma*sigma)); | ||
42 | - | ||
43 | - | ||
44 | - | 45 | + G = a * exp(-(ki*ki) / (sigma_sq)); |
45 | //calculate the global coordinates for this point in the kernel | 46 | //calculate the global coordinates for this point in the kernel |
46 | - gx = xi + ki; | ||
47 | - gy = yi; | ||
48 | - gz = zi; | ||
49 | - | ||
50 | - //make sure we are inside the bounds of the image | ||
51 | - if(gx >= 0 && gx < x ){ | ||
52 | - gi = gz * x * y + gy * x + gx; | ||
53 | - //perform the integration | ||
54 | - sum += G * in[gi]; | ||
55 | - } | ||
56 | - | 47 | + gx = (xi + ki) % x; |
48 | + gi = zi * x * y + yi * x + gx; | ||
49 | + sum += G * in[gi]; | ||
57 | } | 50 | } |
58 | 51 | ||
59 | //output the result to global memory | 52 | //output the result to global memory |
60 | out[i] = sum; | 53 | out[i] = sum; |
61 | - | ||
62 | } | 54 | } |
63 | 55 | ||
64 | 56 | ||
@@ -67,11 +59,14 @@ | @@ -67,11 +59,14 @@ | ||
67 | 59 | ||
68 | //calculate x,y,z coordinates for this thread | 60 | //calculate x,y,z coordinates for this thread |
69 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 61 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
70 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
71 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | 62 | + //find the grid size along y |
63 | + int grid_y = y / blockDim.y; | ||
64 | + int blockidx_y = blockIdx.y % grid_y; | ||
65 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
66 | + int zi = blockIdx.y / grid_y; | ||
72 | int i = zi * x * y + yi * x + xi; | 67 | int i = zi * x * y + yi * x + xi; |
73 | - | ||
74 | - // calculate the kernel size | 68 | + |
69 | + // calculate the kernel size | ||
75 | int k_size = sigma * 4; | 70 | int k_size = sigma * 4; |
76 | 71 | ||
77 | //if the current pixel is outside of the image | 72 | //if the current pixel is outside of the image |
@@ -79,34 +74,25 @@ | @@ -79,34 +74,25 @@ | ||
79 | return; | 74 | return; |
80 | 75 | ||
81 | 76 | ||
82 | - int gx, gy, gz, gi; | 77 | + int gy, gi; |
83 | T G; | 78 | T G; |
84 | T sum = 0; //running weighted sum across the kernel | 79 | T sum = 0; //running weighted sum across the kernel |
85 | out[i] = 0; | 80 | out[i] = 0; |
86 | - | 81 | + T sigma_sq = 2 * sigma * sigma; |
82 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | ||
87 | 83 | ||
88 | //for each element of the kernel | 84 | //for each element of the kernel |
89 | for(int ki = -k_size; ki <= k_size; ki++){ | 85 | for(int ki = -k_size; ki <= k_size; ki++){ |
90 | 86 | ||
91 | //calculate the gaussian value | 87 | //calculate the gaussian value |
92 | - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | ||
93 | - | ||
94 | - | ||
95 | - | 88 | + G = a * exp(-(ki*ki) / sigma_sq); |
96 | //calculate the global coordinates for this point in the kernel | 89 | //calculate the global coordinates for this point in the kernel |
97 | - gx = xi; | ||
98 | - gy = yi + ki; | ||
99 | - gz = zi; | ||
100 | - | ||
101 | - //make sure we are inside the bounds of the image | ||
102 | - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){ | ||
103 | - gi = gz * x * y + gy * x + gx; | ||
104 | - //perform the integration | ||
105 | - sum += G * in[gi]; | ||
106 | - } | ||
107 | - | 90 | + gy = (yi + ki ) % y; |
91 | + gi = zi * x * y + gy * x + xi; | ||
92 | + sum += G * in[gi]; | ||
108 | } | 93 | } |
109 | 94 | ||
95 | + | ||
110 | //output the result to global memory | 96 | //output the result to global memory |
111 | out[i] = sum; | 97 | out[i] = sum; |
112 | } | 98 | } |
@@ -116,11 +102,14 @@ | @@ -116,11 +102,14 @@ | ||
116 | 102 | ||
117 | //calculate x,y,z coordinates for this thread | 103 | //calculate x,y,z coordinates for this thread |
118 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 104 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
119 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
120 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | 105 | + //find the grid size along y |
106 | + int grid_y = y / blockDim.y; | ||
107 | + int blockidx_y = blockIdx.y % grid_y; | ||
108 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
109 | + int zi = blockIdx.y / grid_y; | ||
121 | int i = zi * x * y + yi * x + xi; | 110 | int i = zi * x * y + yi * x + xi; |
122 | - | ||
123 | - // calculate the kernel size | 111 | + |
112 | + // calculate the kernel size | ||
124 | int k_size = sigma * 4; | 113 | int k_size = sigma * 4; |
125 | 114 | ||
126 | //if the current pixel is outside of the image | 115 | //if the current pixel is outside of the image |
@@ -128,32 +117,22 @@ | @@ -128,32 +117,22 @@ | ||
128 | return; | 117 | return; |
129 | 118 | ||
130 | 119 | ||
131 | - int gx, gy, gz, gi; | 120 | + int gz, gi; |
132 | T G; | 121 | T G; |
133 | T sum = 0; //running weighted sum across the kernel | 122 | T sum = 0; //running weighted sum across the kernel |
134 | out[i] = 0; | 123 | out[i] = 0; |
135 | - | 124 | + T sigma_sq = 2 * sigma * sigma; |
125 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | ||
136 | 126 | ||
137 | //for each element of the kernel | 127 | //for each element of the kernel |
138 | for(int ki = -k_size; ki <= k_size; ki++){ | 128 | for(int ki = -k_size; ki <= k_size; ki++){ |
139 | 129 | ||
140 | //calculate the gaussian value | 130 | //calculate the gaussian value |
141 | - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | ||
142 | - | ||
143 | - | ||
144 | - | 131 | + G = a * exp(-(ki*ki) / sigma_sq); |
145 | //calculate the global coordinates for this point in the kernel | 132 | //calculate the global coordinates for this point in the kernel |
146 | - gx = xi; | ||
147 | - gy = yi; | ||
148 | - gz = zi + ki; | ||
149 | - | ||
150 | - //make sure we are inside the bounds of the image | ||
151 | - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){ | ||
152 | - gi = gz * x * y + gy * x + gx; | ||
153 | - //perform the integration | ||
154 | - sum += G * in[gi]; | ||
155 | - } | ||
156 | - | 133 | + gz = (zi + ki) % z; |
134 | + gi = gz * x * y + yi * x + xi; | ||
135 | + sum += G * in[gi]; | ||
157 | } | 136 | } |
158 | 137 | ||
159 | //output the result to global memory | 138 | //output the result to global memory |
@@ -161,15 +140,15 @@ | @@ -161,15 +140,15 @@ | ||
161 | } | 140 | } |
162 | 141 | ||
163 | template<typename T> | 142 | template<typename T> |
164 | - void gpu_gaussian_blur3(T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ | 143 | + void gpu_gaussian_blur3(T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){ |
165 | 144 | ||
166 | //get the number of pixels in the image | 145 | //get the number of pixels in the image |
167 | unsigned int pixels = x * y * z; | 146 | unsigned int pixels = x * y * z; |
168 | unsigned int bytes = sizeof(T) * pixels; | 147 | unsigned int bytes = sizeof(T) * pixels; |
169 | 148 | ||
170 | int max_threads = stim::maxThreadsPerBlock(); | 149 | int max_threads = stim::maxThreadsPerBlock(); |
171 | - dim3 threads(512, 1, 1); | ||
172 | - dim3 blocks(x / threads.x + 1, y, z); | 150 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
151 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
173 | 152 | ||
174 | //allocate temporary space on the GPU | 153 | //allocate temporary space on the GPU |
175 | T* gpuIb_x; | 154 | T* gpuIb_x; |
@@ -179,26 +158,25 @@ | @@ -179,26 +158,25 @@ | ||
179 | T* gpuIb_y; | 158 | T* gpuIb_y; |
180 | cudaMalloc(&gpuIb_y, bytes); | 159 | cudaMalloc(&gpuIb_y, bytes); |
181 | 160 | ||
182 | - // blur the original image in the x direction | ||
183 | - blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma, x, y, z); | 161 | + // blur the original image along the x direction |
162 | + blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma[0], x, y, z); | ||
184 | 163 | ||
185 | - // blur the original image in the y direction | ||
186 | - blur_y<float> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma, x, y, z); | 164 | + // blur the x-blurred image along the y direction |
165 | + blur_y<T> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma[1], x, y, z); | ||
187 | 166 | ||
188 | - // blur the original image in the z direction | ||
189 | - blur_z<float> <<< blocks, threads >>>(image, gpuIb_y, sigma, x, y, z); | 167 | + // blur the xy-blurred image along the z direction |
168 | + blur_z<T> <<< blocks, threads >>>(image, gpuIb_y, sigma[2], x, y, z); | ||
190 | 169 | ||
191 | //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice); | 170 | //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice); |
192 | 171 | ||
193 | cudaFree(gpuIb_x); | 172 | cudaFree(gpuIb_x); |
194 | cudaFree(gpuIb_y); | 173 | cudaFree(gpuIb_y); |
195 | - | ||
196 | } | 174 | } |
197 | 175 | ||
198 | 176 | ||
199 | /// Applies a Gaussian blur to a 2D image stored on the CPU | 177 | /// Applies a Gaussian blur to a 2D image stored on the CPU |
200 | template<typename T> | 178 | template<typename T> |
201 | - void cpu_gaussian_blur3(T* blur, T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ | 179 | + void cpu_gaussian_blur3(T* blur, T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){ |
202 | 180 | ||
203 | 181 | ||
204 | //get the number of pixels in the image | 182 | //get the number of pixels in the image |
cpp/gradient3.cuh
@@ -7,23 +7,30 @@ | @@ -7,23 +7,30 @@ | ||
7 | #include <stim/cuda/cudatools/error.h> | 7 | #include <stim/cuda/cudatools/error.h> |
8 | 8 | ||
9 | template<typename T> | 9 | template<typename T> |
10 | -__global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z){ | 10 | +__global__ void gradient3(T* out, T* in, int x, int y, int z){ |
11 | 11 | ||
12 | //calculate x,y,z coordinates for this thread | 12 | //calculate x,y,z coordinates for this thread |
13 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 13 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
14 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
15 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | 14 | + //find the grid size along y |
15 | + int grid_y = y / blockDim.y; | ||
16 | + int blockidx_y = blockIdx.y % grid_y; | ||
17 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
18 | + int zi = blockIdx.y / grid_y; | ||
16 | int i = zi * x * y + yi * x + xi; | 19 | int i = zi * x * y + yi * x + xi; |
17 | 20 | ||
18 | //return if the pixel is outside of the image | 21 | //return if the pixel is outside of the image |
19 | - if(xi >= x || yi >= y || zi>=z) | ||
20 | - return; | 22 | + if(xi >= x || yi >= y || zi>=z) return; |
21 | 23 | ||
22 | //calculate indices for the forward difference | 24 | //calculate indices for the forward difference |
23 | int i_xp = zi * x * y + yi * x + (xi + 1); | 25 | int i_xp = zi * x * y + yi * x + (xi + 1); |
24 | int i_yp = zi * x * y + (yi + 1) * x + xi; | 26 | int i_yp = zi * x * y + (yi + 1) * x + xi; |
25 | int i_zp = (zi + 1) * x * y + yi * x + xi; | 27 | int i_zp = (zi + 1) * x * y + yi * x + xi; |
26 | 28 | ||
29 | + //calculate indices for the backward difference | ||
30 | + int i_xn = zi * x * y + yi * x + (xi - 1); | ||
31 | + int i_yn = zi * x * y + (yi - 1) * x + xi; | ||
32 | + int i_zn = (zi - 1) * x * y + yi * x + xi; | ||
33 | + | ||
27 | //use forward differences if a coordinate is zero | 34 | //use forward differences if a coordinate is zero |
28 | if(xi == 0) | 35 | if(xi == 0) |
29 | out[i * 3 + 0] = in[i_xp] - in[i]; | 36 | out[i * 3 + 0] = in[i_xp] - in[i]; |
@@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne | @@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne | ||
32 | if (zi==0) | 39 | if (zi==0) |
33 | out[i * 3 + 2] = in[i_zp] - in[i]; | 40 | out[i * 3 + 2] = in[i_zp] - in[i]; |
34 | 41 | ||
35 | - //calculate indices for the backward difference | ||
36 | - int i_xn = zi * x * y + yi * x + (xi - 1); | ||
37 | - int i_yn = zi * x * y + (yi - 1) * x + xi; | ||
38 | - int i_zn = (zi - 1) * x * y + yi * x + xi; | ||
39 | - | ||
40 | //use backward differences if the coordinate is at the maximum edge | 42 | //use backward differences if the coordinate is at the maximum edge |
41 | if(xi == x-1) | 43 | if(xi == x-1) |
42 | out[i * 3 + 0] = in[i] - in[i_xn]; | 44 | out[i * 3 + 0] = in[i] - in[i_xn]; |
@@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne | @@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne | ||
48 | //otherwise use central differences | 50 | //otherwise use central differences |
49 | if(xi > 0 && xi < x-1) | 51 | if(xi > 0 && xi < x-1) |
50 | out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2; | 52 | out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2; |
51 | - | ||
52 | if(yi > 0 && yi < y-1) | 53 | if(yi > 0 && yi < y-1) |
53 | out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2; | 54 | out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2; |
54 | if(zi > 0 && zi < z-1) | 55 | if(zi > 0 && zi < z-1) |
55 | - out[i * 3 + 1] = (in[i_zp] - in[i_zn]) / 2; | 56 | + out[i * 3 + 2] = (in[i_zp] - in[i_zn]) / 2; |
56 | 57 | ||
57 | } | 58 | } |
58 | 59 | ||
@@ -60,12 +61,10 @@ template<typename T> | @@ -60,12 +61,10 @@ template<typename T> | ||
60 | 61 | ||
61 | void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){ | 62 | void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){ |
62 | 63 | ||
63 | - //get the number of pixels in the image | ||
64 | - unsigned int pixels = x * y * z; | ||
65 | - | 64 | + |
66 | int max_threads = stim::maxThreadsPerBlock(); | 65 | int max_threads = stim::maxThreadsPerBlock(); |
67 | - dim3 threads(512, 1, 1); | ||
68 | - dim3 blocks(x / threads.x + 1, y, z); | 66 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); |
67 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
69 | 68 | ||
70 | //call the GPU kernel to determine the gradient | 69 | //call the GPU kernel to determine the gradient |
71 | gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z); | 70 | gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z); |
cpp/main.cpp
1 | #include <iostream> | 1 | #include <iostream> |
2 | #include <fstream> | 2 | #include <fstream> |
3 | -//#include<stdio.h> | ||
4 | #include <cuda_runtime.h> | 3 | #include <cuda_runtime.h> |
5 | #include <stim/math/vector.h> | 4 | #include <stim/math/vector.h> |
6 | #include <stim/parser/arguments.h> | 5 | #include <stim/parser/arguments.h> |
@@ -8,85 +7,126 @@ | @@ -8,85 +7,126 @@ | ||
8 | #include <stim/grids/image_stack.h> | 7 | #include <stim/grids/image_stack.h> |
9 | #include <stim/grids/grid.h> | 8 | #include <stim/grids/grid.h> |
10 | #include <stim/visualization/colormap.h> | 9 | #include <stim/visualization/colormap.h> |
11 | - | 10 | +#include <stim/image/image.h> |
12 | #define pi 3.14159 | 11 | #define pi 3.14159 |
13 | 12 | ||
14 | 13 | ||
15 | -void ivote3(float* out, float* img, float std, unsigned int x, unsigned int y, unsigned int z); | ||
16 | - | ||
17 | -int main(){ | ||
18 | - | ||
19 | - float sigma =3; | ||
20 | - | ||
21 | - std::string filename = "nissl-float-512.512.512.vol"; | ||
22 | - unsigned int x = 512; | ||
23 | - unsigned int y = 512; | ||
24 | - unsigned int z = 512; | 14 | +void ivote3(float* center, float* img, float std[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], |
15 | + unsigned int x, unsigned int y, unsigned int z); | ||
16 | + | ||
17 | +void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){ | ||
18 | + for(int ix = 0; ix < x; ix++){ | ||
19 | + for (int iy = 0; iy < y; iy++){ | ||
20 | + for (int iz = 0; iz < z; iz++){ | ||
21 | + int idx = iz * x * y + iy * x + ix; | ||
22 | + cpuI[idx] = 255 - cpuI[idx]; | ||
23 | + } | ||
24 | + } | ||
25 | + } | ||
26 | + } | ||
27 | + | ||
28 | +int main(int argc, char** argv){ | ||
29 | + | ||
30 | + //output advertisement | ||
31 | + std::cout<<std::endl<<std::endl; | ||
32 | + std::cout<<"========================================================================="<<std::endl; | ||
33 | + std::cout<<"Thank you for using the ivote3 segmentation tool!"<<std::endl; | ||
34 | + std::cout<<"Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston"<<std::endl; | ||
35 | + std::cout<<"Developers: Laila Saadatifard and David Mayerich"<<std::endl; | ||
36 | + std::cout<<"Source: https://git.stim.ee.uh.edu/segmentation/ivote3"<<std::endl; | ||
37 | + std::cout<<"========================================================================="<<std::endl<<std::endl; | ||
38 | + | ||
39 | + stim::arglist args; | ||
40 | + | ||
41 | +#ifdef _WIN32 | ||
42 | + args.set_ansi(false); | ||
43 | +#endif | ||
44 | + | ||
45 | + //add arduments | ||
46 | + args.add("help", "prints this help"); | ||
47 | + args.add("x", "size of the dataset along X axis", "positive value"); | ||
48 | + args.add("y", "size of the dataset along Y axis", "positive value"); | ||
49 | + args.add("z", "size of the dataset along Z axis", "positive value"); | ||
50 | + args.add("t", "threshold value for the final result", "positive valu"); | ||
51 | + //parse the command line arguments. | ||
52 | + args.parse(argc, argv); | ||
53 | + | ||
54 | + //display the help text if requested | ||
55 | + if(args["help"].is_set()){ | ||
56 | + std::cout<<std::endl<<"usage: ivote input_image output_list --option [A B C ...]"<<std::endl; | ||
57 | + std::cout<<std::endl<<std::endl | ||
58 | + << "examples: ivote blue.bmp list.txt "<<std::endl; | ||
59 | + | ||
60 | + std::cout<<std::endl<<std::endl; | ||
61 | + std::cout<<args.str()<<std::endl; | ||
62 | + exit(1); | ||
63 | + } | ||
64 | + | ||
65 | + //if the input and output files aren't specified, throw an error and exit | ||
66 | + if(args.nargs() < 2){ | ||
67 | + std::cout<<"ERROR: two files must be specified for segmentation, enter ivote --help for options."<<std::endl<<std::endl; | ||
68 | + exit(1); | ||
69 | + } | ||
70 | + | ||
71 | + //get the input image file | ||
72 | + stim::filename Ifilename(args.arg(0)); | ||
73 | + | ||
74 | + //get the output file name | ||
75 | + stim::filename OutName(args.arg(1)); | ||
76 | + | ||
77 | + //set the x, y, z. | ||
78 | + int x = args["x"].as_int(); | ||
79 | + int y = args["y"].as_int(); | ||
80 | + int z = args["z"].as_int(); | ||
81 | + | ||
82 | + //set the threshold. | ||
83 | + float t = args["t"].as_float(); | ||
84 | + | ||
85 | + unsigned int r[3] = { 9, 9, 5}; | ||
86 | + float sigma[3] = { 3, 3, 1.5}; | ||
87 | + unsigned int conn[3] = { 5, 5, 3}; | ||
88 | + float phi_deg = 20.1; | ||
89 | + float phi = phi_deg * pi /180; | ||
90 | + int iter = 2; | ||
91 | + float d_phi = phi/(iter -1); | ||
92 | + | ||
93 | + std::string filename = Ifilename.str(); | ||
25 | unsigned int bytes = x*y*z*sizeof(float); | 94 | unsigned int bytes = x*y*z*sizeof(float); |
26 | 95 | ||
96 | + //allocate space on the cpu for the input data | ||
27 | float* cpuI = (float*) malloc(bytes); | 97 | float* cpuI = (float*) malloc(bytes); |
28 | 98 | ||
99 | + //load the input file into the cpuI | ||
29 | std::ifstream nissl(filename, std::ios::in | std::ios::binary); | 100 | std::ifstream nissl(filename, std::ios::in | std::ios::binary); |
30 | nissl.read((char*)cpuI, bytes); | 101 | nissl.read((char*)cpuI, bytes); |
31 | nissl.close(); | 102 | nissl.close(); |
32 | 103 | ||
33 | - | ||
34 | - std::ofstream original("original.vol", std::ofstream::out | std::ofstream::binary); | 104 | + invert_data(cpuI, x, y, z); |
105 | + | ||
106 | + //write a new file from the cpuI. | ||
107 | + std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary); | ||
35 | original.write((char*)cpuI, bytes); | 108 | original.write((char*)cpuI, bytes); |
36 | original.close(); | 109 | original.close(); |
37 | - | ||
38 | - float* cpu_grad = (float*) malloc(bytes*3); | ||
39 | - | ||
40 | - ivote3(cpu_grad, cpuI, sigma, x, y, z); | ||
41 | - | ||
42 | - std::ofstream blur("blur.vol", std::ofstream::out | std::ofstream::binary); | ||
43 | - blur.write((char*)cpuI, bytes); | ||
44 | - blur.close(); | ||
45 | - | ||
46 | - | ||
47 | 110 | ||
48 | - | ||
49 | - // load the imagestack from the given directory | ||
50 | - //std::string file_mask = "D:\\source\\ivote3\\data\\nissl\\*.png"; | ||
51 | - /*stim::image_stack<float> I; | ||
52 | - unsigned int x =512; | ||
53 | - unsigned int y =512; | ||
54 | - unsigned int z =512; | ||
55 | - unsigned int bytes = x*y*z* sizeof(float); | 111 | + //allocate space on the cpu for the output result |
112 | + float* cpu_out = (float*) malloc(bytes); | ||
56 | 113 | ||
57 | - I.read("nissl-float-512.512.512.vol", 512, 512, 512, 1, 0); | ||
58 | - //float test = I.get(128, 128, 128); | ||
59 | - //std::cout<< test<<std::endl; | ||
60 | - float* ptr0 = I.data(); | ||
61 | - | ||
62 | - //stim::cpu2image<float>(I.data(), "after_loading.bmp", 512, 512); | ||
63 | - | ||
64 | - float* cpuI = (float*) malloc(bytes); | 114 | + // call the ivote function |
115 | + ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z); | ||
65 | 116 | ||
66 | - memcpy (cpuI, ptr0, bytes); | ||
67 | - | ||
68 | - | ||
69 | - std::ofstream infile ("original-2.vol"); | ||
70 | - infile.write((char*)cpuI, bytes); | ||
71 | - infile.close();*/ | ||
72 | - //stim::cpu2image(cpuI, "00-cpuI.bmp", 512, 1024); | 117 | + //write the blurred file from the cpuI. |
118 | + std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary); | ||
119 | + fblur.write((char*)cpuI, bytes); | ||
120 | + fblur.close(); | ||
121 | + | ||
122 | + //write the output file. | ||
123 | + std::ofstream fo("output/" + OutName.str(), std::ofstream::out | std::ofstream::binary); | ||
124 | + fo.write((char*)cpu_out, bytes); | ||
125 | + fo.close(); | ||
73 | 126 | ||
74 | 127 | ||
75 | - //std::fstream file2; | ||
76 | - //file2.write((char*)cpuI, 512*512*512*sizeof(unsigned char)); | ||
77 | - //I.write("data\\volume1.raw"); | ||
78 | - //test = ptr0[128*128*128]; | ||
79 | - //std::cout<< test<<std::endl; | ||
80 | 128 | ||
81 | - //memset(I.data(), 0, 512*512*512); | ||
82 | - //I.write("data\\volume.raw"); | ||
83 | - //I.save_images("data\\????.bmp"); | 129 | + cudaDeviceReset(); |
84 | 130 | ||
85 | - | ||
86 | - | ||
87 | - | ||
88 | - | ||
89 | - | ||
90 | - cudaDeviceReset(); | ||
91 | } | 131 | } |
92 | 132 |