Commit f12505fb9c8dd14e19310250c06f1665f111a21a
1 parent
c9aba18a
upload the ivote code that works well on the small size of image ( not working f…
…or 512 cube) and float precision
Showing
5 changed files
with
242 additions
and
164 deletions
Show diff stats
cpp/CMakeLists.txt
... | ... | @@ -46,4 +46,5 @@ target_link_libraries(ivote3 |
46 | 46 | ) |
47 | 47 | |
48 | 48 | #copy an image test case |
49 | -configure_file(data/nissl/nissl-rat.vol nissl-rat.vol COPYONLY) | |
49 | +configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) | |
50 | +configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) | ... | ... |
cpp/cudafunc.cu
1 | 1 | #include "gaussian_blur3.cuh" |
2 | 2 | #include "gradient3.cuh" |
3 | +#include "mag3.cuh" | |
4 | +#include "vote3.cuh" | |
5 | +#include "update_dir3.cuh" | |
6 | +#include "local_max3.cuh" | |
3 | 7 | |
4 | 8 | |
5 | -void ivote3(float* out, float* img, float sigma, unsigned int x, unsigned int y, unsigned int z){ | |
6 | - //cpu_gaussian_blur_conv3<float>(Ib, img, rhs, x, y, z); | |
7 | - | |
9 | +void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[], | |
10 | + int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | |
11 | + | |
12 | + | |
13 | + // compute the number of bytes in the input data | |
8 | 14 | unsigned int bytes = x * y * z * sizeof(float); |
15 | + | |
16 | + //assign memory on gpu for the input data. | |
9 | 17 | float* gpuI0; |
10 | - cudaMalloc(&gpuI0, bytes); | |
11 | - //copy the image data to the GPU | |
18 | + cudaMalloc(&gpuI0, bytes); | |
19 | + | |
20 | + //copy the image data to the GPU. | |
12 | 21 | cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); |
13 | 22 | |
23 | + //call the blurring function from the gpu. | |
14 | 24 | gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); |
15 | 25 | |
26 | + cudaDeviceSynchronize(); | |
27 | + | |
28 | + //copy the blur data back to the cpu | |
29 | + //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); | |
30 | + | |
31 | + //assign memory on the gpu for the gradient along the X, y, z. | |
16 | 32 | float* gpu_grad; |
17 | 33 | cudaMalloc(&gpu_grad, bytes*3); |
18 | 34 | |
19 | - gpu_gradient3(gpu_grad, gpuI0, x, y, z); | |
35 | + //call the gradient function from the gpu. | |
36 | + gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z); | |
37 | + cudaFree(gpuI0); | |
38 | + | |
39 | + //assign memory on the gpu for the gradient magnitude | |
40 | + float* gpu_mag; | |
41 | + cudaMalloc(&gpu_mag, bytes); | |
42 | + | |
43 | + //call the magnitude function | |
44 | + gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z); | |
45 | + //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost); | |
46 | + //assign memory on the gpu for the vote. | |
47 | + float* gpu_vote; | |
48 | + cudaMalloc(&gpu_vote, bytes); | |
20 | 49 | |
21 | - //copy the image data back to the cpu | |
22 | - cudaMemcpy(img, gpuI0, x*y*z*sizeof(float), cudaMemcpyDeviceToHost); | |
23 | - cudaMemcpy(out, gpu_grad, bytes*3, cudaMemcpyDeviceToHost); | |
50 | + float cos_phi = cos(phi); | |
51 | + | |
52 | + //call the vote function. | |
53 | + for (int i = 0; i < iter; i++){ | |
54 | + | |
55 | + gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z); | |
56 | + cudaDeviceSynchronize(); | |
57 | + if (i==0) | |
58 | + cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost); | |
59 | + | |
60 | + if (phi >= d_phi){ | |
61 | + gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z); | |
62 | + cudaDeviceSynchronize(); | |
63 | + phi = phi - d_phi; | |
64 | + cos_phi = cos(phi); | |
65 | + } | |
66 | + | |
67 | + } | |
24 | 68 | |
25 | - cudaFree(gpuI0); | |
26 | 69 | cudaFree(gpu_grad); |
70 | + cudaFree(gpu_mag); | |
71 | + //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); | |
72 | + | |
73 | + //allocate space on the gpu for the final detected cells. | |
74 | + float* gpu_output; | |
75 | + cudaMalloc(&gpu_output, bytes); | |
76 | + | |
77 | + //call the local max function | |
78 | + gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z); | |
79 | + | |
80 | + //copy the final result to the cpu. | |
81 | + cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); | |
82 | + | |
83 | + | |
84 | + cudaFree(gpu_vote); | |
85 | + cudaFree(gpu_output); | |
86 | + | |
27 | 87 | } |
28 | 88 | \ No newline at end of file | ... | ... |
cpp/gaussian_blur3.cuh
... | ... | @@ -6,6 +6,7 @@ |
6 | 6 | #include <stim/cuda/cudatools.h> |
7 | 7 | #include <stim/cuda/sharedmem.cuh> |
8 | 8 | #include <stim/cuda/cudatools/error.h> |
9 | +#include "cuda_fp16.h" | |
9 | 10 | |
10 | 11 | #define pi 3.14159 |
11 | 12 | |
... | ... | @@ -13,14 +14,16 @@ |
13 | 14 | template<typename T> |
14 | 15 | __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ |
15 | 16 | |
16 | - | |
17 | 17 | //calculate x,y,z coordinates for this thread |
18 | 18 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
19 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | |
20 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | |
19 | + //find the grid size along y | |
20 | + int grid_y = y / blockDim.y; | |
21 | + int blockidx_y = blockIdx.y % grid_y; | |
22 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
23 | + int zi = blockIdx.y / grid_y; | |
21 | 24 | int i = zi * x * y + yi * x + xi; |
22 | 25 | |
23 | - // calculate the kernel size | |
26 | + // calculate the kernel size | |
24 | 27 | int k_size = sigma * 4; |
25 | 28 | |
26 | 29 | //if the current pixel is outside of the image |
... | ... | @@ -28,37 +31,26 @@ |
28 | 31 | return; |
29 | 32 | |
30 | 33 | |
31 | - int gx, gy, gz, gi; | |
34 | + int gx, gi; | |
32 | 35 | T G; |
33 | 36 | T sum = 0; //running weighted sum across the kernel |
34 | 37 | out[i] = 0; |
35 | - | |
38 | + T sigma_sq = 2 * sigma * sigma; | |
39 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | |
36 | 40 | |
37 | 41 | //for each element of the kernel |
38 | 42 | for(int ki = -k_size; ki <= k_size; ki++){ |
39 | 43 | |
40 | 44 | //calculate the gaussian value |
41 | - G = (1.0 / (sigma * sqrt(2 * pi))) * exp(-(ki*ki) / (2*sigma*sigma)); | |
42 | - | |
43 | - | |
44 | - | |
45 | + G = a * exp(-(ki*ki) / (sigma_sq)); | |
45 | 46 | //calculate the global coordinates for this point in the kernel |
46 | - gx = xi + ki; | |
47 | - gy = yi; | |
48 | - gz = zi; | |
49 | - | |
50 | - //make sure we are inside the bounds of the image | |
51 | - if(gx >= 0 && gx < x ){ | |
52 | - gi = gz * x * y + gy * x + gx; | |
53 | - //perform the integration | |
54 | - sum += G * in[gi]; | |
55 | - } | |
56 | - | |
47 | + gx = (xi + ki) % x; | |
48 | + gi = zi * x * y + yi * x + gx; | |
49 | + sum += G * in[gi]; | |
57 | 50 | } |
58 | 51 | |
59 | 52 | //output the result to global memory |
60 | 53 | out[i] = sum; |
61 | - | |
62 | 54 | } |
63 | 55 | |
64 | 56 | |
... | ... | @@ -67,11 +59,14 @@ |
67 | 59 | |
68 | 60 | //calculate x,y,z coordinates for this thread |
69 | 61 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
70 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | |
71 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | |
62 | + //find the grid size along y | |
63 | + int grid_y = y / blockDim.y; | |
64 | + int blockidx_y = blockIdx.y % grid_y; | |
65 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
66 | + int zi = blockIdx.y / grid_y; | |
72 | 67 | int i = zi * x * y + yi * x + xi; |
73 | - | |
74 | - // calculate the kernel size | |
68 | + | |
69 | + // calculate the kernel size | |
75 | 70 | int k_size = sigma * 4; |
76 | 71 | |
77 | 72 | //if the current pixel is outside of the image |
... | ... | @@ -79,34 +74,25 @@ |
79 | 74 | return; |
80 | 75 | |
81 | 76 | |
82 | - int gx, gy, gz, gi; | |
77 | + int gy, gi; | |
83 | 78 | T G; |
84 | 79 | T sum = 0; //running weighted sum across the kernel |
85 | 80 | out[i] = 0; |
86 | - | |
81 | + T sigma_sq = 2 * sigma * sigma; | |
82 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | |
87 | 83 | |
88 | 84 | //for each element of the kernel |
89 | 85 | for(int ki = -k_size; ki <= k_size; ki++){ |
90 | 86 | |
91 | 87 | //calculate the gaussian value |
92 | - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | |
93 | - | |
94 | - | |
95 | - | |
88 | + G = a * exp(-(ki*ki) / sigma_sq); | |
96 | 89 | //calculate the global coordinates for this point in the kernel |
97 | - gx = xi; | |
98 | - gy = yi + ki; | |
99 | - gz = zi; | |
100 | - | |
101 | - //make sure we are inside the bounds of the image | |
102 | - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){ | |
103 | - gi = gz * x * y + gy * x + gx; | |
104 | - //perform the integration | |
105 | - sum += G * in[gi]; | |
106 | - } | |
107 | - | |
90 | + gy = (yi + ki ) % y; | |
91 | + gi = zi * x * y + gy * x + xi; | |
92 | + sum += G * in[gi]; | |
108 | 93 | } |
109 | 94 | |
95 | + | |
110 | 96 | //output the result to global memory |
111 | 97 | out[i] = sum; |
112 | 98 | } |
... | ... | @@ -116,11 +102,14 @@ |
116 | 102 | |
117 | 103 | //calculate x,y,z coordinates for this thread |
118 | 104 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
119 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | |
120 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | |
105 | + //find the grid size along y | |
106 | + int grid_y = y / blockDim.y; | |
107 | + int blockidx_y = blockIdx.y % grid_y; | |
108 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
109 | + int zi = blockIdx.y / grid_y; | |
121 | 110 | int i = zi * x * y + yi * x + xi; |
122 | - | |
123 | - // calculate the kernel size | |
111 | + | |
112 | + // calculate the kernel size | |
124 | 113 | int k_size = sigma * 4; |
125 | 114 | |
126 | 115 | //if the current pixel is outside of the image |
... | ... | @@ -128,32 +117,22 @@ |
128 | 117 | return; |
129 | 118 | |
130 | 119 | |
131 | - int gx, gy, gz, gi; | |
120 | + int gz, gi; | |
132 | 121 | T G; |
133 | 122 | T sum = 0; //running weighted sum across the kernel |
134 | 123 | out[i] = 0; |
135 | - | |
124 | + T sigma_sq = 2 * sigma * sigma; | |
125 | + T a = 1.0 / (sigma * sqrt(2 * pi)); | |
136 | 126 | |
137 | 127 | //for each element of the kernel |
138 | 128 | for(int ki = -k_size; ki <= k_size; ki++){ |
139 | 129 | |
140 | 130 | //calculate the gaussian value |
141 | - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | |
142 | - | |
143 | - | |
144 | - | |
131 | + G = a * exp(-(ki*ki) / sigma_sq); | |
145 | 132 | //calculate the global coordinates for this point in the kernel |
146 | - gx = xi; | |
147 | - gy = yi; | |
148 | - gz = zi + ki; | |
149 | - | |
150 | - //make sure we are inside the bounds of the image | |
151 | - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){ | |
152 | - gi = gz * x * y + gy * x + gx; | |
153 | - //perform the integration | |
154 | - sum += G * in[gi]; | |
155 | - } | |
156 | - | |
133 | + gz = (zi + ki) % z; | |
134 | + gi = gz * x * y + yi * x + xi; | |
135 | + sum += G * in[gi]; | |
157 | 136 | } |
158 | 137 | |
159 | 138 | //output the result to global memory |
... | ... | @@ -161,15 +140,15 @@ |
161 | 140 | } |
162 | 141 | |
163 | 142 | template<typename T> |
164 | - void gpu_gaussian_blur3(T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ | |
143 | + void gpu_gaussian_blur3(T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){ | |
165 | 144 | |
166 | 145 | //get the number of pixels in the image |
167 | 146 | unsigned int pixels = x * y * z; |
168 | 147 | unsigned int bytes = sizeof(T) * pixels; |
169 | 148 | |
170 | 149 | int max_threads = stim::maxThreadsPerBlock(); |
171 | - dim3 threads(512, 1, 1); | |
172 | - dim3 blocks(x / threads.x + 1, y, z); | |
150 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | |
151 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | |
173 | 152 | |
174 | 153 | //allocate temporary space on the GPU |
175 | 154 | T* gpuIb_x; |
... | ... | @@ -179,26 +158,25 @@ |
179 | 158 | T* gpuIb_y; |
180 | 159 | cudaMalloc(&gpuIb_y, bytes); |
181 | 160 | |
182 | - // blur the original image in the x direction | |
183 | - blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma, x, y, z); | |
161 | + // blur the original image along the x direction | |
162 | + blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma[0], x, y, z); | |
184 | 163 | |
185 | - // blur the original image in the y direction | |
186 | - blur_y<float> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma, x, y, z); | |
164 | + // blur the x-blurred image along the y direction | |
165 | + blur_y<T> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma[1], x, y, z); | |
187 | 166 | |
188 | - // blur the original image in the z direction | |
189 | - blur_z<float> <<< blocks, threads >>>(image, gpuIb_y, sigma, x, y, z); | |
167 | + // blur the xy-blurred image along the z direction | |
168 | + blur_z<T> <<< blocks, threads >>>(image, gpuIb_y, sigma[2], x, y, z); | |
190 | 169 | |
191 | 170 | //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice); |
192 | 171 | |
193 | 172 | cudaFree(gpuIb_x); |
194 | 173 | cudaFree(gpuIb_y); |
195 | - | |
196 | 174 | } |
197 | 175 | |
198 | 176 | |
199 | 177 | /// Applies a Gaussian blur to a 2D image stored on the CPU |
200 | 178 | template<typename T> |
201 | - void cpu_gaussian_blur3(T* blur, T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ | |
179 | + void cpu_gaussian_blur3(T* blur, T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){ | |
202 | 180 | |
203 | 181 | |
204 | 182 | //get the number of pixels in the image | ... | ... |
cpp/gradient3.cuh
... | ... | @@ -7,23 +7,30 @@ |
7 | 7 | #include <stim/cuda/cudatools/error.h> |
8 | 8 | |
9 | 9 | template<typename T> |
10 | -__global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z){ | |
10 | +__global__ void gradient3(T* out, T* in, int x, int y, int z){ | |
11 | 11 | |
12 | 12 | //calculate x,y,z coordinates for this thread |
13 | 13 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
14 | - int yi = blockIdx.y * blockDim.y + threadIdx.y; | |
15 | - int zi = blockIdx.z * blockDim.z + threadIdx.z; | |
14 | + //find the grid size along y | |
15 | + int grid_y = y / blockDim.y; | |
16 | + int blockidx_y = blockIdx.y % grid_y; | |
17 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | |
18 | + int zi = blockIdx.y / grid_y; | |
16 | 19 | int i = zi * x * y + yi * x + xi; |
17 | 20 | |
18 | 21 | //return if the pixel is outside of the image |
19 | - if(xi >= x || yi >= y || zi>=z) | |
20 | - return; | |
22 | + if(xi >= x || yi >= y || zi>=z) return; | |
21 | 23 | |
22 | 24 | //calculate indices for the forward difference |
23 | 25 | int i_xp = zi * x * y + yi * x + (xi + 1); |
24 | 26 | int i_yp = zi * x * y + (yi + 1) * x + xi; |
25 | 27 | int i_zp = (zi + 1) * x * y + yi * x + xi; |
26 | 28 | |
29 | + //calculate indices for the backward difference | |
30 | + int i_xn = zi * x * y + yi * x + (xi - 1); | |
31 | + int i_yn = zi * x * y + (yi - 1) * x + xi; | |
32 | + int i_zn = (zi - 1) * x * y + yi * x + xi; | |
33 | + | |
27 | 34 | //use forward differences if a coordinate is zero |
28 | 35 | if(xi == 0) |
29 | 36 | out[i * 3 + 0] = in[i_xp] - in[i]; |
... | ... | @@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne |
32 | 39 | if (zi==0) |
33 | 40 | out[i * 3 + 2] = in[i_zp] - in[i]; |
34 | 41 | |
35 | - //calculate indices for the backward difference | |
36 | - int i_xn = zi * x * y + yi * x + (xi - 1); | |
37 | - int i_yn = zi * x * y + (yi - 1) * x + xi; | |
38 | - int i_zn = (zi - 1) * x * y + yi * x + xi; | |
39 | - | |
40 | 42 | //use backward differences if the coordinate is at the maximum edge |
41 | 43 | if(xi == x-1) |
42 | 44 | out[i * 3 + 0] = in[i] - in[i_xn]; |
... | ... | @@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne |
48 | 50 | //otherwise use central differences |
49 | 51 | if(xi > 0 && xi < x-1) |
50 | 52 | out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2; |
51 | - | |
52 | 53 | if(yi > 0 && yi < y-1) |
53 | 54 | out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2; |
54 | 55 | if(zi > 0 && zi < z-1) |
55 | - out[i * 3 + 1] = (in[i_zp] - in[i_zn]) / 2; | |
56 | + out[i * 3 + 2] = (in[i_zp] - in[i_zn]) / 2; | |
56 | 57 | |
57 | 58 | } |
58 | 59 | |
... | ... | @@ -60,12 +61,10 @@ template<typename T> |
60 | 61 | |
61 | 62 | void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){ |
62 | 63 | |
63 | - //get the number of pixels in the image | |
64 | - unsigned int pixels = x * y * z; | |
65 | - | |
64 | + | |
66 | 65 | int max_threads = stim::maxThreadsPerBlock(); |
67 | - dim3 threads(512, 1, 1); | |
68 | - dim3 blocks(x / threads.x + 1, y, z); | |
66 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | |
67 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | |
69 | 68 | |
70 | 69 | //call the GPU kernel to determine the gradient |
71 | 70 | gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z); | ... | ... |
cpp/main.cpp
1 | 1 | #include <iostream> |
2 | 2 | #include <fstream> |
3 | -//#include<stdio.h> | |
4 | 3 | #include <cuda_runtime.h> |
5 | 4 | #include <stim/math/vector.h> |
6 | 5 | #include <stim/parser/arguments.h> |
... | ... | @@ -8,85 +7,126 @@ |
8 | 7 | #include <stim/grids/image_stack.h> |
9 | 8 | #include <stim/grids/grid.h> |
10 | 9 | #include <stim/visualization/colormap.h> |
11 | - | |
10 | +#include <stim/image/image.h> | |
12 | 11 | #define pi 3.14159 |
13 | 12 | |
14 | 13 | |
15 | -void ivote3(float* out, float* img, float std, unsigned int x, unsigned int y, unsigned int z); | |
16 | - | |
17 | -int main(){ | |
18 | - | |
19 | - float sigma =3; | |
20 | - | |
21 | - std::string filename = "nissl-float-512.512.512.vol"; | |
22 | - unsigned int x = 512; | |
23 | - unsigned int y = 512; | |
24 | - unsigned int z = 512; | |
14 | +void ivote3(float* center, float* img, float std[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], | |
15 | + unsigned int x, unsigned int y, unsigned int z); | |
16 | + | |
17 | +void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){ | |
18 | + for(int ix = 0; ix < x; ix++){ | |
19 | + for (int iy = 0; iy < y; iy++){ | |
20 | + for (int iz = 0; iz < z; iz++){ | |
21 | + int idx = iz * x * y + iy * x + ix; | |
22 | + cpuI[idx] = 255 - cpuI[idx]; | |
23 | + } | |
24 | + } | |
25 | + } | |
26 | + } | |
27 | + | |
28 | +int main(int argc, char** argv){ | |
29 | + | |
30 | + //output advertisement | |
31 | + std::cout<<std::endl<<std::endl; | |
32 | + std::cout<<"========================================================================="<<std::endl; | |
33 | + std::cout<<"Thank you for using the ivote3 segmentation tool!"<<std::endl; | |
34 | + std::cout<<"Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston"<<std::endl; | |
35 | + std::cout<<"Developers: Laila Saadatifard and David Mayerich"<<std::endl; | |
36 | + std::cout<<"Source: https://git.stim.ee.uh.edu/segmentation/ivote3"<<std::endl; | |
37 | + std::cout<<"========================================================================="<<std::endl<<std::endl; | |
38 | + | |
39 | + stim::arglist args; | |
40 | + | |
41 | +#ifdef _WIN32 | |
42 | + args.set_ansi(false); | |
43 | +#endif | |
44 | + | |
45 | + //add arduments | |
46 | + args.add("help", "prints this help"); | |
47 | + args.add("x", "size of the dataset along X axis", "positive value"); | |
48 | + args.add("y", "size of the dataset along Y axis", "positive value"); | |
49 | + args.add("z", "size of the dataset along Z axis", "positive value"); | |
50 | + args.add("t", "threshold value for the final result", "positive valu"); | |
51 | + //parse the command line arguments. | |
52 | + args.parse(argc, argv); | |
53 | + | |
54 | + //display the help text if requested | |
55 | + if(args["help"].is_set()){ | |
56 | + std::cout<<std::endl<<"usage: ivote input_image output_list --option [A B C ...]"<<std::endl; | |
57 | + std::cout<<std::endl<<std::endl | |
58 | + << "examples: ivote blue.bmp list.txt "<<std::endl; | |
59 | + | |
60 | + std::cout<<std::endl<<std::endl; | |
61 | + std::cout<<args.str()<<std::endl; | |
62 | + exit(1); | |
63 | + } | |
64 | + | |
65 | + //if the input and output files aren't specified, throw an error and exit | |
66 | + if(args.nargs() < 2){ | |
67 | + std::cout<<"ERROR: two files must be specified for segmentation, enter ivote --help for options."<<std::endl<<std::endl; | |
68 | + exit(1); | |
69 | + } | |
70 | + | |
71 | + //get the input image file | |
72 | + stim::filename Ifilename(args.arg(0)); | |
73 | + | |
74 | + //get the output file name | |
75 | + stim::filename OutName(args.arg(1)); | |
76 | + | |
77 | + //set the x, y, z. | |
78 | + int x = args["x"].as_int(); | |
79 | + int y = args["y"].as_int(); | |
80 | + int z = args["z"].as_int(); | |
81 | + | |
82 | + //set the threshold. | |
83 | + float t = args["t"].as_float(); | |
84 | + | |
85 | + unsigned int r[3] = { 9, 9, 5}; | |
86 | + float sigma[3] = { 3, 3, 1.5}; | |
87 | + unsigned int conn[3] = { 5, 5, 3}; | |
88 | + float phi_deg = 20.1; | |
89 | + float phi = phi_deg * pi /180; | |
90 | + int iter = 2; | |
91 | + float d_phi = phi/(iter -1); | |
92 | + | |
93 | + std::string filename = Ifilename.str(); | |
25 | 94 | unsigned int bytes = x*y*z*sizeof(float); |
26 | 95 | |
96 | + //allocate space on the cpu for the input data | |
27 | 97 | float* cpuI = (float*) malloc(bytes); |
28 | 98 | |
99 | + //load the input file into the cpuI | |
29 | 100 | std::ifstream nissl(filename, std::ios::in | std::ios::binary); |
30 | 101 | nissl.read((char*)cpuI, bytes); |
31 | 102 | nissl.close(); |
32 | 103 | |
33 | - | |
34 | - std::ofstream original("original.vol", std::ofstream::out | std::ofstream::binary); | |
104 | + invert_data(cpuI, x, y, z); | |
105 | + | |
106 | + //write a new file from the cpuI. | |
107 | + std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary); | |
35 | 108 | original.write((char*)cpuI, bytes); |
36 | 109 | original.close(); |
37 | - | |
38 | - float* cpu_grad = (float*) malloc(bytes*3); | |
39 | - | |
40 | - ivote3(cpu_grad, cpuI, sigma, x, y, z); | |
41 | - | |
42 | - std::ofstream blur("blur.vol", std::ofstream::out | std::ofstream::binary); | |
43 | - blur.write((char*)cpuI, bytes); | |
44 | - blur.close(); | |
45 | - | |
46 | - | |
47 | 110 | |
48 | - | |
49 | - // load the imagestack from the given directory | |
50 | - //std::string file_mask = "D:\\source\\ivote3\\data\\nissl\\*.png"; | |
51 | - /*stim::image_stack<float> I; | |
52 | - unsigned int x =512; | |
53 | - unsigned int y =512; | |
54 | - unsigned int z =512; | |
55 | - unsigned int bytes = x*y*z* sizeof(float); | |
111 | + //allocate space on the cpu for the output result | |
112 | + float* cpu_out = (float*) malloc(bytes); | |
56 | 113 | |
57 | - I.read("nissl-float-512.512.512.vol", 512, 512, 512, 1, 0); | |
58 | - //float test = I.get(128, 128, 128); | |
59 | - //std::cout<< test<<std::endl; | |
60 | - float* ptr0 = I.data(); | |
61 | - | |
62 | - //stim::cpu2image<float>(I.data(), "after_loading.bmp", 512, 512); | |
63 | - | |
64 | - float* cpuI = (float*) malloc(bytes); | |
114 | + // call the ivote function | |
115 | + ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z); | |
65 | 116 | |
66 | - memcpy (cpuI, ptr0, bytes); | |
67 | - | |
68 | - | |
69 | - std::ofstream infile ("original-2.vol"); | |
70 | - infile.write((char*)cpuI, bytes); | |
71 | - infile.close();*/ | |
72 | - //stim::cpu2image(cpuI, "00-cpuI.bmp", 512, 1024); | |
117 | + //write the blurred file from the cpuI. | |
118 | + std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary); | |
119 | + fblur.write((char*)cpuI, bytes); | |
120 | + fblur.close(); | |
121 | + | |
122 | + //write the output file. | |
123 | + std::ofstream fo("output/" + OutName.str(), std::ofstream::out | std::ofstream::binary); | |
124 | + fo.write((char*)cpu_out, bytes); | |
125 | + fo.close(); | |
73 | 126 | |
74 | 127 | |
75 | - //std::fstream file2; | |
76 | - //file2.write((char*)cpuI, 512*512*512*sizeof(unsigned char)); | |
77 | - //I.write("data\\volume1.raw"); | |
78 | - //test = ptr0[128*128*128]; | |
79 | - //std::cout<< test<<std::endl; | |
80 | 128 | |
81 | - //memset(I.data(), 0, 512*512*512); | |
82 | - //I.write("data\\volume.raw"); | |
83 | - //I.save_images("data\\????.bmp"); | |
129 | + cudaDeviceReset(); | |
84 | 130 | |
85 | - | |
86 | - | |
87 | - | |
88 | - | |
89 | - | |
90 | - cudaDeviceReset(); | |
91 | 131 | } |
92 | 132 | ... | ... |