Commit f12505fb9c8dd14e19310250c06f1665f111a21a

Authored by Laila Saadatifard
1 parent c9aba18a

upload the ivote code that works well on the small size of image ( not working f…

…or 512 cube) and float precision
cpp/CMakeLists.txt
@@ -46,4 +46,5 @@ target_link_libraries(ivote3 @@ -46,4 +46,5 @@ target_link_libraries(ivote3
46 ) 46 )
47 47
48 #copy an image test case 48 #copy an image test case
49 -configure_file(data/nissl/nissl-rat.vol nissl-rat.vol COPYONLY) 49 +configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
  50 +configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
1 #include "gaussian_blur3.cuh" 1 #include "gaussian_blur3.cuh"
2 #include "gradient3.cuh" 2 #include "gradient3.cuh"
  3 +#include "mag3.cuh"
  4 +#include "vote3.cuh"
  5 +#include "update_dir3.cuh"
  6 +#include "local_max3.cuh"
3 7
4 8
5 -void ivote3(float* out, float* img, float sigma, unsigned int x, unsigned int y, unsigned int z){  
6 - //cpu_gaussian_blur_conv3<float>(Ib, img, rhs, x, y, z);  
7 - 9 +void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[],
  10 + int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  11 +
  12 +
  13 + // compute the number of bytes in the input data
8 unsigned int bytes = x * y * z * sizeof(float); 14 unsigned int bytes = x * y * z * sizeof(float);
  15 +
  16 + //assign memory on gpu for the input data.
9 float* gpuI0; 17 float* gpuI0;
10 - cudaMalloc(&gpuI0, bytes);  
11 - //copy the image data to the GPU 18 + cudaMalloc(&gpuI0, bytes);
  19 +
  20 + //copy the image data to the GPU.
12 cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); 21 cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice);
13 22
  23 + //call the blurring function from the gpu.
14 gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); 24 gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z);
15 25
  26 + cudaDeviceSynchronize();
  27 +
  28 + //copy the blur data back to the cpu
  29 + //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost);
  30 +
  31 + //assign memory on the gpu for the gradient along the X, y, z.
16 float* gpu_grad; 32 float* gpu_grad;
17 cudaMalloc(&gpu_grad, bytes*3); 33 cudaMalloc(&gpu_grad, bytes*3);
18 34
19 - gpu_gradient3(gpu_grad, gpuI0, x, y, z); 35 + //call the gradient function from the gpu.
  36 + gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z);
  37 + cudaFree(gpuI0);
  38 +
  39 + //assign memory on the gpu for the gradient magnitude
  40 + float* gpu_mag;
  41 + cudaMalloc(&gpu_mag, bytes);
  42 +
  43 + //call the magnitude function
  44 + gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z);
  45 + //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost);
  46 + //assign memory on the gpu for the vote.
  47 + float* gpu_vote;
  48 + cudaMalloc(&gpu_vote, bytes);
20 49
21 - //copy the image data back to the cpu  
22 - cudaMemcpy(img, gpuI0, x*y*z*sizeof(float), cudaMemcpyDeviceToHost);  
23 - cudaMemcpy(out, gpu_grad, bytes*3, cudaMemcpyDeviceToHost); 50 + float cos_phi = cos(phi);
  51 +
  52 + //call the vote function.
  53 + for (int i = 0; i < iter; i++){
  54 +
  55 + gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z);
  56 + cudaDeviceSynchronize();
  57 + if (i==0)
  58 + cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  59 +
  60 + if (phi >= d_phi){
  61 + gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
  62 + cudaDeviceSynchronize();
  63 + phi = phi - d_phi;
  64 + cos_phi = cos(phi);
  65 + }
  66 +
  67 + }
24 68
25 - cudaFree(gpuI0);  
26 cudaFree(gpu_grad); 69 cudaFree(gpu_grad);
  70 + cudaFree(gpu_mag);
  71 + //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  72 +
  73 + //allocate space on the gpu for the final detected cells.
  74 + float* gpu_output;
  75 + cudaMalloc(&gpu_output, bytes);
  76 +
  77 + //call the local max function
  78 + gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z);
  79 +
  80 + //copy the final result to the cpu.
  81 + cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
  82 +
  83 +
  84 + cudaFree(gpu_vote);
  85 + cudaFree(gpu_output);
  86 +
27 } 87 }
28 \ No newline at end of file 88 \ No newline at end of file
cpp/gaussian_blur3.cuh
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 #include <stim/cuda/cudatools.h> 6 #include <stim/cuda/cudatools.h>
7 #include <stim/cuda/sharedmem.cuh> 7 #include <stim/cuda/sharedmem.cuh>
8 #include <stim/cuda/cudatools/error.h> 8 #include <stim/cuda/cudatools/error.h>
  9 +#include "cuda_fp16.h"
9 10
10 #define pi 3.14159 11 #define pi 3.14159
11 12
@@ -13,14 +14,16 @@ @@ -13,14 +14,16 @@
13 template<typename T> 14 template<typename T>
14 __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ 15 __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
15 16
16 -  
17 //calculate x,y,z coordinates for this thread 17 //calculate x,y,z coordinates for this thread
18 int xi = blockIdx.x * blockDim.x + threadIdx.x; 18 int xi = blockIdx.x * blockDim.x + threadIdx.x;
19 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
20 - int zi = blockIdx.z * blockDim.z + threadIdx.z; 19 + //find the grid size along y
  20 + int grid_y = y / blockDim.y;
  21 + int blockidx_y = blockIdx.y % grid_y;
  22 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  23 + int zi = blockIdx.y / grid_y;
21 int i = zi * x * y + yi * x + xi; 24 int i = zi * x * y + yi * x + xi;
22 25
23 - // calculate the kernel size 26 + // calculate the kernel size
24 int k_size = sigma * 4; 27 int k_size = sigma * 4;
25 28
26 //if the current pixel is outside of the image 29 //if the current pixel is outside of the image
@@ -28,37 +31,26 @@ @@ -28,37 +31,26 @@
28 return; 31 return;
29 32
30 33
31 - int gx, gy, gz, gi; 34 + int gx, gi;
32 T G; 35 T G;
33 T sum = 0; //running weighted sum across the kernel 36 T sum = 0; //running weighted sum across the kernel
34 out[i] = 0; 37 out[i] = 0;
35 - 38 + T sigma_sq = 2 * sigma * sigma;
  39 + T a = 1.0 / (sigma * sqrt(2 * pi));
36 40
37 //for each element of the kernel 41 //for each element of the kernel
38 for(int ki = -k_size; ki <= k_size; ki++){ 42 for(int ki = -k_size; ki <= k_size; ki++){
39 43
40 //calculate the gaussian value 44 //calculate the gaussian value
41 - G = (1.0 / (sigma * sqrt(2 * pi))) * exp(-(ki*ki) / (2*sigma*sigma));  
42 -  
43 -  
44 - 45 + G = a * exp(-(ki*ki) / (sigma_sq));
45 //calculate the global coordinates for this point in the kernel 46 //calculate the global coordinates for this point in the kernel
46 - gx = xi + ki;  
47 - gy = yi;  
48 - gz = zi;  
49 -  
50 - //make sure we are inside the bounds of the image  
51 - if(gx >= 0 && gx < x ){  
52 - gi = gz * x * y + gy * x + gx;  
53 - //perform the integration  
54 - sum += G * in[gi];  
55 - }  
56 - 47 + gx = (xi + ki) % x;
  48 + gi = zi * x * y + yi * x + gx;
  49 + sum += G * in[gi];
57 } 50 }
58 51
59 //output the result to global memory 52 //output the result to global memory
60 out[i] = sum; 53 out[i] = sum;
61 -  
62 } 54 }
63 55
64 56
@@ -67,11 +59,14 @@ @@ -67,11 +59,14 @@
67 59
68 //calculate x,y,z coordinates for this thread 60 //calculate x,y,z coordinates for this thread
69 int xi = blockIdx.x * blockDim.x + threadIdx.x; 61 int xi = blockIdx.x * blockDim.x + threadIdx.x;
70 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
71 - int zi = blockIdx.z * blockDim.z + threadIdx.z; 62 + //find the grid size along y
  63 + int grid_y = y / blockDim.y;
  64 + int blockidx_y = blockIdx.y % grid_y;
  65 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  66 + int zi = blockIdx.y / grid_y;
72 int i = zi * x * y + yi * x + xi; 67 int i = zi * x * y + yi * x + xi;
73 -  
74 - // calculate the kernel size 68 +
  69 + // calculate the kernel size
75 int k_size = sigma * 4; 70 int k_size = sigma * 4;
76 71
77 //if the current pixel is outside of the image 72 //if the current pixel is outside of the image
@@ -79,34 +74,25 @@ @@ -79,34 +74,25 @@
79 return; 74 return;
80 75
81 76
82 - int gx, gy, gz, gi; 77 + int gy, gi;
83 T G; 78 T G;
84 T sum = 0; //running weighted sum across the kernel 79 T sum = 0; //running weighted sum across the kernel
85 out[i] = 0; 80 out[i] = 0;
86 - 81 + T sigma_sq = 2 * sigma * sigma;
  82 + T a = 1.0 / (sigma * sqrt(2 * pi));
87 83
88 //for each element of the kernel 84 //for each element of the kernel
89 for(int ki = -k_size; ki <= k_size; ki++){ 85 for(int ki = -k_size; ki <= k_size; ki++){
90 86
91 //calculate the gaussian value 87 //calculate the gaussian value
92 - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));  
93 -  
94 -  
95 - 88 + G = a * exp(-(ki*ki) / sigma_sq);
96 //calculate the global coordinates for this point in the kernel 89 //calculate the global coordinates for this point in the kernel
97 - gx = xi;  
98 - gy = yi + ki;  
99 - gz = zi;  
100 -  
101 - //make sure we are inside the bounds of the image  
102 - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){  
103 - gi = gz * x * y + gy * x + gx;  
104 - //perform the integration  
105 - sum += G * in[gi];  
106 - }  
107 - 90 + gy = (yi + ki ) % y;
  91 + gi = zi * x * y + gy * x + xi;
  92 + sum += G * in[gi];
108 } 93 }
109 94
  95 +
110 //output the result to global memory 96 //output the result to global memory
111 out[i] = sum; 97 out[i] = sum;
112 } 98 }
@@ -116,11 +102,14 @@ @@ -116,11 +102,14 @@
116 102
117 //calculate x,y,z coordinates for this thread 103 //calculate x,y,z coordinates for this thread
118 int xi = blockIdx.x * blockDim.x + threadIdx.x; 104 int xi = blockIdx.x * blockDim.x + threadIdx.x;
119 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
120 - int zi = blockIdx.z * blockDim.z + threadIdx.z; 105 + //find the grid size along y
  106 + int grid_y = y / blockDim.y;
  107 + int blockidx_y = blockIdx.y % grid_y;
  108 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  109 + int zi = blockIdx.y / grid_y;
121 int i = zi * x * y + yi * x + xi; 110 int i = zi * x * y + yi * x + xi;
122 -  
123 - // calculate the kernel size 111 +
  112 + // calculate the kernel size
124 int k_size = sigma * 4; 113 int k_size = sigma * 4;
125 114
126 //if the current pixel is outside of the image 115 //if the current pixel is outside of the image
@@ -128,32 +117,22 @@ @@ -128,32 +117,22 @@
128 return; 117 return;
129 118
130 119
131 - int gx, gy, gz, gi; 120 + int gz, gi;
132 T G; 121 T G;
133 T sum = 0; //running weighted sum across the kernel 122 T sum = 0; //running weighted sum across the kernel
134 out[i] = 0; 123 out[i] = 0;
135 - 124 + T sigma_sq = 2 * sigma * sigma;
  125 + T a = 1.0 / (sigma * sqrt(2 * pi));
136 126
137 //for each element of the kernel 127 //for each element of the kernel
138 for(int ki = -k_size; ki <= k_size; ki++){ 128 for(int ki = -k_size; ki <= k_size; ki++){
139 129
140 //calculate the gaussian value 130 //calculate the gaussian value
141 - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));  
142 -  
143 -  
144 - 131 + G = a * exp(-(ki*ki) / sigma_sq);
145 //calculate the global coordinates for this point in the kernel 132 //calculate the global coordinates for this point in the kernel
146 - gx = xi;  
147 - gy = yi;  
148 - gz = zi + ki;  
149 -  
150 - //make sure we are inside the bounds of the image  
151 - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){  
152 - gi = gz * x * y + gy * x + gx;  
153 - //perform the integration  
154 - sum += G * in[gi];  
155 - }  
156 - 133 + gz = (zi + ki) % z;
  134 + gi = gz * x * y + yi * x + xi;
  135 + sum += G * in[gi];
157 } 136 }
158 137
159 //output the result to global memory 138 //output the result to global memory
@@ -161,15 +140,15 @@ @@ -161,15 +140,15 @@
161 } 140 }
162 141
163 template<typename T> 142 template<typename T>
164 - void gpu_gaussian_blur3(T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ 143 + void gpu_gaussian_blur3(T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){
165 144
166 //get the number of pixels in the image 145 //get the number of pixels in the image
167 unsigned int pixels = x * y * z; 146 unsigned int pixels = x * y * z;
168 unsigned int bytes = sizeof(T) * pixels; 147 unsigned int bytes = sizeof(T) * pixels;
169 148
170 int max_threads = stim::maxThreadsPerBlock(); 149 int max_threads = stim::maxThreadsPerBlock();
171 - dim3 threads(512, 1, 1);  
172 - dim3 blocks(x / threads.x + 1, y, z); 150 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  151 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
173 152
174 //allocate temporary space on the GPU 153 //allocate temporary space on the GPU
175 T* gpuIb_x; 154 T* gpuIb_x;
@@ -179,26 +158,25 @@ @@ -179,26 +158,25 @@
179 T* gpuIb_y; 158 T* gpuIb_y;
180 cudaMalloc(&gpuIb_y, bytes); 159 cudaMalloc(&gpuIb_y, bytes);
181 160
182 - // blur the original image in the x direction  
183 - blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma, x, y, z); 161 + // blur the original image along the x direction
  162 + blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma[0], x, y, z);
184 163
185 - // blur the original image in the y direction  
186 - blur_y<float> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma, x, y, z); 164 + // blur the x-blurred image along the y direction
  165 + blur_y<T> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma[1], x, y, z);
187 166
188 - // blur the original image in the z direction  
189 - blur_z<float> <<< blocks, threads >>>(image, gpuIb_y, sigma, x, y, z); 167 + // blur the xy-blurred image along the z direction
  168 + blur_z<T> <<< blocks, threads >>>(image, gpuIb_y, sigma[2], x, y, z);
190 169
191 //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice); 170 //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice);
192 171
193 cudaFree(gpuIb_x); 172 cudaFree(gpuIb_x);
194 cudaFree(gpuIb_y); 173 cudaFree(gpuIb_y);
195 -  
196 } 174 }
197 175
198 176
199 /// Applies a Gaussian blur to a 2D image stored on the CPU 177 /// Applies a Gaussian blur to a 2D image stored on the CPU
200 template<typename T> 178 template<typename T>
201 - void cpu_gaussian_blur3(T* blur, T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){ 179 + void cpu_gaussian_blur3(T* blur, T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){
202 180
203 181
204 //get the number of pixels in the image 182 //get the number of pixels in the image
@@ -7,23 +7,30 @@ @@ -7,23 +7,30 @@
7 #include <stim/cuda/cudatools/error.h> 7 #include <stim/cuda/cudatools/error.h>
8 8
9 template<typename T> 9 template<typename T>
10 -__global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z){ 10 +__global__ void gradient3(T* out, T* in, int x, int y, int z){
11 11
12 //calculate x,y,z coordinates for this thread 12 //calculate x,y,z coordinates for this thread
13 int xi = blockIdx.x * blockDim.x + threadIdx.x; 13 int xi = blockIdx.x * blockDim.x + threadIdx.x;
14 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
15 - int zi = blockIdx.z * blockDim.z + threadIdx.z; 14 + //find the grid size along y
  15 + int grid_y = y / blockDim.y;
  16 + int blockidx_y = blockIdx.y % grid_y;
  17 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  18 + int zi = blockIdx.y / grid_y;
16 int i = zi * x * y + yi * x + xi; 19 int i = zi * x * y + yi * x + xi;
17 20
18 //return if the pixel is outside of the image 21 //return if the pixel is outside of the image
19 - if(xi >= x || yi >= y || zi>=z)  
20 - return; 22 + if(xi >= x || yi >= y || zi>=z) return;
21 23
22 //calculate indices for the forward difference 24 //calculate indices for the forward difference
23 int i_xp = zi * x * y + yi * x + (xi + 1); 25 int i_xp = zi * x * y + yi * x + (xi + 1);
24 int i_yp = zi * x * y + (yi + 1) * x + xi; 26 int i_yp = zi * x * y + (yi + 1) * x + xi;
25 int i_zp = (zi + 1) * x * y + yi * x + xi; 27 int i_zp = (zi + 1) * x * y + yi * x + xi;
26 28
  29 + //calculate indices for the backward difference
  30 + int i_xn = zi * x * y + yi * x + (xi - 1);
  31 + int i_yn = zi * x * y + (yi - 1) * x + xi;
  32 + int i_zn = (zi - 1) * x * y + yi * x + xi;
  33 +
27 //use forward differences if a coordinate is zero 34 //use forward differences if a coordinate is zero
28 if(xi == 0) 35 if(xi == 0)
29 out[i * 3 + 0] = in[i_xp] - in[i]; 36 out[i * 3 + 0] = in[i_xp] - in[i];
@@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne @@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne
32 if (zi==0) 39 if (zi==0)
33 out[i * 3 + 2] = in[i_zp] - in[i]; 40 out[i * 3 + 2] = in[i_zp] - in[i];
34 41
35 - //calculate indices for the backward difference  
36 - int i_xn = zi * x * y + yi * x + (xi - 1);  
37 - int i_yn = zi * x * y + (yi - 1) * x + xi;  
38 - int i_zn = (zi - 1) * x * y + yi * x + xi;  
39 -  
40 //use backward differences if the coordinate is at the maximum edge 42 //use backward differences if the coordinate is at the maximum edge
41 if(xi == x-1) 43 if(xi == x-1)
42 out[i * 3 + 0] = in[i] - in[i_xn]; 44 out[i * 3 + 0] = in[i] - in[i_xn];
@@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne @@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne
48 //otherwise use central differences 50 //otherwise use central differences
49 if(xi > 0 && xi < x-1) 51 if(xi > 0 && xi < x-1)
50 out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2; 52 out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2;
51 -  
52 if(yi > 0 && yi < y-1) 53 if(yi > 0 && yi < y-1)
53 out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2; 54 out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2;
54 if(zi > 0 && zi < z-1) 55 if(zi > 0 && zi < z-1)
55 - out[i * 3 + 1] = (in[i_zp] - in[i_zn]) / 2; 56 + out[i * 3 + 2] = (in[i_zp] - in[i_zn]) / 2;
56 57
57 } 58 }
58 59
@@ -60,12 +61,10 @@ template&lt;typename T&gt; @@ -60,12 +61,10 @@ template&lt;typename T&gt;
60 61
61 void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){ 62 void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){
62 63
63 - //get the number of pixels in the image  
64 - unsigned int pixels = x * y * z;  
65 - 64 +
66 int max_threads = stim::maxThreadsPerBlock(); 65 int max_threads = stim::maxThreadsPerBlock();
67 - dim3 threads(512, 1, 1);  
68 - dim3 blocks(x / threads.x + 1, y, z); 66 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  67 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
69 68
70 //call the GPU kernel to determine the gradient 69 //call the GPU kernel to determine the gradient
71 gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z); 70 gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z);
1 #include <iostream> 1 #include <iostream>
2 #include <fstream> 2 #include <fstream>
3 -//#include<stdio.h>  
4 #include <cuda_runtime.h> 3 #include <cuda_runtime.h>
5 #include <stim/math/vector.h> 4 #include <stim/math/vector.h>
6 #include <stim/parser/arguments.h> 5 #include <stim/parser/arguments.h>
@@ -8,85 +7,126 @@ @@ -8,85 +7,126 @@
8 #include <stim/grids/image_stack.h> 7 #include <stim/grids/image_stack.h>
9 #include <stim/grids/grid.h> 8 #include <stim/grids/grid.h>
10 #include <stim/visualization/colormap.h> 9 #include <stim/visualization/colormap.h>
11 - 10 +#include <stim/image/image.h>
12 #define pi 3.14159 11 #define pi 3.14159
13 12
14 13
15 -void ivote3(float* out, float* img, float std, unsigned int x, unsigned int y, unsigned int z);  
16 -  
17 -int main(){  
18 -  
19 - float sigma =3;  
20 -  
21 - std::string filename = "nissl-float-512.512.512.vol";  
22 - unsigned int x = 512;  
23 - unsigned int y = 512;  
24 - unsigned int z = 512; 14 +void ivote3(float* center, float* img, float std[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
  15 + unsigned int x, unsigned int y, unsigned int z);
  16 +
  17 +void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){
  18 + for(int ix = 0; ix < x; ix++){
  19 + for (int iy = 0; iy < y; iy++){
  20 + for (int iz = 0; iz < z; iz++){
  21 + int idx = iz * x * y + iy * x + ix;
  22 + cpuI[idx] = 255 - cpuI[idx];
  23 + }
  24 + }
  25 + }
  26 + }
  27 +
  28 +int main(int argc, char** argv){
  29 +
  30 + //output advertisement
  31 + std::cout<<std::endl<<std::endl;
  32 + std::cout<<"========================================================================="<<std::endl;
  33 + std::cout<<"Thank you for using the ivote3 segmentation tool!"<<std::endl;
  34 + std::cout<<"Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston"<<std::endl;
  35 + std::cout<<"Developers: Laila Saadatifard and David Mayerich"<<std::endl;
  36 + std::cout<<"Source: https://git.stim.ee.uh.edu/segmentation/ivote3"<<std::endl;
  37 + std::cout<<"========================================================================="<<std::endl<<std::endl;
  38 +
  39 + stim::arglist args;
  40 +
  41 +#ifdef _WIN32
  42 + args.set_ansi(false);
  43 +#endif
  44 +
  45 + //add arduments
  46 + args.add("help", "prints this help");
  47 + args.add("x", "size of the dataset along X axis", "positive value");
  48 + args.add("y", "size of the dataset along Y axis", "positive value");
  49 + args.add("z", "size of the dataset along Z axis", "positive value");
  50 + args.add("t", "threshold value for the final result", "positive valu");
  51 + //parse the command line arguments.
  52 + args.parse(argc, argv);
  53 +
  54 + //display the help text if requested
  55 + if(args["help"].is_set()){
  56 + std::cout<<std::endl<<"usage: ivote input_image output_list --option [A B C ...]"<<std::endl;
  57 + std::cout<<std::endl<<std::endl
  58 + << "examples: ivote blue.bmp list.txt "<<std::endl;
  59 +
  60 + std::cout<<std::endl<<std::endl;
  61 + std::cout<<args.str()<<std::endl;
  62 + exit(1);
  63 + }
  64 +
  65 + //if the input and output files aren't specified, throw an error and exit
  66 + if(args.nargs() < 2){
  67 + std::cout<<"ERROR: two files must be specified for segmentation, enter ivote --help for options."<<std::endl<<std::endl;
  68 + exit(1);
  69 + }
  70 +
  71 + //get the input image file
  72 + stim::filename Ifilename(args.arg(0));
  73 +
  74 + //get the output file name
  75 + stim::filename OutName(args.arg(1));
  76 +
  77 + //set the x, y, z.
  78 + int x = args["x"].as_int();
  79 + int y = args["y"].as_int();
  80 + int z = args["z"].as_int();
  81 +
  82 + //set the threshold.
  83 + float t = args["t"].as_float();
  84 +
  85 + unsigned int r[3] = { 9, 9, 5};
  86 + float sigma[3] = { 3, 3, 1.5};
  87 + unsigned int conn[3] = { 5, 5, 3};
  88 + float phi_deg = 20.1;
  89 + float phi = phi_deg * pi /180;
  90 + int iter = 2;
  91 + float d_phi = phi/(iter -1);
  92 +
  93 + std::string filename = Ifilename.str();
25 unsigned int bytes = x*y*z*sizeof(float); 94 unsigned int bytes = x*y*z*sizeof(float);
26 95
  96 + //allocate space on the cpu for the input data
27 float* cpuI = (float*) malloc(bytes); 97 float* cpuI = (float*) malloc(bytes);
28 98
  99 + //load the input file into the cpuI
29 std::ifstream nissl(filename, std::ios::in | std::ios::binary); 100 std::ifstream nissl(filename, std::ios::in | std::ios::binary);
30 nissl.read((char*)cpuI, bytes); 101 nissl.read((char*)cpuI, bytes);
31 nissl.close(); 102 nissl.close();
32 103
33 -  
34 - std::ofstream original("original.vol", std::ofstream::out | std::ofstream::binary); 104 + invert_data(cpuI, x, y, z);
  105 +
  106 + //write a new file from the cpuI.
  107 + std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary);
35 original.write((char*)cpuI, bytes); 108 original.write((char*)cpuI, bytes);
36 original.close(); 109 original.close();
37 -  
38 - float* cpu_grad = (float*) malloc(bytes*3);  
39 -  
40 - ivote3(cpu_grad, cpuI, sigma, x, y, z);  
41 -  
42 - std::ofstream blur("blur.vol", std::ofstream::out | std::ofstream::binary);  
43 - blur.write((char*)cpuI, bytes);  
44 - blur.close();  
45 -  
46 -  
47 110
48 -  
49 - // load the imagestack from the given directory  
50 - //std::string file_mask = "D:\\source\\ivote3\\data\\nissl\\*.png";  
51 - /*stim::image_stack<float> I;  
52 - unsigned int x =512;  
53 - unsigned int y =512;  
54 - unsigned int z =512;  
55 - unsigned int bytes = x*y*z* sizeof(float); 111 + //allocate space on the cpu for the output result
  112 + float* cpu_out = (float*) malloc(bytes);
56 113
57 - I.read("nissl-float-512.512.512.vol", 512, 512, 512, 1, 0);  
58 - //float test = I.get(128, 128, 128);  
59 - //std::cout<< test<<std::endl;  
60 - float* ptr0 = I.data();  
61 -  
62 - //stim::cpu2image<float>(I.data(), "after_loading.bmp", 512, 512);  
63 -  
64 - float* cpuI = (float*) malloc(bytes); 114 + // call the ivote function
  115 + ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z);
65 116
66 - memcpy (cpuI, ptr0, bytes);  
67 -  
68 -  
69 - std::ofstream infile ("original-2.vol");  
70 - infile.write((char*)cpuI, bytes);  
71 - infile.close();*/  
72 - //stim::cpu2image(cpuI, "00-cpuI.bmp", 512, 1024); 117 + //write the blurred file from the cpuI.
  118 + std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary);
  119 + fblur.write((char*)cpuI, bytes);
  120 + fblur.close();
  121 +
  122 + //write the output file.
  123 + std::ofstream fo("output/" + OutName.str(), std::ofstream::out | std::ofstream::binary);
  124 + fo.write((char*)cpu_out, bytes);
  125 + fo.close();
73 126
74 127
75 - //std::fstream file2;  
76 - //file2.write((char*)cpuI, 512*512*512*sizeof(unsigned char));  
77 - //I.write("data\\volume1.raw");  
78 - //test = ptr0[128*128*128];  
79 - //std::cout<< test<<std::endl;  
80 128
81 - //memset(I.data(), 0, 512*512*512);  
82 - //I.write("data\\volume.raw");  
83 - //I.save_images("data\\????.bmp"); 129 + cudaDeviceReset();
84 130
85 -  
86 -  
87 -  
88 -  
89 -  
90 - cudaDeviceReset();  
91 } 131 }
92 132