Commit f12505fb9c8dd14e19310250c06f1665f111a21a

Authored by Laila Saadatifard
1 parent c9aba18a

upload the ivote code that works well on the small size of image ( not working f…

…or 512 cube) and float precision
cpp/CMakeLists.txt
... ... @@ -46,4 +46,5 @@ target_link_libraries(ivote3
46 46 )
47 47  
48 48 #copy an image test case
49   -configure_file(data/nissl/nissl-rat.vol nissl-rat.vol COPYONLY)
  49 +configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
  50 +configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
... ...
cpp/cudafunc.cu
1 1 #include "gaussian_blur3.cuh"
2 2 #include "gradient3.cuh"
  3 +#include "mag3.cuh"
  4 +#include "vote3.cuh"
  5 +#include "update_dir3.cuh"
  6 +#include "local_max3.cuh"
3 7  
4 8  
5   -void ivote3(float* out, float* img, float sigma, unsigned int x, unsigned int y, unsigned int z){
6   - //cpu_gaussian_blur_conv3<float>(Ib, img, rhs, x, y, z);
7   -
  9 +void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[],
  10 + int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  11 +
  12 +
  13 + // compute the number of bytes in the input data
8 14 unsigned int bytes = x * y * z * sizeof(float);
  15 +
  16 + //assign memory on gpu for the input data.
9 17 float* gpuI0;
10   - cudaMalloc(&gpuI0, bytes);
11   - //copy the image data to the GPU
  18 + cudaMalloc(&gpuI0, bytes);
  19 +
  20 + //copy the image data to the GPU.
12 21 cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice);
13 22  
  23 + //call the blurring function from the gpu.
14 24 gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z);
15 25  
  26 + cudaDeviceSynchronize();
  27 +
  28 + //copy the blur data back to the cpu
  29 + //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost);
  30 +
  31 + //assign memory on the gpu for the gradient along the X, y, z.
16 32 float* gpu_grad;
17 33 cudaMalloc(&gpu_grad, bytes*3);
18 34  
19   - gpu_gradient3(gpu_grad, gpuI0, x, y, z);
  35 + //call the gradient function from the gpu.
  36 + gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z);
  37 + cudaFree(gpuI0);
  38 +
  39 + //assign memory on the gpu for the gradient magnitude
  40 + float* gpu_mag;
  41 + cudaMalloc(&gpu_mag, bytes);
  42 +
  43 + //call the magnitude function
  44 + gpu_mag3<float>(gpu_mag, gpu_grad, x, y, z);
  45 + //cudaMemcpy(img, gpu_mag, bytes, cudaMemcpyDeviceToHost);
  46 + //assign memory on the gpu for the vote.
  47 + float* gpu_vote;
  48 + cudaMalloc(&gpu_vote, bytes);
20 49  
21   - //copy the image data back to the cpu
22   - cudaMemcpy(img, gpuI0, x*y*z*sizeof(float), cudaMemcpyDeviceToHost);
23   - cudaMemcpy(out, gpu_grad, bytes*3, cudaMemcpyDeviceToHost);
  50 + float cos_phi = cos(phi);
  51 +
  52 + //call the vote function.
  53 + for (int i = 0; i < iter; i++){
  54 +
  55 + gpu_vote3<float>(gpu_vote, gpu_grad, gpu_mag, cos_phi, r, x, y, z);
  56 + cudaDeviceSynchronize();
  57 + if (i==0)
  58 + cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  59 +
  60 + if (phi >= d_phi){
  61 + gpu_update_dir3<float>(gpu_grad, gpu_vote, cos_phi, r, x, y, z);
  62 + cudaDeviceSynchronize();
  63 + phi = phi - d_phi;
  64 + cos_phi = cos(phi);
  65 + }
  66 +
  67 + }
24 68  
25   - cudaFree(gpuI0);
26 69 cudaFree(gpu_grad);
  70 + cudaFree(gpu_mag);
  71 + //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  72 +
  73 + //allocate space on the gpu for the final detected cells.
  74 + float* gpu_output;
  75 + cudaMalloc(&gpu_output, bytes);
  76 +
  77 + //call the local max function
  78 + gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z);
  79 +
  80 + //copy the final result to the cpu.
  81 + cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
  82 +
  83 +
  84 + cudaFree(gpu_vote);
  85 + cudaFree(gpu_output);
  86 +
27 87 }
28 88 \ No newline at end of file
... ...
cpp/gaussian_blur3.cuh
... ... @@ -6,6 +6,7 @@
6 6 #include <stim/cuda/cudatools.h>
7 7 #include <stim/cuda/sharedmem.cuh>
8 8 #include <stim/cuda/cudatools/error.h>
  9 +#include "cuda_fp16.h"
9 10  
10 11 #define pi 3.14159
11 12  
... ... @@ -13,14 +14,16 @@
13 14 template<typename T>
14 15 __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
15 16  
16   -
17 17 //calculate x,y,z coordinates for this thread
18 18 int xi = blockIdx.x * blockDim.x + threadIdx.x;
19   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
20   - int zi = blockIdx.z * blockDim.z + threadIdx.z;
  19 + //find the grid size along y
  20 + int grid_y = y / blockDim.y;
  21 + int blockidx_y = blockIdx.y % grid_y;
  22 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  23 + int zi = blockIdx.y / grid_y;
21 24 int i = zi * x * y + yi * x + xi;
22 25  
23   - // calculate the kernel size
  26 + // calculate the kernel size
24 27 int k_size = sigma * 4;
25 28  
26 29 //if the current pixel is outside of the image
... ... @@ -28,37 +31,26 @@
28 31 return;
29 32  
30 33  
31   - int gx, gy, gz, gi;
  34 + int gx, gi;
32 35 T G;
33 36 T sum = 0; //running weighted sum across the kernel
34 37 out[i] = 0;
35   -
  38 + T sigma_sq = 2 * sigma * sigma;
  39 + T a = 1.0 / (sigma * sqrt(2 * pi));
36 40  
37 41 //for each element of the kernel
38 42 for(int ki = -k_size; ki <= k_size; ki++){
39 43  
40 44 //calculate the gaussian value
41   - G = (1.0 / (sigma * sqrt(2 * pi))) * exp(-(ki*ki) / (2*sigma*sigma));
42   -
43   -
44   -
  45 + G = a * exp(-(ki*ki) / (sigma_sq));
45 46 //calculate the global coordinates for this point in the kernel
46   - gx = xi + ki;
47   - gy = yi;
48   - gz = zi;
49   -
50   - //make sure we are inside the bounds of the image
51   - if(gx >= 0 && gx < x ){
52   - gi = gz * x * y + gy * x + gx;
53   - //perform the integration
54   - sum += G * in[gi];
55   - }
56   -
  47 + gx = (xi + ki) % x;
  48 + gi = zi * x * y + yi * x + gx;
  49 + sum += G * in[gi];
57 50 }
58 51  
59 52 //output the result to global memory
60 53 out[i] = sum;
61   -
62 54 }
63 55  
64 56  
... ... @@ -67,11 +59,14 @@
67 59  
68 60 //calculate x,y,z coordinates for this thread
69 61 int xi = blockIdx.x * blockDim.x + threadIdx.x;
70   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
71   - int zi = blockIdx.z * blockDim.z + threadIdx.z;
  62 + //find the grid size along y
  63 + int grid_y = y / blockDim.y;
  64 + int blockidx_y = blockIdx.y % grid_y;
  65 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  66 + int zi = blockIdx.y / grid_y;
72 67 int i = zi * x * y + yi * x + xi;
73   -
74   - // calculate the kernel size
  68 +
  69 + // calculate the kernel size
75 70 int k_size = sigma * 4;
76 71  
77 72 //if the current pixel is outside of the image
... ... @@ -79,34 +74,25 @@
79 74 return;
80 75  
81 76  
82   - int gx, gy, gz, gi;
  77 + int gy, gi;
83 78 T G;
84 79 T sum = 0; //running weighted sum across the kernel
85 80 out[i] = 0;
86   -
  81 + T sigma_sq = 2 * sigma * sigma;
  82 + T a = 1.0 / (sigma * sqrt(2 * pi));
87 83  
88 84 //for each element of the kernel
89 85 for(int ki = -k_size; ki <= k_size; ki++){
90 86  
91 87 //calculate the gaussian value
92   - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));
93   -
94   -
95   -
  88 + G = a * exp(-(ki*ki) / sigma_sq);
96 89 //calculate the global coordinates for this point in the kernel
97   - gx = xi;
98   - gy = yi + ki;
99   - gz = zi;
100   -
101   - //make sure we are inside the bounds of the image
102   - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){
103   - gi = gz * x * y + gy * x + gx;
104   - //perform the integration
105   - sum += G * in[gi];
106   - }
107   -
  90 + gy = (yi + ki ) % y;
  91 + gi = zi * x * y + gy * x + xi;
  92 + sum += G * in[gi];
108 93 }
109 94  
  95 +
110 96 //output the result to global memory
111 97 out[i] = sum;
112 98 }
... ... @@ -116,11 +102,14 @@
116 102  
117 103 //calculate x,y,z coordinates for this thread
118 104 int xi = blockIdx.x * blockDim.x + threadIdx.x;
119   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
120   - int zi = blockIdx.z * blockDim.z + threadIdx.z;
  105 + //find the grid size along y
  106 + int grid_y = y / blockDim.y;
  107 + int blockidx_y = blockIdx.y % grid_y;
  108 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  109 + int zi = blockIdx.y / grid_y;
121 110 int i = zi * x * y + yi * x + xi;
122   -
123   - // calculate the kernel size
  111 +
  112 + // calculate the kernel size
124 113 int k_size = sigma * 4;
125 114  
126 115 //if the current pixel is outside of the image
... ... @@ -128,32 +117,22 @@
128 117 return;
129 118  
130 119  
131   - int gx, gy, gz, gi;
  120 + int gz, gi;
132 121 T G;
133 122 T sum = 0; //running weighted sum across the kernel
134 123 out[i] = 0;
135   -
  124 + T sigma_sq = 2 * sigma * sigma;
  125 + T a = 1.0 / (sigma * sqrt(2 * pi));
136 126  
137 127 //for each element of the kernel
138 128 for(int ki = -k_size; ki <= k_size; ki++){
139 129  
140 130 //calculate the gaussian value
141   - G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));
142   -
143   -
144   -
  131 + G = a * exp(-(ki*ki) / sigma_sq);
145 132 //calculate the global coordinates for this point in the kernel
146   - gx = xi;
147   - gy = yi;
148   - gz = zi + ki;
149   -
150   - //make sure we are inside the bounds of the image
151   - if(gx >= 0 && gy >= 0 && gz >= 0 && gx < x && gy < y && gz < z){
152   - gi = gz * x * y + gy * x + gx;
153   - //perform the integration
154   - sum += G * in[gi];
155   - }
156   -
  133 + gz = (zi + ki) % z;
  134 + gi = gz * x * y + yi * x + xi;
  135 + sum += G * in[gi];
157 136 }
158 137  
159 138 //output the result to global memory
... ... @@ -161,15 +140,15 @@
161 140 }
162 141  
163 142 template<typename T>
164   - void gpu_gaussian_blur3(T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){
  143 + void gpu_gaussian_blur3(T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){
165 144  
166 145 //get the number of pixels in the image
167 146 unsigned int pixels = x * y * z;
168 147 unsigned int bytes = sizeof(T) * pixels;
169 148  
170 149 int max_threads = stim::maxThreadsPerBlock();
171   - dim3 threads(512, 1, 1);
172   - dim3 blocks(x / threads.x + 1, y, z);
  150 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  151 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
173 152  
174 153 //allocate temporary space on the GPU
175 154 T* gpuIb_x;
... ... @@ -179,26 +158,25 @@
179 158 T* gpuIb_y;
180 159 cudaMalloc(&gpuIb_y, bytes);
181 160  
182   - // blur the original image in the x direction
183   - blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma, x, y, z);
  161 + // blur the original image along the x direction
  162 + blur_x<T> <<< blocks, threads >>>(gpuIb_x, image, sigma[0], x, y, z);
184 163  
185   - // blur the original image in the y direction
186   - blur_y<float> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma, x, y, z);
  164 + // blur the x-blurred image along the y direction
  165 + blur_y<T> <<< blocks, threads >>>(gpuIb_y, gpuIb_x, sigma[1], x, y, z);
187 166  
188   - // blur the original image in the z direction
189   - blur_z<float> <<< blocks, threads >>>(image, gpuIb_y, sigma, x, y, z);
  167 + // blur the xy-blurred image along the z direction
  168 + blur_z<T> <<< blocks, threads >>>(image, gpuIb_y, sigma[2], x, y, z);
190 169  
191 170 //cudaMemcpy(image, gpuIb_y, bytes, cudaMemcpyDeviceToDevice);
192 171  
193 172 cudaFree(gpuIb_x);
194 173 cudaFree(gpuIb_y);
195   -
196 174 }
197 175  
198 176  
199 177 /// Applies a Gaussian blur to a 2D image stored on the CPU
200 178 template<typename T>
201   - void cpu_gaussian_blur3(T* blur, T* image, T sigma, unsigned int x, unsigned int y, unsigned int z){
  179 + void cpu_gaussian_blur3(T* blur, T* image, T sigma[], unsigned int x, unsigned int y, unsigned int z){
202 180  
203 181  
204 182 //get the number of pixels in the image
... ...
cpp/gradient3.cuh
... ... @@ -7,23 +7,30 @@
7 7 #include <stim/cuda/cudatools/error.h>
8 8  
9 9 template<typename T>
10   -__global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z){
  10 +__global__ void gradient3(T* out, T* in, int x, int y, int z){
11 11  
12 12 //calculate x,y,z coordinates for this thread
13 13 int xi = blockIdx.x * blockDim.x + threadIdx.x;
14   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
15   - int zi = blockIdx.z * blockDim.z + threadIdx.z;
  14 + //find the grid size along y
  15 + int grid_y = y / blockDim.y;
  16 + int blockidx_y = blockIdx.y % grid_y;
  17 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  18 + int zi = blockIdx.y / grid_y;
16 19 int i = zi * x * y + yi * x + xi;
17 20  
18 21 //return if the pixel is outside of the image
19   - if(xi >= x || yi >= y || zi>=z)
20   - return;
  22 + if(xi >= x || yi >= y || zi>=z) return;
21 23  
22 24 //calculate indices for the forward difference
23 25 int i_xp = zi * x * y + yi * x + (xi + 1);
24 26 int i_yp = zi * x * y + (yi + 1) * x + xi;
25 27 int i_zp = (zi + 1) * x * y + yi * x + xi;
26 28  
  29 + //calculate indices for the backward difference
  30 + int i_xn = zi * x * y + yi * x + (xi - 1);
  31 + int i_yn = zi * x * y + (yi - 1) * x + xi;
  32 + int i_zn = (zi - 1) * x * y + yi * x + xi;
  33 +
27 34 //use forward differences if a coordinate is zero
28 35 if(xi == 0)
29 36 out[i * 3 + 0] = in[i_xp] - in[i];
... ... @@ -32,11 +39,6 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne
32 39 if (zi==0)
33 40 out[i * 3 + 2] = in[i_zp] - in[i];
34 41  
35   - //calculate indices for the backward difference
36   - int i_xn = zi * x * y + yi * x + (xi - 1);
37   - int i_yn = zi * x * y + (yi - 1) * x + xi;
38   - int i_zn = (zi - 1) * x * y + yi * x + xi;
39   -
40 42 //use backward differences if the coordinate is at the maximum edge
41 43 if(xi == x-1)
42 44 out[i * 3 + 0] = in[i] - in[i_xn];
... ... @@ -48,11 +50,10 @@ __global__ void gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigne
48 50 //otherwise use central differences
49 51 if(xi > 0 && xi < x-1)
50 52 out[i * 3 + 0] = (in[i_xp] - in[i_xn]) / 2;
51   -
52 53 if(yi > 0 && yi < y-1)
53 54 out[i * 3 + 1] = (in[i_yp] - in[i_yn]) / 2;
54 55 if(zi > 0 && zi < z-1)
55   - out[i * 3 + 1] = (in[i_zp] - in[i_zn]) / 2;
  56 + out[i * 3 + 2] = (in[i_zp] - in[i_zn]) / 2;
56 57  
57 58 }
58 59  
... ... @@ -60,12 +61,10 @@ template&lt;typename T&gt;
60 61  
61 62 void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){
62 63  
63   - //get the number of pixels in the image
64   - unsigned int pixels = x * y * z;
65   -
  64 +
66 65 int max_threads = stim::maxThreadsPerBlock();
67   - dim3 threads(512, 1, 1);
68   - dim3 blocks(x / threads.x + 1, y, z);
  66 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  67 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
69 68  
70 69 //call the GPU kernel to determine the gradient
71 70 gradient3<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z);
... ...
cpp/main.cpp
1 1 #include <iostream>
2 2 #include <fstream>
3   -//#include<stdio.h>
4 3 #include <cuda_runtime.h>
5 4 #include <stim/math/vector.h>
6 5 #include <stim/parser/arguments.h>
... ... @@ -8,85 +7,126 @@
8 7 #include <stim/grids/image_stack.h>
9 8 #include <stim/grids/grid.h>
10 9 #include <stim/visualization/colormap.h>
11   -
  10 +#include <stim/image/image.h>
12 11 #define pi 3.14159
13 12  
14 13  
15   -void ivote3(float* out, float* img, float std, unsigned int x, unsigned int y, unsigned int z);
16   -
17   -int main(){
18   -
19   - float sigma =3;
20   -
21   - std::string filename = "nissl-float-512.512.512.vol";
22   - unsigned int x = 512;
23   - unsigned int y = 512;
24   - unsigned int z = 512;
  14 +void ivote3(float* center, float* img, float std[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[],
  15 + unsigned int x, unsigned int y, unsigned int z);
  16 +
  17 +void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){
  18 + for(int ix = 0; ix < x; ix++){
  19 + for (int iy = 0; iy < y; iy++){
  20 + for (int iz = 0; iz < z; iz++){
  21 + int idx = iz * x * y + iy * x + ix;
  22 + cpuI[idx] = 255 - cpuI[idx];
  23 + }
  24 + }
  25 + }
  26 + }
  27 +
  28 +int main(int argc, char** argv){
  29 +
  30 + //output advertisement
  31 + std::cout<<std::endl<<std::endl;
  32 + std::cout<<"========================================================================="<<std::endl;
  33 + std::cout<<"Thank you for using the ivote3 segmentation tool!"<<std::endl;
  34 + std::cout<<"Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston"<<std::endl;
  35 + std::cout<<"Developers: Laila Saadatifard and David Mayerich"<<std::endl;
  36 + std::cout<<"Source: https://git.stim.ee.uh.edu/segmentation/ivote3"<<std::endl;
  37 + std::cout<<"========================================================================="<<std::endl<<std::endl;
  38 +
  39 + stim::arglist args;
  40 +
  41 +#ifdef _WIN32
  42 + args.set_ansi(false);
  43 +#endif
  44 +
  45 + //add arduments
  46 + args.add("help", "prints this help");
  47 + args.add("x", "size of the dataset along X axis", "positive value");
  48 + args.add("y", "size of the dataset along Y axis", "positive value");
  49 + args.add("z", "size of the dataset along Z axis", "positive value");
  50 + args.add("t", "threshold value for the final result", "positive valu");
  51 + //parse the command line arguments.
  52 + args.parse(argc, argv);
  53 +
  54 + //display the help text if requested
  55 + if(args["help"].is_set()){
  56 + std::cout<<std::endl<<"usage: ivote input_image output_list --option [A B C ...]"<<std::endl;
  57 + std::cout<<std::endl<<std::endl
  58 + << "examples: ivote blue.bmp list.txt "<<std::endl;
  59 +
  60 + std::cout<<std::endl<<std::endl;
  61 + std::cout<<args.str()<<std::endl;
  62 + exit(1);
  63 + }
  64 +
  65 + //if the input and output files aren't specified, throw an error and exit
  66 + if(args.nargs() < 2){
  67 + std::cout<<"ERROR: two files must be specified for segmentation, enter ivote --help for options."<<std::endl<<std::endl;
  68 + exit(1);
  69 + }
  70 +
  71 + //get the input image file
  72 + stim::filename Ifilename(args.arg(0));
  73 +
  74 + //get the output file name
  75 + stim::filename OutName(args.arg(1));
  76 +
  77 + //set the x, y, z.
  78 + int x = args["x"].as_int();
  79 + int y = args["y"].as_int();
  80 + int z = args["z"].as_int();
  81 +
  82 + //set the threshold.
  83 + float t = args["t"].as_float();
  84 +
  85 + unsigned int r[3] = { 9, 9, 5};
  86 + float sigma[3] = { 3, 3, 1.5};
  87 + unsigned int conn[3] = { 5, 5, 3};
  88 + float phi_deg = 20.1;
  89 + float phi = phi_deg * pi /180;
  90 + int iter = 2;
  91 + float d_phi = phi/(iter -1);
  92 +
  93 + std::string filename = Ifilename.str();
25 94 unsigned int bytes = x*y*z*sizeof(float);
26 95  
  96 + //allocate space on the cpu for the input data
27 97 float* cpuI = (float*) malloc(bytes);
28 98  
  99 + //load the input file into the cpuI
29 100 std::ifstream nissl(filename, std::ios::in | std::ios::binary);
30 101 nissl.read((char*)cpuI, bytes);
31 102 nissl.close();
32 103  
33   -
34   - std::ofstream original("original.vol", std::ofstream::out | std::ofstream::binary);
  104 + invert_data(cpuI, x, y, z);
  105 +
  106 + //write a new file from the cpuI.
  107 + std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary);
35 108 original.write((char*)cpuI, bytes);
36 109 original.close();
37   -
38   - float* cpu_grad = (float*) malloc(bytes*3);
39   -
40   - ivote3(cpu_grad, cpuI, sigma, x, y, z);
41   -
42   - std::ofstream blur("blur.vol", std::ofstream::out | std::ofstream::binary);
43   - blur.write((char*)cpuI, bytes);
44   - blur.close();
45   -
46   -
47 110  
48   -
49   - // load the imagestack from the given directory
50   - //std::string file_mask = "D:\\source\\ivote3\\data\\nissl\\*.png";
51   - /*stim::image_stack<float> I;
52   - unsigned int x =512;
53   - unsigned int y =512;
54   - unsigned int z =512;
55   - unsigned int bytes = x*y*z* sizeof(float);
  111 + //allocate space on the cpu for the output result
  112 + float* cpu_out = (float*) malloc(bytes);
56 113  
57   - I.read("nissl-float-512.512.512.vol", 512, 512, 512, 1, 0);
58   - //float test = I.get(128, 128, 128);
59   - //std::cout<< test<<std::endl;
60   - float* ptr0 = I.data();
61   -
62   - //stim::cpu2image<float>(I.data(), "after_loading.bmp", 512, 512);
63   -
64   - float* cpuI = (float*) malloc(bytes);
  114 + // call the ivote function
  115 + ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z);
65 116  
66   - memcpy (cpuI, ptr0, bytes);
67   -
68   -
69   - std::ofstream infile ("original-2.vol");
70   - infile.write((char*)cpuI, bytes);
71   - infile.close();*/
72   - //stim::cpu2image(cpuI, "00-cpuI.bmp", 512, 1024);
  117 + //write the blurred file from the cpuI.
  118 + std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary);
  119 + fblur.write((char*)cpuI, bytes);
  120 + fblur.close();
  121 +
  122 + //write the output file.
  123 + std::ofstream fo("output/" + OutName.str(), std::ofstream::out | std::ofstream::binary);
  124 + fo.write((char*)cpu_out, bytes);
  125 + fo.close();
73 126  
74 127  
75   - //std::fstream file2;
76   - //file2.write((char*)cpuI, 512*512*512*sizeof(unsigned char));
77   - //I.write("data\\volume1.raw");
78   - //test = ptr0[128*128*128];
79   - //std::cout<< test<<std::endl;
80 128  
81   - //memset(I.data(), 0, 512*512*512);
82   - //I.write("data\\volume.raw");
83   - //I.save_images("data\\????.bmp");
  129 + cudaDeviceReset();
84 130  
85   -
86   -
87   -
88   -
89   -
90   - cudaDeviceReset();
91 131 }
92 132  
... ...