Commit f6438346ee2bdf15cdc3f057f58447935dfe13af
1 parent
547f6b98
initial commit after PLoS submission
Showing
3 changed files
with
1279 additions
and
0 deletions
Show diff stats
1 | +#ifndef SNAKUSCULE_H | ||
2 | +#define SNAKUSCULE_H | ||
3 | + | ||
// Fallback definitions so this header also compiles when the project's
// callable.h (which normally provides these macros) was not included first.
// They are only used when the macros are still undefined at this point.
#ifndef CUDA_CALLABLE
#ifdef __CUDACC__
#define CUDA_CALLABLE __host__ __device__
#else
#define CUDA_CALLABLE
#endif
#endif
#ifndef CUDA_UNCALLABLE
#ifdef __CUDACC__
#define CUDA_UNCALLABLE __host__
#else
#define CUDA_UNCALLABLE
#endif
#endif

/// Minimal 3-component value type (x, y, z) with component-wise arithmetic.
/// All members are public; every operation is marked CUDA_CALLABLE.
template <typename T>
class point {
public:

	T x;
	T y;
	T z;

	/// Default constructor: all three components are set to zero.
	CUDA_CALLABLE point() : x(0), y(0), z(0) {}

	/// Construct from explicit components (a -> x, b -> y, c -> z).
	CUDA_CALLABLE point(T a, T b, T c) : x(a), y(b), z(c) {}

	/// Copy-assign the three components.
	/// Returns a reference to *this so that chained assignments such as
	/// (a = b) = c modify `a` rather than a temporary copy.
	CUDA_CALLABLE point<T>& operator=(const point<T>& rhs) {
		x = rhs.x;
		y = rhs.y;
		z = rhs.z;
		return *this;
	}

	/// Component-wise sum; neither operand is modified.
	CUDA_CALLABLE point<T> operator+(const point<T>& rhs) const {
		point<T> temp;
		temp.x = x + rhs.x;
		temp.y = y + rhs.y;
		temp.z = z + rhs.z;
		return temp;
	}

	/// Component-wise difference (this - rhs); neither operand is modified.
	CUDA_CALLABLE point<T> operator-(const point<T>& rhs) const {
		point<T> temp;
		temp.x = x - rhs.x;
		temp.y = y - rhs.y;
		temp.z = z - rhs.z;
		return temp;
	}

	/// Scale every component by a float factor.
	CUDA_CALLABLE point<T> operator*(const float rhs) const {
		return point<T>(rhs * x, rhs * y, rhs * z);
	}

	/// Divide every component by a float divisor (no check for rhs == 0).
	CUDA_CALLABLE point<T> operator/(const float rhs) const {
		return point<T>(x / rhs, y / rhs, z / rhs);
	}
};
59 | + | ||
60 | + | ||
61 | +class sphere { | ||
62 | + | ||
63 | +public: | ||
64 | + | ||
65 | + point<float> p; | ||
66 | + point<float> q; | ||
67 | + //default constructor | ||
68 | + CUDA_CALLABLE sphere() { | ||
69 | + p.x = 0; | ||
70 | + p.y = 0; | ||
71 | + p.z = 0; | ||
72 | + q.x = 0; | ||
73 | + q.y = 0; | ||
74 | + q.z = 0; | ||
75 | + }; | ||
76 | + | ||
77 | + //CUDA_CALLABLE point<float> p() { return point<float>(center.x - radius, center.y); } | ||
78 | + //CUDA_CALLABLE point<float> q() { return point<float>(center.x + radius, center.y); } | ||
79 | + //CUDA_CALLABLE point<float> c() { return center; } | ||
80 | + CUDA_CALLABLE point<float> c() { | ||
81 | + point<float> center; | ||
82 | + point<float> sum; // sum of two points to get center | ||
83 | + sum = p + q; | ||
84 | + center.x = 0.5f * sum.x; | ||
85 | + center.y = 0.5f * sum.y; | ||
86 | + center.z = 0.5f * sum.z; | ||
87 | + return center; } | ||
88 | + | ||
89 | + //CUDA_CALLABLE float r() { return radius; } | ||
90 | + CUDA_CALLABLE float r() { | ||
91 | + float radius; | ||
92 | + point <float> d; //distance | ||
93 | + d = p - q; | ||
94 | + radius = 0.5f* sqrt((d.x * d.x) + (d.y * d.y) + (d.z * d.z)); | ||
95 | + return radius; } | ||
96 | + | ||
97 | + CUDA_CALLABLE void update(point<float> energyGradp , point<float> energyGradq , float dt){ | ||
98 | + p = p - (energyGradp * dt); | ||
99 | + q = q - (energyGradq * dt); | ||
100 | + } | ||
101 | + | ||
102 | + CUDA_UNCALLABLE std::string str() { | ||
103 | + std::stringstream ss; | ||
104 | + ss << "q = (" << q.x << ", " << q.y << ", " << q.z << ")" << std::endl; | ||
105 | + ss << "p = (" << p.x << ", " << p.y << ", " << p.z << ")" << std::endl; | ||
106 | + point<float> center = c(); | ||
107 | + ss << "c = (" << center.x << ", " << center.y << ", " << center.z << ")" << std::endl; | ||
108 | + ss << "r = " << r() << std::endl; | ||
109 | + return ss.str(); | ||
110 | + } | ||
111 | +}; | ||
112 | + | ||
113 | +#endif | ||
0 | \ No newline at end of file | 114 | \ No newline at end of file |
1 | +#include <iostream> | ||
2 | +#include<string> | ||
3 | +#include<cuda.h> | ||
4 | +#include <cuda_runtime.h> | ||
5 | +#include "device_launch_parameters.h" | ||
6 | +#include <fstream> | ||
7 | +#include <curand.h> | ||
8 | +#include <curand_kernel.h> | ||
9 | +#include <time.h> | ||
10 | +#include <chrono> | ||
11 | +#include <stdio.h> | ||
12 | +#include <math.h> | ||
13 | +#include "opencv2/core/core.hpp" | ||
14 | +#include <opencv2/imgproc/imgproc.hpp> | ||
15 | +#include <opencv2/opencv.hpp> | ||
16 | +#include <opencv2/highgui/highgui_c.h> | ||
17 | + | ||
18 | +#define USING_OPENCV | ||
19 | +#include<stim/image/image.h> | ||
20 | +#include <stim/cuda/cudatools/callable.h> | ||
21 | +#include <stim/cuda/cudatools/error.h> | ||
22 | +#include <stim/cuda/cudatools/timer.h> | ||
23 | +#include <stim/math/constants.h> | ||
24 | +#include "stim/parser/arguments.h" | ||
25 | +#include<random> | ||
26 | +#include <numeric> // std::iota | ||
27 | +//#include<stim/math/random.h> | ||
28 | +#include "hypersnakuscule.h" | ||
29 | +//#include "median2.cuh" | ||
30 | + | ||
31 | +#define deltaR 2.0f | ||
32 | +#define deltar 1.5874f // deltar=deltaR/cubeRoot2 | ||
33 | +#define cubeRoot2 1.2599f | ||
34 | +#define pi 3.14159f | ||
35 | +//#define Energy_th -3 | ||
36 | +//----------------------------------------------functions-------------------------------------------------------------- | ||
/// Linearly rescale the values of I in place so that the smallest value maps
/// to `low` and the largest to `high`.
///
/// @param I     buffer of `size` floats, modified in place
/// @param size  number of elements (image pixels) in I
/// @param low   target value for the minimum of I
/// @param high  target value for the maximum of I
///
/// An empty or null buffer is left untouched. If every element has the same
/// value (zero range) all elements are set to `low` instead of dividing by
/// zero.
void stretch(float* I, size_t size, int low, int high) {
	if (I == nullptr || size == 0)
		return;

	// single pass to find the current value range
	float max_val = I[0];
	float min_val = I[0];
	for (size_t n = 1; n < size; n++) {
		if (I[n] > max_val) {
			max_val = I[n];
		}
		if (I[n] < min_val) {
			min_val = I[n];
		}
	}

	const float range = max_val - min_val;
	if (range == 0.0f) {							// constant image: avoid 0/0 -> NaN
		for (size_t n = 0; n < size; n++) {
			I[n] = (float)low;
		}
		return;
	}

	const float desired_range = (float)high - (float)low;
	for (size_t n = 0; n < size; n++) {				//for each element in the image
		I[n] = desired_range * (I[n] - min_val) / range + low;
	}
}
56 | + | ||
57 | +/// random generator function: generate random numbers in a sphere with radius 1 in the center (0, 0, 0) | ||
58 | +void randGenerator(point<float>* r, int sampleNum, bool debug = false) { | ||
59 | + | ||
60 | + for (size_t i = 0; i < sampleNum; i++) { | ||
61 | + double rn = (double)rand() / (double)(RAND_MAX); | ||
62 | + double theta = (double)rand() / (double)(RAND_MAX)* stim::TAU; | ||
63 | + double cosphi = 1.0 - 2.0 * ((double)rand() / (double)(RAND_MAX)); | ||
64 | + double phi = std::acos(cosphi); | ||
65 | + //double phi = std::acos(2.0 * v - 1.0); | ||
66 | + //double phi = (double)rand() / (double)(RAND_MAX)* stim::PI; | ||
67 | + //std::cout << "rn=" << rn << "\t theta=" << theta << "\tphi=" << phi << std::endl; | ||
68 | + r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); | ||
69 | + r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); | ||
70 | + r[i].z = (float)(std::cbrt(rn) * cos(phi)); | ||
71 | + | ||
72 | + | ||
73 | + } | ||
74 | + if (debug) { | ||
75 | + std::ofstream outfile("randomSamples.txt"); //open a file for writing | ||
76 | + for (size_t i = 0; i < sampleNum; i++) { | ||
77 | + outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | ||
78 | + } | ||
79 | + outfile.close(); | ||
80 | + } | ||
81 | + | ||
82 | +} | ||
83 | +///create random numbers in a cube and delete the one outside the sphere | ||
84 | +void randGenerator_cube(point<float>* r, int sampleNum, bool debug = false) { | ||
85 | + size_t counter = 0; | ||
86 | + while (counter < sampleNum) { | ||
87 | + double x = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | ||
88 | + double y = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | ||
89 | + double z = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | ||
90 | + double d = sqrt((x * x) + (y * y) + (z * z)); | ||
91 | + | ||
92 | + if (d < 1.1) { | ||
93 | + r[counter].x = (float)x; | ||
94 | + r[counter].y = (float)y; | ||
95 | + r[counter].z = (float)z; | ||
96 | + counter++; | ||
97 | + } | ||
98 | + } | ||
99 | + | ||
100 | + if (debug) { | ||
101 | + std::ofstream outfile("randomSamples.txt"); //open a file for writing | ||
102 | + for (size_t i = 0; i < sampleNum; i++) { | ||
103 | + outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | ||
104 | + } | ||
105 | + outfile.close(); | ||
106 | + } | ||
107 | + | ||
108 | + | ||
109 | +} | ||
110 | + | ||
111 | +/// random generator function: generate random numbers in a sphere with radius 1 in the center (0 , 0, 0) | ||
112 | +//void randGenerator1(point<float>* r, int sampleNum, bool debug = false) { | ||
113 | +// for (int i = 0; i < sampleNum; i++) { | ||
114 | +// std::default_random_engine generator1(100 + i); | ||
115 | +// std::uniform_real_distribution<double> distribution1(0.0f, 1.0f); | ||
116 | +// double rn = distribution1(generator1); | ||
117 | +// std::default_random_engine generator2(50.0f + 5.0f * i); | ||
118 | +// std::uniform_real_distribution<double> distribution2(0.0, 2.0 * stim::PI); | ||
119 | +// double theta = distribution2(generator2); | ||
120 | +// std::default_random_engine generator3(30.0f + 3.0f * i); | ||
121 | +// std::uniform_real_distribution<double> distribution3(0, stim::PI); | ||
122 | +// double phi = distribution3(generator3); | ||
123 | +// r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); | ||
124 | +// r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); | ||
125 | +// r[i].z = (float)(std::cbrt(rn) * cos(phi)); | ||
126 | +// } | ||
127 | +// if (debug) { | ||
128 | +// std::ofstream outfile("randomSamples.txt"); //open a file for writing | ||
129 | +// for (size_t i = 0; i < sampleNum; i++) { | ||
130 | +// outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | ||
131 | +// } | ||
132 | +// outfile.close(); | ||
133 | +// } | ||
134 | +//} | ||
135 | + | ||
136 | + | ||
137 | + | ||
138 | +/// saves the snakes specified by the idx array | ||
139 | +void SaveSnakes(std::string filename, sphere* snakes, std::vector<size_t> idx) { | ||
140 | + std::ofstream outfile(filename); //open a file for writing | ||
141 | + for (size_t i = 0; i < idx.size(); i++) { | ||
142 | + point<float> center = snakes[idx[i]].c(); //get the centerpoint of the snake | ||
143 | + outfile << center.x << " " << center.y << " " << center.z << " " << snakes[idx[i]].r() << std::endl; //output the center and radius | ||
144 | + } | ||
145 | + outfile.close(); | ||
146 | + | ||
147 | + | ||
148 | + | ||
149 | +} | ||
150 | +void initialSwarmSnake(sphere * snakes, size_t &counter, size_t w, size_t h, size_t d, float radius) { | ||
151 | + std::cout << "W=" << w << "\t h=" << h << "\td=" << d << std::endl; | ||
152 | + int D = int(sqrt(1.5)*radius); //distance between hypersnakes | ||
153 | + //int D = 30; //for phantom | ||
154 | + int k1 = 0; | ||
155 | + counter = 0; | ||
156 | + float startpoint = 0; | ||
157 | + for (float k = radius; k < (d - radius); k += D ) {// for phantom D | ||
158 | + for (float j = radius; j < (h - radius); j += D) { | ||
159 | + k1++; | ||
160 | + if (k1 % 2 == 1) | ||
161 | + startpoint = 0; //for phantom D | ||
162 | + else | ||
163 | + startpoint = (float)D / 2.0f;//for phantom D | ||
164 | + for (float i = startpoint; i < (w - radius); i += D) { | ||
165 | + point<float> temp_p(i, j, k); | ||
166 | + snakes[counter].p = temp_p; | ||
167 | + point<float> temp_q(i + (2 * radius), j, k); | ||
168 | + snakes[counter].q = temp_q; | ||
169 | + counter = counter + 1; | ||
170 | + } | ||
171 | + } | ||
172 | + } | ||
173 | + std::ofstream outfile("initials.txt"); | ||
174 | + for (int i = 0; i < counter; i++) { | ||
175 | + outfile << snakes[i].c().x << " " << snakes[i].c().y << " " << snakes[i].c().z << " " << snakes[i].r() << std::endl; | ||
176 | + } | ||
177 | + outfile.close(); | ||
178 | +} | ||
179 | + | ||
180 | +__host__ __device__ void sum_dE(point<float>& dEdp, point<float>& dEdq, point<float> c, point<float> s, float R, float f) { | ||
181 | + | ||
182 | + float r = R / cubeRoot2; // radius of inner snake | ||
183 | + float dz2 = (s.z - c.z)*(s.z - c.z); | ||
184 | + float dy2 = (s.y - c.y)*(s.y - c.y); | ||
185 | + float dx2 = (s.x - c.x)*(s.x - c.x); | ||
186 | + float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour | ||
187 | + | ||
188 | + float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; | ||
189 | + float dx = (c.x - s.x) / d; | ||
190 | + float dy = (c.y - s.y) / d; | ||
191 | + float dz = (c.z - s.z) / d; | ||
192 | + | ||
193 | + if (d < (r - 0.5f*deltar)) { // mouth of snake | ||
194 | + dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake | ||
195 | + dEdq.x += 3.0f / gx * f; | ||
196 | + } | ||
197 | + else if (d < (r + 0.5f*deltar)) { // throat of snake | ||
198 | + float S = (2.0f / deltar) * (d - r); // weight function value in the given point | ||
199 | + dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); | ||
200 | + dEdp.y += f *(dy / deltar); | ||
201 | + dEdp.z += f *(dz / deltar); | ||
202 | + | ||
203 | + dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); | ||
204 | + dEdq.y = dEdp.y; | ||
205 | + dEdq.z = dEdp.z; | ||
206 | + | ||
207 | + } | ||
208 | + else if (d < (R - 0.5f*deltaR)) { // coil of snake | ||
209 | + dEdp.x += 3.0f / gx * f; | ||
210 | + dEdq.x -= 3.0f / gx * f; | ||
211 | + } | ||
212 | + | ||
213 | + else if (d < (R + 0.5f*deltaR)) { // fangs of snake | ||
214 | + float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); | ||
215 | + | ||
216 | + dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); | ||
217 | + dEdp.y -= 0.5f *(dy / deltaR) * f; | ||
218 | + dEdp.z -= 0.5f *(dz / deltaR) * f; | ||
219 | + | ||
220 | + dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); | ||
221 | + dEdq.y = dEdp.y; | ||
222 | + dEdq.z = dEdp.z; | ||
223 | + } | ||
224 | + | ||
225 | +} | ||
226 | + | ||
227 | +//sum_dE in debug mode | ||
228 | +__host__ __device__ void sum_dE_debug(point<float>& dEdp, point<float>& dEdq, int &counter, point<float> c, point<float> s, float R, float f) { | ||
229 | + | ||
230 | + float r = R / cubeRoot2; // radius of inner snake | ||
231 | + float dz2 = (s.z - c.z)*(s.z - c.z); | ||
232 | + float dy2 = (s.y - c.y)*(s.y - c.y); | ||
233 | + float dx2 = (s.x - c.x)*(s.x - c.x); | ||
234 | + float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour | ||
235 | + | ||
236 | + float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; | ||
237 | + float dx = (c.x - s.x) / d; | ||
238 | + float dy = (c.y - s.y) / d; | ||
239 | + float dz = (c.z - s.z) / d; | ||
240 | + int trivial = 0; //to test if any point is out of the contour | ||
241 | + if (d < (r - 0.5f*deltar)) { // mouth of snake | ||
242 | + counter++; | ||
243 | + dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake | ||
244 | + dEdq.x += 3.0f / gx * f; | ||
245 | + } | ||
246 | + else if (d < (r + 0.5f*deltar)) { // throat of snake | ||
247 | + counter++; | ||
248 | + float S = (2.0f / deltar) * (d - r); // weight function value in the given point | ||
249 | + dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); | ||
250 | + dEdp.y += f *(dy / deltar); | ||
251 | + dEdp.z += f *(dz / deltar); | ||
252 | + | ||
253 | + dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); | ||
254 | + dEdq.y = dEdp.y; | ||
255 | + dEdq.z = dEdp.z; | ||
256 | + | ||
257 | + } | ||
258 | + else if (d < (R - 0.5f*deltaR)) { // coil of snake | ||
259 | + counter++; | ||
260 | + dEdp.x += 3.0f / gx * f; | ||
261 | + dEdq.x -= 3.0f / gx * f; | ||
262 | + } | ||
263 | + | ||
264 | + else if (d < (R + 0.5f*deltaR)) { // fangs of snake | ||
265 | + counter++; | ||
266 | + float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); | ||
267 | + | ||
268 | + dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); | ||
269 | + dEdp.y -= 0.5f *(dy / deltaR) * f; | ||
270 | + dEdp.z -= 0.5f *(dz / deltaR) * f; | ||
271 | + | ||
272 | + dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); | ||
273 | + dEdq.y = dEdp.y; | ||
274 | + dEdq.z = dEdp.z; | ||
275 | + } | ||
276 | + else | ||
277 | + trivial++; | ||
278 | + | ||
279 | +} | ||
280 | + | ||
281 | +// this function calculate gradient of energy with respect to two point p and q | ||
282 | +__host__ __device__ void snake_Engrad(point<float>&dEdp, point<float>&dEdq, sphere snake, float* I, size_t w, size_t h, size_t d, bool debug = false) { | ||
283 | + | ||
284 | + float radius = snake.r(); // radius of outer snake | ||
285 | + point<float> c = snake.c(); // center of snake | ||
286 | + dEdp = point<float>(0, 0, 0); //initialize dEdp and dEdq to zero | ||
287 | + dEdq = point<float>(0, 0, 0); | ||
288 | + | ||
289 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | ||
290 | + if (radius < threshold) { | ||
291 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | ||
292 | + return; | ||
293 | + } | ||
294 | + | ||
295 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | ||
296 | + float tempXmax = ceil(c.x + radius + 1); | ||
297 | + float tempYmin = floor(c.y - radius - 1); | ||
298 | + float tempYmax = ceil(c.y + radius + 1); | ||
299 | + float tempZmin = floor(c.z - radius - 1); | ||
300 | + float tempZmax = ceil(c.z + radius + 1); | ||
301 | + | ||
302 | + float xmin = max((float)tempXmin, (float) 0.0); //clamp the bounding box to the image edges | ||
303 | + float xmax = min((float)tempXmax, (float)(w - 1)); | ||
304 | + float ymin = max((float)tempYmin, (float)0); | ||
305 | + float ymax = min((float)tempYmax, (float)(h - 1)); | ||
306 | + float zmin = max((float)tempZmin, (float)0); | ||
307 | + float zmax = min((float)tempZmax, (float)(d - 1)); | ||
308 | + | ||
309 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | ||
310 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | ||
311 | + return; | ||
312 | + } | ||
313 | + | ||
314 | + float R = radius; //simplify radius to R | ||
315 | + float R3 = R * R * R; //calculate R^2 (radius squared) | ||
316 | + for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) { // for each section | ||
317 | + for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) { //for each column of section in the bounding box | ||
318 | + for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) { //for each pixel p in the column | ||
319 | + | ||
320 | + point<float> s(x, y, z); // a sample inside the contour | ||
321 | + float f; // image value in given position | ||
322 | + | ||
323 | + int position = (int)((w * h * z) + (x * h + y)); | ||
324 | + if (position < (w * h * d)) { | ||
325 | + f = I[position]; | ||
326 | + | ||
327 | + | ||
328 | + if (!f == 0) | ||
329 | + sum_dE(dEdp, dEdq, c, s, R, f); | ||
330 | + | ||
331 | + } | ||
332 | + | ||
333 | + } | ||
334 | + } | ||
335 | + } | ||
336 | + | ||
337 | + dEdp = dEdp / (8 * R3); | ||
338 | + dEdq = dEdq / (8 * R3); | ||
339 | + | ||
340 | + | ||
341 | +} | ||
342 | + | ||
343 | +//// this function calculate gradient of energy with respect to two point p and q using Monte Carlo | ||
344 | +__host__ __device__ void snake_Engrad_MC(point<float>&dEdp, point<float>&dEdq, sphere snake, float* I, point<float>* samples, size_t sampleNum, size_t w, size_t h, size_t d, bool debug = false) { | ||
345 | + | ||
346 | + float radius = snake.r(); // radius of outer snake | ||
347 | + point<float> c = snake.c(); // center of snake | ||
348 | + dEdp = point<float>(0, 0, 0); //initialize dEdp and dEdq to zero | ||
349 | + dEdq = point<float>(0, 0, 0); | ||
350 | + | ||
351 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | ||
352 | + if (radius < threshold) { | ||
353 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | ||
354 | + return; | ||
355 | + } | ||
356 | + | ||
357 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | ||
358 | + float tempXmax = ceil(c.x + radius + 1); | ||
359 | + float tempYmin = floor(c.y - radius - 1); | ||
360 | + float tempYmax = ceil(c.y + radius + 1); | ||
361 | + float tempZmin = floor(c.z - radius - 1); | ||
362 | + float tempZmax = ceil(c.z + radius + 1); | ||
363 | + | ||
364 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | ||
365 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | ||
366 | + float ymin = max((float)tempYmin, (float)0.0); | ||
367 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | ||
368 | + float zmin = max((float)tempZmin, (float)0.0); | ||
369 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | ||
370 | + | ||
371 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | ||
372 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | ||
373 | + return; | ||
374 | + } | ||
375 | + | ||
376 | + | ||
377 | + | ||
378 | + | ||
379 | + float R = radius; //simplify radius to R | ||
380 | + float R3 = R * R * R; //calculate R^3 (radius cube) | ||
381 | + int counter = 0; | ||
382 | + float sumf = 0.0; | ||
383 | + | ||
384 | + for (int i = 0; i < sampleNum; i++) { | ||
385 | + float sx = samples[i].x; | ||
386 | + float sy = samples[i].y; | ||
387 | + float sz = samples[i].z; | ||
388 | + | ||
389 | + float x = (R + 1.0f) * sx + c.x; | ||
390 | + float y = (R + 1.0f) * sy + c.y; | ||
391 | + float z = (R + 1.0f) * sz + c.z; | ||
392 | + | ||
393 | + int xi = (int)round(x); | ||
394 | + int yi = (int)round(y); | ||
395 | + int zi = (int)round(z); | ||
396 | + | ||
397 | + point<float> s(xi, yi, zi); // a sample inside the contour | ||
398 | + float f; // image value in given position | ||
399 | + | ||
400 | + int position = (int)((w * h * zi) + (xi * h + yi)); | ||
401 | + if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) | ||
402 | + counter++; | ||
403 | + f = (float)I[position]; | ||
404 | + sumf += f; | ||
405 | + //if (debug) | ||
406 | + sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); | ||
407 | + //sum_dE(dEdp, dEdq, c, s, R, f); | ||
408 | + } | ||
409 | + } | ||
410 | + | ||
411 | + | ||
412 | + | ||
413 | + | ||
414 | + | ||
415 | + float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); | ||
416 | + | ||
417 | + //if (debug) printf("volume_sphere=%f and volume=%f\n", volume_sphere, volume); | ||
418 | + //if (debug) printf("sampleNum=%u and (float)counter= %f \n", sampleNum, (float)counter); | ||
419 | + | ||
420 | + dEdp = dEdp * volume / (float)counter; | ||
421 | + | ||
422 | + dEdq = dEdq * volume / (float)counter; | ||
423 | + | ||
424 | + | ||
425 | + dEdp = dEdp / (8.0f * R3); | ||
426 | + dEdq = dEdq / (8.0f * R3); | ||
427 | + | ||
428 | +} | ||
429 | + | ||
430 | + | ||
431 | +// this function calculate gradient of energy with respect to two point p and q using Monte Carlo | ||
432 | +__host__ __device__ void snake_Engrad_MC_parallel(point<float>&dEdp, point<float>&dEdq, int &counter, sphere snake, float* I, point<float> sample, size_t w, size_t h, size_t d, bool debug = false) { | ||
433 | + | ||
434 | + float radius = snake.r(); // radius of outer snake | ||
435 | + point<float> c = snake.c(); // center of snake | ||
436 | + | ||
437 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | ||
438 | + float tempXmax = ceil(c.x + radius + 1); | ||
439 | + float tempYmin = floor(c.y - radius - 1); | ||
440 | + float tempYmax = ceil(c.y + radius + 1); | ||
441 | + float tempZmin = floor(c.z - radius - 1); | ||
442 | + float tempZmax = ceil(c.z + radius + 1); | ||
443 | + | ||
444 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | ||
445 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | ||
446 | + float ymin = max((float)tempYmin, (float)0.0); | ||
447 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | ||
448 | + float zmin = max((float)tempZmin, (float)0.0); | ||
449 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | ||
450 | + | ||
451 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | ||
452 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | ||
453 | + return; | ||
454 | + } | ||
455 | + | ||
456 | + | ||
457 | + | ||
458 | + | ||
459 | + float R = radius; //simplify radius to R | ||
460 | + float sumf = 0.0; | ||
461 | + | ||
462 | + //for (int i = 0; i < sampleNum; i++) { | ||
463 | + float sx = sample.x; | ||
464 | + float sy = sample.y; | ||
465 | + float sz = sample.z; | ||
466 | + | ||
467 | + float x = (R + 1.0f) * sx + c.x; | ||
468 | + float y = (R + 1.0f) * sy + c.y; | ||
469 | + float z = (R + 1.0f) * sz + c.z; | ||
470 | + | ||
471 | + int xi = (int)round(x); | ||
472 | + int yi = (int)round(y); | ||
473 | + int zi = (int)round(z); | ||
474 | + | ||
475 | + point<float> s(xi, yi, zi); // a sample inside the contour | ||
476 | + float f; // image value in given position | ||
477 | + | ||
478 | + int position = (int)((w * h * zi) + (xi * h + yi)); | ||
479 | + if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) | ||
480 | + | ||
481 | + f = (float)I[position]; | ||
482 | + sumf += f; | ||
483 | + sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); | ||
484 | + //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | ||
485 | + | ||
486 | + } | ||
487 | + | ||
488 | + | ||
489 | +} | ||
490 | + | ||
491 | + | ||
492 | + | ||
493 | + | ||
494 | + | ||
495 | +void snake_evolve(sphere &snakes, float* I, int w, int h, int d, float dt, int itr, point<float>* samples, size_t sampleNum, bool MC, bool debug = false) { | ||
496 | + point<float> dEdp(0.0, 0.0, 0.0); // energy gradient wrt p | ||
497 | + point<float> dEdq(0.0, 0.0, 0.0); // energy gradient wrt q | ||
498 | + for (int numItr = 0; numItr < itr; numItr++) { | ||
499 | + if (MC) | ||
500 | + snake_Engrad_MC(dEdp, dEdq, snakes, I, samples, sampleNum, w, h, d, debug); | ||
501 | + else | ||
502 | + snake_Engrad(dEdp, dEdq, snakes, I, w, h, d, debug); | ||
503 | + | ||
504 | + if (debug) printf("dEdp.x=%f \t dEdq.x=%f \n dEdp.y=%f \t dEdp.z=%f \n\n", dEdp.x, dEdq.x, dEdp.y, dEdp.z); | ||
505 | + float factor = sqrt(float(numItr + 1)); // step size in gradient descent decreasing by number of iterations | ||
506 | + | ||
507 | + snakes.update(dEdp, dEdq, dt / factor); | ||
508 | + | ||
509 | + } | ||
510 | + | ||
511 | + | ||
512 | +} | ||
513 | + | ||
514 | + | ||
515 | + | ||
516 | +//-------------------------------------------Kernels-------------------------------------------------------------------------- | ||
517 | + | ||
518 | +//__global__ void kernel_snake_evolve_MC(sphere * snakes, float* I, point<float>* samples, int sampleNum, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { | ||
519 | +// | ||
520 | +// int idx = blockDim.x * blockIdx.x + threadIdx.x; | ||
521 | +// | ||
522 | +// if (idx >= snakeNum) // return if the number of threads is more than snakes | ||
523 | +// return; | ||
524 | +// | ||
525 | +// | ||
526 | +// if (idx == 0) | ||
527 | +// printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); | ||
528 | +// point<float> dEdp(0.0, 0.0, 0.0); // energy gradient wrt p | ||
529 | +// point<float> dEdq(0.0, 0.0, 0.0); // energy gradient wrt q | ||
530 | +// sphere s = snakes[idx]; | ||
531 | +// | ||
532 | +// for (int i = 0; i < itr; i++) { | ||
533 | +// if (debug) printf("\n\n---------------->> iteration %u\n", i); | ||
534 | +// snake_Engrad_MC(dEdp, dEdq, s, I, samples, sampleNum, w, h, d, debug); | ||
535 | +// float factor = sqrtf(float(i + 1)); | ||
536 | +// if (debug) | ||
537 | +// printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", dEdp.x, dEdp.y, dEdp.z, dEdq.x); | ||
538 | +// s.update(dEdp, dEdq, dt / factor); | ||
539 | +// | ||
540 | +// } | ||
541 | +// | ||
542 | +// snakes[idx] = s; | ||
543 | +// | ||
544 | +// | ||
545 | +//} | ||
546 | + | ||
547 | +__global__ void kernel_snake_evolve_MC_parallel(sphere * snakes, float* I, point<float>* samples, int sampleNum, size_t snakeNum, size_t threads, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { | ||
548 | + | ||
549 | + extern __shared__ float sharedPtr[]; // define shared memory to save result of each thread there | ||
550 | + int n = floorf(sampleNum / threads) ; //# given sample points to one thread | ||
551 | + int idx = blockDim.x * blockIdx.x + threadIdx.x; | ||
552 | + | ||
553 | + if (idx >= (snakeNum * threads)) // return if the number of threads is more than snakes | ||
554 | + return; | ||
555 | + | ||
556 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); //snake cannot be smaller than threshold | ||
557 | + /*if (idx == 0) { | ||
558 | + printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); | ||
559 | + printf("number of samples per thread=%d\n", n); | ||
560 | + printf("idx=%d\n", (snakeNum * threads)); | ||
561 | + printf("blockdim.x=%d and threads=%u \n", blockDim.x, threads); | ||
562 | + }*/ | ||
563 | + | ||
564 | + point<float> thread_sample; //sample goes to the thread | ||
565 | + //sum_counter is real number of samples inside the contour averaged. some points of samples may round out of contour and did not contribute in averaging | ||
566 | + for (int i = 0; i < itr; i++) { | ||
567 | + //check the snake, if it pass the threshold | ||
568 | + | ||
569 | + if (snakes[blockIdx.x].r() < threshold) { | ||
570 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | ||
571 | + return; | ||
572 | + } | ||
573 | + | ||
574 | + float radius = snakes[blockIdx.x].r(); // radius of outer snake | ||
575 | + point<float> c = snakes[blockIdx.x].c(); // center of snake | ||
576 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | ||
577 | + float tempXmax = ceil(c.x + radius + 1); | ||
578 | + float tempYmin = floor(c.y - radius - 1); | ||
579 | + float tempYmax = ceil(c.y + radius + 1); | ||
580 | + float tempZmin = floor(c.z - radius - 1); | ||
581 | + float tempZmax = ceil(c.z + radius + 1); | ||
582 | + | ||
583 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | ||
584 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | ||
585 | + float ymin = max((float)tempYmin, (float)0.0); | ||
586 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | ||
587 | + float zmin = max((float)tempZmin, (float)0.0); | ||
588 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | ||
589 | + | ||
590 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | ||
591 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | ||
592 | + return; | ||
593 | + } | ||
594 | + | ||
595 | + | ||
596 | + | ||
597 | + if (debug) printf("\n\n---------------->> iteration %u\n", i); | ||
598 | + int counter = 0; | ||
599 | + point<float> dEdp(0.0f, 0.0f, 0.0f); // energy gradient wrt p | ||
600 | + point<float> dEdq(0.0f, 0.0f, 0.0f); // energy gradient wrt q | ||
601 | + for (int j = 0; j < n; j++) { | ||
602 | + //sphere single_snake = snakes[blockIdx.x]; // each block is assigned to one snake. all threads in a block are working for that snake | ||
603 | + thread_sample = samples[threadIdx.x * n + j]; | ||
604 | + snake_Engrad_MC_parallel(dEdp, dEdq, counter, snakes[blockIdx.x], I, thread_sample, w, h, d, debug); | ||
605 | + | ||
606 | + } | ||
607 | + /*if (idx == 0) { | ||
608 | + printf("points in the contour for thread0=%d\n", counter); | ||
609 | + printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | ||
610 | + } | ||
611 | +*/ | ||
612 | + | ||
613 | + //copy the result of each thread in shared memory | ||
614 | + sharedPtr[threadIdx.x * 7 + 0] = dEdp.x; | ||
615 | + sharedPtr[threadIdx.x * 7 + 1] = dEdp.y; | ||
616 | + sharedPtr[threadIdx.x * 7 + 2] = dEdp.z; | ||
617 | + | ||
618 | + sharedPtr[threadIdx.x * 7 + 3] = dEdq.x; | ||
619 | + sharedPtr[threadIdx.x * 7 + 4] = dEdq.y; | ||
620 | + sharedPtr[threadIdx.x * 7 + 5] = dEdq.z; | ||
621 | + | ||
622 | + sharedPtr[threadIdx.x * 7 + 6] = counter; | ||
623 | + | ||
624 | + __syncthreads(); | ||
625 | + | ||
626 | + //combine threads | ||
627 | + dEdp = point<float>(0.0f, 0.0f, 0.0f); // energy gradient wrt p | ||
628 | + dEdq = point<float>(0.0f, 0.0f, 0.0f); | ||
629 | + counter = 0; | ||
630 | + if (threadIdx.x == 0) { | ||
631 | + float R = snakes[blockIdx.x].r(); // radius of outer snake | ||
632 | + float R3 = R * R * R; //calculate R^3 (radius cube) | ||
633 | + point<float> c = snakes[blockIdx.x].c(); // center of snake | ||
634 | + | ||
635 | + for (int i = 0; i < threads; i++) { | ||
636 | + dEdp.x += sharedPtr[i * 7 + 0]; | ||
637 | + dEdp.y += sharedPtr[i * 7 + 1]; | ||
638 | + dEdp.z += sharedPtr[i * 7 + 2]; | ||
639 | + | ||
640 | + dEdq.x += sharedPtr[i * 7 + 3]; | ||
641 | + dEdq.y += sharedPtr[i * 7 + 4]; | ||
642 | + dEdq.z += sharedPtr[i * 7 + 5]; | ||
643 | + | ||
644 | + counter += sharedPtr[i * 7 + 6]; | ||
645 | + | ||
646 | + } | ||
647 | + | ||
648 | + float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); | ||
649 | + //float volume = (4.0f / 3.0f) * pi * (R + 1.0f)*(R + 1.0f)*(R + 1.0f); //(4.0f / 3.0f) * pi * powf((R + 1), 3.0f); | ||
650 | + dEdp = dEdp * volume / (float)counter; | ||
651 | + dEdq = dEdq * volume / (float)counter; | ||
652 | + | ||
653 | + | ||
654 | + dEdp = dEdp / (8.0f * R3); | ||
655 | + dEdq = dEdq / (8.0f * R3); | ||
656 | + | ||
657 | + //if (idx == 0) | ||
658 | + //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | ||
659 | + | ||
660 | + float factor = sqrtf(float(i + 1)); | ||
661 | + snakes[blockIdx.x].update(dEdp, dEdq, dt / factor); | ||
662 | + | ||
663 | + //printf("snakes[blockIdx.x].p.x=%f and snakes[blockIdx.x].p.y=%f \n snakes[blockIdx.x].q.x=%f and snakes[blockIdx.x].q.y=%f\n\n", snakes[blockIdx.x].p.x, snakes[blockIdx.x].p.y, snakes[blockIdx.x].q.x, snakes[blockIdx.x].q.y); | ||
664 | + } | ||
665 | + __syncthreads(); | ||
666 | + | ||
667 | + } | ||
668 | + | ||
669 | + | ||
670 | + | ||
671 | + | ||
672 | + | ||
673 | + | ||
674 | +} | ||
675 | + | ||
// Evolves one snake per thread using the snake_Engrad() energy gradient.
//   snakes   : snake array; entry `idx` is read at the start and written back at the end
//   I        : image data, forwarded unchanged to snake_Engrad() together with w, h, d
//   snakeNum : number of valid entries in `snakes`; threads at or beyond it exit immediately
//   itr      : number of update steps
//   dt       : base step size; step k (1-based) uses dt / sqrt(k)
//   debug    : when true, every thread prints its gradient after each step
__global__ void kernel_snake_evolve(sphere* snakes, float *I, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) {

	//__launch_bounds__(1024, 1);
	int idx = blockDim.x * blockIdx.x + threadIdx.x;

	// surplus threads (more threads launched than snakes) have nothing to do
	if (idx >= snakeNum)
		return;

	if (idx == 0)
		printf("\n\n \t\t=============>>>>we are in the kernel\n\n");

	// gradient accumulators handed by reference to snake_Engrad()
	point<float> gradP(0.0, 0.0, 0.0);		// energy gradient wrt p
	point<float> gradQ(0.0, 0.0, 0.0);		// energy gradient wrt q

	// evolve a private copy and store it back once all steps are done
	sphere local = snakes[idx];

	int step = 1;							// 1-based step number
	while (step <= itr) {
		snake_Engrad(gradP, gradQ, local, I, w, h, d, debug);
		if (debug)
			printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", gradP.x, gradP.y, gradP.z, gradQ.x);

		// the step size shrinks with the square root of the step number
		float shrink = sqrtf(float(step));
		local.update(gradP, gradQ, dt / shrink);
		step++;
	}

	snakes[idx] = local;
}
703 | + | ||
704 | + | ||
// -----------------------------------Energy computation and comparison of hypersnakes------------------------------------------------------------------------
// Adds the contribution of one image sample to a snake's energy.
//   E : running energy, updated in place
//   c : center of the snake
//   s : position of the sample
//   R : outer radius of the snake (the inner radius is R / cubeRoot2)
//   f : image value at the sample
// The weight applied to f depends on the distance between s and c:
//   -1 inside the inner sphere, a linear ramp across the inner boundary
//   (width deltar), +1 between the two boundaries, a linear ramp across the
//   outer boundary (width deltaR), and nothing at all beyond it.
__host__ __device__ void sum_E(float& E, point<float> c, point<float> s, float R, float f) {

	float inner = R / cubeRoot2;						// radius of inner snake

	// squared per-axis offsets between the sample and the center
	float dz2 = (s.z - c.z)*(s.z - c.z);
	float dy2 = (s.y - c.y)*(s.y - c.y);
	float dx2 = (s.x - c.x)*(s.x - c.x);
	float dist = sqrt(dz2 + dy2 + dx2);					// distance between the sample and the center

	// mouth of snake: fully inside the inner sphere
	if (dist < (inner - 0.5f*deltar)) {
		E -= f;
		return;
	}

	// throat of snake: transition band around the inner radius
	if (dist < (inner + 0.5f*deltar)) {
		float weight = (2.0f / deltar) * (dist - inner);
		E += (weight * f);
		return;
	}

	// coil of snake: between the two transition bands
	if (dist < (R - 0.5f*deltaR)) {
		E += f;
		return;
	}

	// fangs of snake: transition band around the outer radius
	if (dist < (R + 0.5f*deltaR)) {
		float weight = -(1.0f / deltaR) * (dist - (R + deltaR / 2.0f));
		E += (weight * f);
	}

	// samples beyond the outer band do not contribute
}
// Computes the energy of one snake by summing sum_E() over every non-zero
// voxel of the snake's bounding box.
//   energy  : output; receives E / (8 * R^3)
//   snake   : snake to evaluate (its r() and c() give radius and center)
//   I       : image; voxel (x, y, z) is read at index w*h*z + x*h + y
//   w, h, d : image dimensions
// NOTE: `energy` is left untouched when the radius is below the minimum
// threshold or when the clamped bounding box is empty, so callers must
// initialize it before the call.
__host__ __device__ void snake_energy(float &energy, sphere snake, float* I, size_t w, size_t h, size_t d) {

	float radius = snake.r();						// radius of outer snake
	point<float> c = snake.c();						// center of snake

	// snakes smaller than this radius are not evaluated
	float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f)));
	if (radius < threshold) {
		return;
	}

	// bounding box around the sphere, padded by one voxel
	float tempXmin = floor(c.x - radius - 1);
	float tempXmax = ceil(c.x + radius + 1);
	float tempYmin = floor(c.y - radius - 1);
	float tempYmax = ceil(c.y + radius + 1);
	float tempZmin = floor(c.z - radius - 1);
	float tempZmax = ceil(c.z + radius + 1);

	// clamp the bounding box to the image edges
	float xmin = max((float)tempXmin, (float) 0.0);
	float xmax = min((float)tempXmax, (float)(w - 1));
	float ymin = max((float)tempYmin, (float)0);
	float ymax = min((float)tempYmax, (float)(h - 1));
	float zmin = max((float)tempZmin, (float)0);
	float zmax = min((float)tempZmax, (float)(d - 1));

	// nothing to integrate when the clamped box is empty in any direction
	if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) {
		return;
	}

	float E = 0.0f;
	float R = radius;								// simplify radius to R
	float R3 = R * R * R;							// R^3 (radius cubed)

	// The linear index is kept in size_t: truncating it to int overflows for
	// volumes with more than 2^31 voxels.
	const size_t voxels = w * h * d;

	for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) {			// for each section
		for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) {		// for each row of the section in the bounding box
			for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) {	// for each pixel in the row

				const size_t position = (w * h * z) + (x * h + y);
				if (position >= voxels)
					continue;						// outside the image buffer

				const float f = I[position];		// image value at this voxel
				if (f != 0.0f) {					// zero-valued voxels contribute nothing
					point<float> s(x, y, z);		// sample position
					sum_E(E, c, s, R, f);
				}
			}
		}
	}
	energy = E / (8 * R3);
}
783 | + | ||
// Computes the energy of every snake, one snake per thread.
//   energy   : output array with one entry per snake
//   snakes   : snakes to evaluate
//   I        : image data, forwarded to snake_energy() together with w, h, d
//   snakeNum : number of snakes; threads at or beyond it exit immediately
__global__ void kernel_snake_energy(float* energy, sphere* snakes, float* I, size_t snakeNum, size_t w, size_t h, size_t d) {
	size_t i = blockDim.x * blockIdx.x + threadIdx.x;

	if (i >= snakeNum) return;						// return if the number of threads is more than snakes

	if (i == 0) {
		printf("we are in energy kernel\n");
	}

	// snake_energy() returns without writing its output for snakes that are
	// too small or whose bounding box is empty. Start from a defined value
	// so that energy[i] never receives an indeterminate float.
	float energy_temp = 0.0f;
	snake_energy(energy_temp, snakes[i], I, w, h, d);
	energy[i] = energy_temp;

}
798 | + | ||
799 | + | ||
// Returns the indices of the snakes that survive validation.
//   snakes    : host array of evolved snakes
//   snakeNum  : number of snakes
//   I         : host image (w * h * d floats) used to evaluate the energies
//   threads   : threads per block for the energy kernel
//   energy_th : a surviving snake must have an energy strictly below this value
// Steps:
//   1. compute the energy of every snake on the GPU (kernel_snake_energy);
//   2. discard snakes whose radius is below the minimum threshold;
//   3. for every remaining snake, collect the snakes overlapping it and keep
//      only the one with the smallest energy in that group;
//   4. return the indices that were not discarded and whose energy is below
//      energy_th.
std::vector<size_t> DetectValidSnakes_GPU(sphere* snakes, size_t snakeNum, float* I, size_t w, size_t h, size_t d, size_t threads, float energy_th) {

	const size_t blocks = snakeNum / threads + 1;

	// ---- energy computation on the device ----
	float* gpu_energy = NULL;
	HANDLE_ERROR(cudaMalloc(&gpu_energy, snakeNum * sizeof(float)));

	// The device pointer comes from cudaMalloc only. Allocating a host array
	// here as well would leak it as soon as cudaMalloc overwrote the pointer.
	sphere* gpu_snakes = NULL;
	HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere)));
	HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice));

	float* gpu_I = NULL;
	HANDLE_ERROR(cudaMalloc(&gpu_I, w * h * d * sizeof(float)));
	HANDLE_ERROR(cudaMemcpy(gpu_I, I, w * h * d * sizeof(float), cudaMemcpyHostToDevice));

	std::vector<float> energy(snakeNum);			// snake energies on the host
	kernel_snake_energy << < (unsigned int)blocks, (unsigned int)threads >> > (gpu_energy, gpu_snakes, gpu_I, snakeNum, w, h, d);
	HANDLE_ERROR(cudaGetLastError());				// report an invalid launch configuration

	HANDLE_ERROR(cudaMemcpy(energy.data(), gpu_energy, snakeNum * sizeof(float), cudaMemcpyDeviceToHost));

	cudaFree(gpu_energy);
	cudaFree(gpu_I);
	cudaFree(gpu_snakes);

	// ---- overlap resolution on the host ----
	// removed[k] != 0 marks snake k as discarded. A flag array gives the same
	// membership answers as searching a list of discarded indices, without a
	// linear scan for every pair of snakes.
	std::vector<char> removed(snakeNum, 0);
	std::vector<size_t> overlap;					// snakes overlapping the current snake i

	const float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f)));

	for (size_t i = 0; i < snakeNum; i++) {
		const point<float> ci = snakes[i].c();
		const float ri = snakes[i].r();

		if (ri < threshold)
			removed[i] = 1;
		if (removed[i])
			continue;								// snake i is already discarded

		overlap.clear();
		for (size_t j = 0; j < snakeNum; j++) {
			if (j == i)
				continue;

			// cheap pre-test: the center of j must lie within 2 radii of i in x and y
			const point<float> cj = snakes[j].c();
			const bool nearby = cj.x > ci.x - 2 * ri && cj.x < ci.x + 2 * ri
							 && cj.y > ci.y - 2 * ri && cj.y < ci.y + 2 * ri;
			if (!nearby)
				continue;

			const float rj = snakes[j].r();
			if (rj < threshold)
				removed[j] = 1;
			if (removed[j])
				continue;

			// distance between the two centers; the z term carries a weight of 4
			const float centerDistance_x = ci.x - cj.x;
			const float centerDistance_y = ci.y - cj.y;
			const float centerDistance_z = ci.z - cj.z;
			const float centerDistance = sqrt((centerDistance_x * centerDistance_x) + (centerDistance_y * centerDistance_y) + 4 * (centerDistance_z * centerDistance_z));
			const float maxRadius = max(ri, rj);	// larger of the two radii
			if (centerDistance < (maxRadius / cubeRoot2))
				overlap.push_back(j);
		}

		if (overlap.empty())
			continue;

		// among snake i and everything overlapping it, only the snake with
		// the smallest energy survives (the first one wins on ties)
		overlap.push_back(i);
		size_t keep = overlap[0];
		for (size_t k = 1; k < overlap.size(); k++) {
			if (energy[overlap[k]] < energy[keep])
				keep = overlap[k];
		}
		for (size_t m = 0; m < overlap.size(); m++) {
			if (overlap[m] != keep)
				removed[overlap[m]] = 1;
		}
	}

	// ---- final selection: not discarded and energy below the threshold ----
	std::vector<size_t> th_idx;
	for (size_t c = 0; c < snakeNum; c++) {
		if (!removed[c] && energy[c] < energy_th)
			th_idx.push_back(c);
	}
	return th_idx;									// indices of valid snakes
}
893 | + | ||
894 | + | ||
895 | + | ||
896 | + | ||
897 | +///..................................................................main function............................................................................................. | ||
// Prints the program banner: what the tool is, the reference it follows,
// the author, and a one-line usage summary.
void advertise() {
	std::cout << "this is Hypersnakuscule implementation" << "\n"
	          << "reference papre for 2D is (Snakuscules by Philippe Thévenaz and Michael Unser)" << "\n"
	          << "implemented by Mahsa Lotfollahi" << "\n" << "\n"
	          << "Usage: snakuscules input_image [options]" << std::endl;
}
904 | + | ||
// Program entry point.
//   1. parse the command line;
//   2. load the raw float volume named by the first positional argument and
//      stretch it to [0, 255];
//   3. optionally pre-process it ("log" or per-section "median" filter);
//   4. create either a swarm of snakes covering the volume or a single snake;
//   5. evolve the snakes on the GPU (--cuda >= 0) or on the CPU;
//   6. write the validated snakes to the output file and all snakes to
//      "no-delete.txt".
// Returns 0 on success and 1 on a usage or input error (and after --help).
int main(int argc, char* argv[]) {
	stim::arglist args;													// create an argument list

	args.add("help", "prints this help");
	args.add("iter", "number of iteration for evolving contour", "400", "positive value");
	args.add("radius", "initial radius", "15", "real positive value");
	args.add("size", "specify size of image in 3 dimension", "", "[w h d]");
	args.add("dt", "gradient descend stepsize", "10", "real positive value");
	args.add("cuda", "specify the device used for CUDA calculations", "0", "device ID, -1 for CPU");
	args.add("mc", "specify using Monte Carlo sampling", "", "MC=1 for Monte Carlo sampling and 0 for original integration");
	args.add("single", "specify a single contour to evolve", "", "[x y z r]");
	args.add("filter", "specify filter type for preprocessing like log", "", "name of filter");
	args.add("energy_th", "specify energy threshold, snakes with energies less than that survive", "-1", "small negative value");
	args.add("debug", "output debugging information");
	args.parse(argc, argv);

	if (args["help"].is_set()) {										// output help if requested by the user
		advertise();
		std::cout << args.str() << std::endl;
		return 1;
	}

	if (args.nargs() < 1) {
		std::cout << "ERROR: no input file specified" << std::endl;
		return 1;
	}

	std::string output_file = "output.txt";								// default output file name
	if (args.nargs() >= 2) output_file = args.arg(1);					// use the user-specified output instead

	int itr = args["iter"].as_int();									// get input parameters and set variables
	float radius = (float)args["radius"].as_float();
	if (!args["size"]) {												// the image size is mandatory
		std::cout << "you should specify size of image in 3 dimension" << std::endl;
		return 1;
	}

	int w = args["size"].as_int(0);
	int h = args["size"].as_int(1);
	int d = args["size"].as_int(2);

	float energy_th = (float)args["energy_th"].as_float();
	float dt = (float)args["dt"].as_float();
	int cuda_device = args["cuda"].as_int();							// desired CUDA device, negative for CPU

	bool MC = false;
	if (args["mc"]) MC = true;
	int sampleNum = 20000;												// default number of Monte Carlo samples
	if (args["mc"].nargs() > 0)
		sampleNum = args["mc"].as_int();

	bool debug = false;
	if (args["debug"]) debug = true;

	bool swarm = true;
	if (args["single"]) swarm = false;									// a single snake disables the swarm algorithm

	bool Filter = false;
	std::string filter_name;
	int kernel_size = 5;												// median kernel size
	if (args["filter"]) {
		Filter = true;
		filter_name = "log";
		if (args["filter"].nargs() > 0)
			filter_name = args["filter"].as_string(0);
		if (args["filter"].nargs() > 1)
			kernel_size = args["filter"].as_int(1);
	}

	// sizes are computed in size_t so that large volumes do not overflow int
	const size_t N = (size_t)w * h * d;									// number of voxels
	const size_t bytes = N * sizeof(float);								// number of bytes needed to store the image

	// allocate host memory and load the input image
	float* I = (float*)malloc(bytes);
	std::ifstream inputfile(args.arg(0), std::ios::in | std::ios::binary);
	if (!inputfile) {
		std::cout << "cannot open specified input file" << std::endl;
		free(I);
		return 1;														// main returns int: report the failure
	}
	inputfile.read((char*)I, bytes);
	inputfile.close();

	float* I_original = (float*)malloc(bytes);							// keep the original image without pre-processing
	memcpy(I_original, I, bytes);
	stretch(I_original, N, 0, 255);
	stretch(I, N, 0, 255);

	if (Filter) {
		if (filter_name == "log") {										// compute log of image
			for (size_t i = 0; i < N; i++)
				I[i] = log(I[i] + 1);
		}
		if (filter_name == "median") {									// apply median filter on each section
			float* I_2D = (float*)malloc(w*h * sizeof(float));			// buffer holding one 2D section

			cv::Mat t_I_2D_mat(w, h, CV_32F);							// image is stored column major but OpenCV is row major, so the matrix is transposed
			cv::Mat I2d_blurred(w, h, CV_32F);							// blurred section

			for (int zz = 0; zz < d; zz++) {
				memcpy(I_2D, I_original + (zz * w *h), w * h * sizeof(float));	// copy one section of the 3D image
				cv::Mat I_2D_mat(h, w, CV_32F, I_2D);					// wrap the buffer in a Mat for the OpenCV median filter
				cv::transpose(I_2D_mat, t_I_2D_mat);
				cv::medianBlur(t_I_2D_mat, I2d_blurred, kernel_size);	// apply median filter
				cv::transpose(I2d_blurred, I_2D_mat);					// transpose back to the storage layout
				// Copy straight from the Mat: I_2D must keep pointing at the
				// malloc'ed buffer so that the free() below stays valid.
				memcpy(I + (zz * w *h), I_2D_mat.data, w * h * sizeof(float));
			}
			free(I_2D);
			t_I_2D_mat.release();
			I2d_blurred.release();
		}
	}

	stretch(I, N, 0, 255);

	size_t snakeNum;
	if (swarm) {
		int D = int(sqrt(1.5)*radius);									// distance between hypersnakes
		if (D <= 0) {													// would divide by zero (or go negative) below
			std::cout << "ERROR: initial radius is too small to create a swarm of snakes" << std::endl;
			free(I);
			free(I_original);
			return 1;
		}
		snakeNum = N / ((size_t)D * D * D);								// approximate number of hypersnakes lying on image
	}
	else
		snakeNum = 1;

	sphere* snakes = new sphere[snakeNum];								// one sphere per snake
	memset(snakes, 0, snakeNum * sizeof(sphere));

	if (swarm) {
		initialSwarmSnake(snakes, snakeNum, w, h, d, radius);
		std::cout << "number of snakes=" << snakeNum << std::endl;
	}
	else {
		point<float> center;
		center.x = (float)args["single"].as_float(0);
		center.y = (float)args["single"].as_float(1);
		center.z = (float)args["single"].as_float(2);
		if (args["single"].nargs() >= 4)
			radius = (float)args["single"].as_float(3);

		snakes[0].p.x = center.x - radius;								// p and q are placed one radius either side of the center along x
		snakes[0].q.x = center.x + radius;
		snakes[0].p.y = snakes[0].q.y = center.y;
		snakes[0].p.z = snakes[0].q.z = center.z;
	}

	point<float>* samples = (point<float>*)malloc(sampleNum * sizeof(point<float>));
	memset(samples, 0, sampleNum * sizeof(point<float>));
	if (MC) {
		if (debug)
			randGenerator_cube(samples, sampleNum, true);
		else
			randGenerator_cube(samples, sampleNum);
	}

	std::cout << "energy_th=" << energy_th << std::endl;
	std::cout << "initial radius=" << radius << std::endl;

	//-----------------------------------GPU implementation----------------------------------------------------------------------------
	if (cuda_device >= 0) {

		// honour the device requested on the command line
		HANDLE_ERROR(cudaSetDevice(cuda_device));
		cudaDeviceProp prop;
		HANDLE_ERROR(cudaGetDeviceProperties(&prop, cuda_device));
		size_t threads = (size_t)prop.maxThreadsPerBlock;
		size_t blocks = snakeNum;										// Monte Carlo kernel: one block per snake
		size_t nbyte_shared = 7 * 4 * threads;							// 7 four-byte values per thread

		// allocate memory to snakes and copy them to device
		sphere* gpu_snakes;
		HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere)));
		HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice));

		// allocate memory to image in device and copy from cpu to device
		float* gpu_I;
		HANDLE_ERROR(cudaMalloc(&gpu_I, bytes));
		HANDLE_ERROR(cudaMemcpy(gpu_I, I, bytes, cudaMemcpyHostToDevice));

		point<float>* G_samples = NULL;									// random samples on the device (Monte Carlo only)
		stim::gpuStartTimer();
		if (MC) {
			HANDLE_ERROR(cudaMalloc(&G_samples, sampleNum * sizeof(point<float>)));
			HANDLE_ERROR(cudaMemset(G_samples, 0, sampleNum * sizeof(point<float>)));
			HANDLE_ERROR(cudaMemcpy(G_samples, samples, sampleNum * sizeof(point<float>), cudaMemcpyHostToDevice));

			if (debug)
				kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt, true);
			else
				kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt);
		}
		else {
			// kernel_snake_evolve handles one snake per THREAD, so only
			// ceil(snakeNum / threads) blocks are needed.
			size_t evolve_blocks = (snakeNum + threads - 1) / threads;
			kernel_snake_evolve << <evolve_blocks, threads >> > (gpu_snakes, gpu_I, snakeNum, w, h, d, itr, dt, debug);
		}

		std::cout << "gpuruntime = " << stim::gpuStopTimer() << " ms" << std::endl;
		HANDLE_ERROR(cudaMemcpy(snakes, gpu_snakes, snakeNum * sizeof(sphere), cudaMemcpyDeviceToHost));
		cudaFree(G_samples);											// no-op when Monte Carlo sampling is off
		cudaFree(gpu_I);
		cudaFree(gpu_snakes);

		std::vector<size_t> idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, threads, energy_th);
		SaveSnakes(output_file, snakes, idx);
	}

	//--------------------------------------------CPU implementation----------------------------------------------------------------------------
	else {
		unsigned int start = time(NULL);
		std::cout << "it is running on CPU" << std::endl;
		for (size_t i = 0; i < snakeNum; i++) {
			if (debug)
				snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC, true);
			else
				snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC);

			std::cout << "Output Snakes------------------" << std::endl;
			std::cout << snakes[i].str() << std::endl;
		}
		unsigned int end = time(NULL);
		std::cout << "cpuRunTime=" << end - start << "s" << std::endl;

		// validation still uses the GPU energy kernel, with 512 threads per block
		std::vector<size_t> idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, 512, energy_th);
		SaveSnakes(output_file, snakes, idx);
	}

	free(I);
	free(I_original);
	free(samples);

	if (debug) {
		std::cout << "Output Snakes------------------" << std::endl;
		for (size_t i = 0; i < snakeNum; i++) {							// for each snake
			std::cout << snakes[i].str() << std::endl;
		}
	}

	// save every snake, validated or not
	std::vector<size_t> idx(snakeNum);
	std::iota(idx.begin(), idx.end(), 0);
	SaveSnakes("no-delete.txt", snakes, idx);

	delete[] snakes;
	return 0;

}