Commit f6438346ee2bdf15cdc3f057f58447935dfe13af
1 parent
547f6b98
initial commit after PLoS submission
Showing
3 changed files
with
1279 additions
and
0 deletions
Show diff stats
1 | +#ifndef SNAKUSCULE_H | |
2 | +#define SNAKUSCULE_H | |
3 | + | |
4 | +template <typename T > | |
5 | +class point { | |
6 | +public: | |
7 | + | |
8 | + T x; | |
9 | + T y; | |
10 | + T z; | |
11 | + | |
12 | + //default constructor | |
13 | + CUDA_CALLABLE point() { | |
14 | + x = 0; | |
15 | + y = 0; | |
16 | + z = 0; | |
17 | + }; | |
18 | + //constructor definition | |
19 | + CUDA_CALLABLE point(T a, T b, T c) { | |
20 | + x = a; | |
21 | + y = b; | |
22 | + z = c; | |
23 | + } | |
24 | + | |
25 | + CUDA_CALLABLE point <T> operator= (const point<T> &rhs) { | |
26 | + this->x = rhs.x; | |
27 | + this->y = rhs.y; | |
28 | + this->z = rhs.z; | |
29 | + return (*this); | |
30 | + } | |
31 | + | |
32 | + CUDA_CALLABLE point <T> operator+ (const point<T> rhs) { | |
33 | + point<T> temp; | |
34 | + temp.x = this->x + rhs.x; | |
35 | + temp.y = this->y + rhs.y; | |
36 | + temp.z = this->z + rhs.z; | |
37 | + return temp; | |
38 | + | |
39 | + } | |
40 | + | |
41 | + CUDA_CALLABLE point<T> operator- (const point<T> rhs) { | |
42 | + point<T> temp; | |
43 | + temp.x = this->x - rhs.x; | |
44 | + temp.y = this->y - rhs.y; | |
45 | + temp.z = this->z - rhs.z; | |
46 | + return temp; | |
47 | + | |
48 | + } | |
49 | + CUDA_CALLABLE point<T> operator*(const float rhs) { | |
50 | + return point(rhs * this->x, rhs * this->y, rhs * this->z ); | |
51 | + } | |
52 | + | |
53 | + | |
54 | + CUDA_CALLABLE point<T> operator/ (const float rhs) { | |
55 | + return point(this->x / rhs , this->y / rhs, this->z/ rhs ); | |
56 | + | |
57 | + } | |
58 | +}; | |
59 | + | |
60 | + | |
61 | +class sphere { | |
62 | + | |
63 | +public: | |
64 | + | |
65 | + point<float> p; | |
66 | + point<float> q; | |
67 | + //default constructor | |
68 | + CUDA_CALLABLE sphere() { | |
69 | + p.x = 0; | |
70 | + p.y = 0; | |
71 | + p.z = 0; | |
72 | + q.x = 0; | |
73 | + q.y = 0; | |
74 | + q.z = 0; | |
75 | + }; | |
76 | + | |
77 | + //CUDA_CALLABLE point<float> p() { return point<float>(center.x - radius, center.y); } | |
78 | + //CUDA_CALLABLE point<float> q() { return point<float>(center.x + radius, center.y); } | |
79 | + //CUDA_CALLABLE point<float> c() { return center; } | |
80 | + CUDA_CALLABLE point<float> c() { | |
81 | + point<float> center; | |
82 | + point<float> sum; // sum of two points to get center | |
83 | + sum = p + q; | |
84 | + center.x = 0.5f * sum.x; | |
85 | + center.y = 0.5f * sum.y; | |
86 | + center.z = 0.5f * sum.z; | |
87 | + return center; } | |
88 | + | |
89 | + //CUDA_CALLABLE float r() { return radius; } | |
90 | + CUDA_CALLABLE float r() { | |
91 | + float radius; | |
92 | + point <float> d; //distance | |
93 | + d = p - q; | |
94 | + radius = 0.5f* sqrt((d.x * d.x) + (d.y * d.y) + (d.z * d.z)); | |
95 | + return radius; } | |
96 | + | |
97 | + CUDA_CALLABLE void update(point<float> energyGradp , point<float> energyGradq , float dt){ | |
98 | + p = p - (energyGradp * dt); | |
99 | + q = q - (energyGradq * dt); | |
100 | + } | |
101 | + | |
102 | + CUDA_UNCALLABLE std::string str() { | |
103 | + std::stringstream ss; | |
104 | + ss << "q = (" << q.x << ", " << q.y << ", " << q.z << ")" << std::endl; | |
105 | + ss << "p = (" << p.x << ", " << p.y << ", " << p.z << ")" << std::endl; | |
106 | + point<float> center = c(); | |
107 | + ss << "c = (" << center.x << ", " << center.y << ", " << center.z << ")" << std::endl; | |
108 | + ss << "r = " << r() << std::endl; | |
109 | + return ss.str(); | |
110 | + } | |
111 | +}; | |
112 | + | |
113 | +#endif | |
0 | 114 | \ No newline at end of file | ... | ... |
1 | +#include <iostream> | |
2 | +#include<string> | |
3 | +#include<cuda.h> | |
4 | +#include <cuda_runtime.h> | |
5 | +#include "device_launch_parameters.h" | |
6 | +#include <fstream> | |
7 | +#include <curand.h> | |
8 | +#include <curand_kernel.h> | |
9 | +#include <time.h> | |
10 | +#include <chrono> | |
11 | +#include <stdio.h> | |
12 | +#include <math.h> | |
13 | +#include "opencv2/core/core.hpp" | |
14 | +#include <opencv2/imgproc/imgproc.hpp> | |
15 | +#include <opencv2/opencv.hpp> | |
16 | +#include <opencv2/highgui/highgui_c.h> | |
17 | + | |
18 | +#define USING_OPENCV | |
19 | +#include<stim/image/image.h> | |
20 | +#include <stim/cuda/cudatools/callable.h> | |
21 | +#include <stim/cuda/cudatools/error.h> | |
22 | +#include <stim/cuda/cudatools/timer.h> | |
23 | +#include <stim/math/constants.h> | |
24 | +#include "stim/parser/arguments.h" | |
25 | +#include<random> | |
26 | +#include <numeric> // std::iota | |
27 | +//#include<stim/math/random.h> | |
28 | +#include "hypersnakuscule.h" | |
29 | +//#include "median2.cuh" | |
30 | + | |
31 | +#define deltaR 2.0f | |
32 | +#define deltar 1.5874f // deltar=deltaR/cubeRoot2 | |
33 | +#define cubeRoot2 1.2599f | |
34 | +#define pi 3.14159f | |
35 | +//#define Energy_th -3 | |
36 | +//----------------------------------------------functions-------------------------------------------------------------- | |
37 | +void stretch(float* I, size_t size, int low, int high) { | |
38 | + //size: number of image pixel | |
39 | + float max_val = I[0]; | |
40 | + float min_val = I[0]; | |
41 | + for (int n = 0; n < size; n++) { | |
42 | + if (I[n] > max_val) { | |
43 | + max_val = I[n]; | |
44 | + } | |
45 | + if (I[n] < min_val) { | |
46 | + min_val = I[n]; | |
47 | + } | |
48 | + } | |
49 | + float range = max_val - min_val; | |
50 | + float desired_range = (float)high - (float)low; | |
51 | + for (size_t n = 0; n < size; n++) { //for each element in the image | |
52 | + I[n] = desired_range * (I[n] - min_val) / range + low; | |
53 | + } | |
54 | + | |
55 | +} | |
56 | + | |
57 | +/// random generator function: generate random numbers in a sphere with radius 1 in the center (0, 0, 0) | |
58 | +void randGenerator(point<float>* r, int sampleNum, bool debug = false) { | |
59 | + | |
60 | + for (size_t i = 0; i < sampleNum; i++) { | |
61 | + double rn = (double)rand() / (double)(RAND_MAX); | |
62 | + double theta = (double)rand() / (double)(RAND_MAX)* stim::TAU; | |
63 | + double cosphi = 1.0 - 2.0 * ((double)rand() / (double)(RAND_MAX)); | |
64 | + double phi = std::acos(cosphi); | |
65 | + //double phi = std::acos(2.0 * v - 1.0); | |
66 | + //double phi = (double)rand() / (double)(RAND_MAX)* stim::PI; | |
67 | + //std::cout << "rn=" << rn << "\t theta=" << theta << "\tphi=" << phi << std::endl; | |
68 | + r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); | |
69 | + r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); | |
70 | + r[i].z = (float)(std::cbrt(rn) * cos(phi)); | |
71 | + | |
72 | + | |
73 | + } | |
74 | + if (debug) { | |
75 | + std::ofstream outfile("randomSamples.txt"); //open a file for writing | |
76 | + for (size_t i = 0; i < sampleNum; i++) { | |
77 | + outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | |
78 | + } | |
79 | + outfile.close(); | |
80 | + } | |
81 | + | |
82 | +} | |
83 | +///create random numbers in a cube and delete the one outside the sphere | |
84 | +void randGenerator_cube(point<float>* r, int sampleNum, bool debug = false) { | |
85 | + size_t counter = 0; | |
86 | + while (counter < sampleNum) { | |
87 | + double x = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | |
88 | + double y = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | |
89 | + double z = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; | |
90 | + double d = sqrt((x * x) + (y * y) + (z * z)); | |
91 | + | |
92 | + if (d < 1.1) { | |
93 | + r[counter].x = (float)x; | |
94 | + r[counter].y = (float)y; | |
95 | + r[counter].z = (float)z; | |
96 | + counter++; | |
97 | + } | |
98 | + } | |
99 | + | |
100 | + if (debug) { | |
101 | + std::ofstream outfile("randomSamples.txt"); //open a file for writing | |
102 | + for (size_t i = 0; i < sampleNum; i++) { | |
103 | + outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | |
104 | + } | |
105 | + outfile.close(); | |
106 | + } | |
107 | + | |
108 | + | |
109 | +} | |
110 | + | |
111 | +/// random generator function: generate random numbers in a sphere with radius 1 in the center (0 , 0, 0) | |
112 | +//void randGenerator1(point<float>* r, int sampleNum, bool debug = false) { | |
113 | +// for (int i = 0; i < sampleNum; i++) { | |
114 | +// std::default_random_engine generator1(100 + i); | |
115 | +// std::uniform_real_distribution<double> distribution1(0.0f, 1.0f); | |
116 | +// double rn = distribution1(generator1); | |
117 | +// std::default_random_engine generator2(50.0f + 5.0f * i); | |
118 | +// std::uniform_real_distribution<double> distribution2(0.0, 2.0 * stim::PI); | |
119 | +// double theta = distribution2(generator2); | |
120 | +// std::default_random_engine generator3(30.0f + 3.0f * i); | |
121 | +// std::uniform_real_distribution<double> distribution3(0, stim::PI); | |
122 | +// double phi = distribution3(generator3); | |
123 | +// r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); | |
124 | +// r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); | |
125 | +// r[i].z = (float)(std::cbrt(rn) * cos(phi)); | |
126 | +// } | |
127 | +// if (debug) { | |
128 | +// std::ofstream outfile("randomSamples.txt"); //open a file for writing | |
129 | +// for (size_t i = 0; i < sampleNum; i++) { | |
130 | +// outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius | |
131 | +// } | |
132 | +// outfile.close(); | |
133 | +// } | |
134 | +//} | |
135 | + | |
136 | + | |
137 | + | |
138 | +/// saves the snakes specified by the idx array | |
139 | +void SaveSnakes(std::string filename, sphere* snakes, std::vector<size_t> idx) { | |
140 | + std::ofstream outfile(filename); //open a file for writing | |
141 | + for (size_t i = 0; i < idx.size(); i++) { | |
142 | + point<float> center = snakes[idx[i]].c(); //get the centerpoint of the snake | |
143 | + outfile << center.x << " " << center.y << " " << center.z << " " << snakes[idx[i]].r() << std::endl; //output the center and radius | |
144 | + } | |
145 | + outfile.close(); | |
146 | + | |
147 | + | |
148 | + | |
149 | +} | |
150 | +void initialSwarmSnake(sphere * snakes, size_t &counter, size_t w, size_t h, size_t d, float radius) { | |
151 | + std::cout << "W=" << w << "\t h=" << h << "\td=" << d << std::endl; | |
152 | + int D = int(sqrt(1.5)*radius); //distance between hypersnakes | |
153 | + //int D = 30; //for phantom | |
154 | + int k1 = 0; | |
155 | + counter = 0; | |
156 | + float startpoint = 0; | |
157 | + for (float k = radius; k < (d - radius); k += D ) {// for phantom D | |
158 | + for (float j = radius; j < (h - radius); j += D) { | |
159 | + k1++; | |
160 | + if (k1 % 2 == 1) | |
161 | + startpoint = 0; //for phantom D | |
162 | + else | |
163 | + startpoint = (float)D / 2.0f;//for phantom D | |
164 | + for (float i = startpoint; i < (w - radius); i += D) { | |
165 | + point<float> temp_p(i, j, k); | |
166 | + snakes[counter].p = temp_p; | |
167 | + point<float> temp_q(i + (2 * radius), j, k); | |
168 | + snakes[counter].q = temp_q; | |
169 | + counter = counter + 1; | |
170 | + } | |
171 | + } | |
172 | + } | |
173 | + std::ofstream outfile("initials.txt"); | |
174 | + for (int i = 0; i < counter; i++) { | |
175 | + outfile << snakes[i].c().x << " " << snakes[i].c().y << " " << snakes[i].c().z << " " << snakes[i].r() << std::endl; | |
176 | + } | |
177 | + outfile.close(); | |
178 | +} | |
179 | + | |
180 | +__host__ __device__ void sum_dE(point<float>& dEdp, point<float>& dEdq, point<float> c, point<float> s, float R, float f) { | |
181 | + | |
182 | + float r = R / cubeRoot2; // radius of inner snake | |
183 | + float dz2 = (s.z - c.z)*(s.z - c.z); | |
184 | + float dy2 = (s.y - c.y)*(s.y - c.y); | |
185 | + float dx2 = (s.x - c.x)*(s.x - c.x); | |
186 | + float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour | |
187 | + | |
188 | + float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; | |
189 | + float dx = (c.x - s.x) / d; | |
190 | + float dy = (c.y - s.y) / d; | |
191 | + float dz = (c.z - s.z) / d; | |
192 | + | |
193 | + if (d < (r - 0.5f*deltar)) { // mouth of snake | |
194 | + dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake | |
195 | + dEdq.x += 3.0f / gx * f; | |
196 | + } | |
197 | + else if (d < (r + 0.5f*deltar)) { // throat of snake | |
198 | + float S = (2.0f / deltar) * (d - r); // weight function value in the given point | |
199 | + dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); | |
200 | + dEdp.y += f *(dy / deltar); | |
201 | + dEdp.z += f *(dz / deltar); | |
202 | + | |
203 | + dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); | |
204 | + dEdq.y = dEdp.y; | |
205 | + dEdq.z = dEdp.z; | |
206 | + | |
207 | + } | |
208 | + else if (d < (R - 0.5f*deltaR)) { // coil of snake | |
209 | + dEdp.x += 3.0f / gx * f; | |
210 | + dEdq.x -= 3.0f / gx * f; | |
211 | + } | |
212 | + | |
213 | + else if (d < (R + 0.5f*deltaR)) { // fangs of snake | |
214 | + float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); | |
215 | + | |
216 | + dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); | |
217 | + dEdp.y -= 0.5f *(dy / deltaR) * f; | |
218 | + dEdp.z -= 0.5f *(dz / deltaR) * f; | |
219 | + | |
220 | + dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); | |
221 | + dEdq.y = dEdp.y; | |
222 | + dEdq.z = dEdp.z; | |
223 | + } | |
224 | + | |
225 | +} | |
226 | + | |
227 | +//sum_dE in debug mode | |
228 | +__host__ __device__ void sum_dE_debug(point<float>& dEdp, point<float>& dEdq, int &counter, point<float> c, point<float> s, float R, float f) { | |
229 | + | |
230 | + float r = R / cubeRoot2; // radius of inner snake | |
231 | + float dz2 = (s.z - c.z)*(s.z - c.z); | |
232 | + float dy2 = (s.y - c.y)*(s.y - c.y); | |
233 | + float dx2 = (s.x - c.x)*(s.x - c.x); | |
234 | + float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour | |
235 | + | |
236 | + float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; | |
237 | + float dx = (c.x - s.x) / d; | |
238 | + float dy = (c.y - s.y) / d; | |
239 | + float dz = (c.z - s.z) / d; | |
240 | + int trivial = 0; //to test if any point is out of the contour | |
241 | + if (d < (r - 0.5f*deltar)) { // mouth of snake | |
242 | + counter++; | |
243 | + dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake | |
244 | + dEdq.x += 3.0f / gx * f; | |
245 | + } | |
246 | + else if (d < (r + 0.5f*deltar)) { // throat of snake | |
247 | + counter++; | |
248 | + float S = (2.0f / deltar) * (d - r); // weight function value in the given point | |
249 | + dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); | |
250 | + dEdp.y += f *(dy / deltar); | |
251 | + dEdp.z += f *(dz / deltar); | |
252 | + | |
253 | + dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); | |
254 | + dEdq.y = dEdp.y; | |
255 | + dEdq.z = dEdp.z; | |
256 | + | |
257 | + } | |
258 | + else if (d < (R - 0.5f*deltaR)) { // coil of snake | |
259 | + counter++; | |
260 | + dEdp.x += 3.0f / gx * f; | |
261 | + dEdq.x -= 3.0f / gx * f; | |
262 | + } | |
263 | + | |
264 | + else if (d < (R + 0.5f*deltaR)) { // fangs of snake | |
265 | + counter++; | |
266 | + float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); | |
267 | + | |
268 | + dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); | |
269 | + dEdp.y -= 0.5f *(dy / deltaR) * f; | |
270 | + dEdp.z -= 0.5f *(dz / deltaR) * f; | |
271 | + | |
272 | + dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); | |
273 | + dEdq.y = dEdp.y; | |
274 | + dEdq.z = dEdp.z; | |
275 | + } | |
276 | + else | |
277 | + trivial++; | |
278 | + | |
279 | +} | |
280 | + | |
281 | +// this function calculate gradient of energy with respect to two point p and q | |
282 | +__host__ __device__ void snake_Engrad(point<float>&dEdp, point<float>&dEdq, sphere snake, float* I, size_t w, size_t h, size_t d, bool debug = false) { | |
283 | + | |
284 | + float radius = snake.r(); // radius of outer snake | |
285 | + point<float> c = snake.c(); // center of snake | |
286 | + dEdp = point<float>(0, 0, 0); //initialize dEdp and dEdq to zero | |
287 | + dEdq = point<float>(0, 0, 0); | |
288 | + | |
289 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | |
290 | + if (radius < threshold) { | |
291 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | |
292 | + return; | |
293 | + } | |
294 | + | |
295 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | |
296 | + float tempXmax = ceil(c.x + radius + 1); | |
297 | + float tempYmin = floor(c.y - radius - 1); | |
298 | + float tempYmax = ceil(c.y + radius + 1); | |
299 | + float tempZmin = floor(c.z - radius - 1); | |
300 | + float tempZmax = ceil(c.z + radius + 1); | |
301 | + | |
302 | + float xmin = max((float)tempXmin, (float) 0.0); //clamp the bounding box to the image edges | |
303 | + float xmax = min((float)tempXmax, (float)(w - 1)); | |
304 | + float ymin = max((float)tempYmin, (float)0); | |
305 | + float ymax = min((float)tempYmax, (float)(h - 1)); | |
306 | + float zmin = max((float)tempZmin, (float)0); | |
307 | + float zmax = min((float)tempZmax, (float)(d - 1)); | |
308 | + | |
309 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | |
310 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | |
311 | + return; | |
312 | + } | |
313 | + | |
314 | + float R = radius; //simplify radius to R | |
315 | + float R3 = R * R * R; //calculate R^2 (radius squared) | |
316 | + for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) { // for each section | |
317 | + for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) { //for each column of section in the bounding box | |
318 | + for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) { //for each pixel p in the column | |
319 | + | |
320 | + point<float> s(x, y, z); // a sample inside the contour | |
321 | + float f; // image value in given position | |
322 | + | |
323 | + int position = (int)((w * h * z) + (x * h + y)); | |
324 | + if (position < (w * h * d)) { | |
325 | + f = I[position]; | |
326 | + | |
327 | + | |
328 | + if (!f == 0) | |
329 | + sum_dE(dEdp, dEdq, c, s, R, f); | |
330 | + | |
331 | + } | |
332 | + | |
333 | + } | |
334 | + } | |
335 | + } | |
336 | + | |
337 | + dEdp = dEdp / (8 * R3); | |
338 | + dEdq = dEdq / (8 * R3); | |
339 | + | |
340 | + | |
341 | +} | |
342 | + | |
343 | +//// this function calculate gradient of energy with respect to two point p and q using Monte Carlo | |
344 | +__host__ __device__ void snake_Engrad_MC(point<float>&dEdp, point<float>&dEdq, sphere snake, float* I, point<float>* samples, size_t sampleNum, size_t w, size_t h, size_t d, bool debug = false) { | |
345 | + | |
346 | + float radius = snake.r(); // radius of outer snake | |
347 | + point<float> c = snake.c(); // center of snake | |
348 | + dEdp = point<float>(0, 0, 0); //initialize dEdp and dEdq to zero | |
349 | + dEdq = point<float>(0, 0, 0); | |
350 | + | |
351 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | |
352 | + if (radius < threshold) { | |
353 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | |
354 | + return; | |
355 | + } | |
356 | + | |
357 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | |
358 | + float tempXmax = ceil(c.x + radius + 1); | |
359 | + float tempYmin = floor(c.y - radius - 1); | |
360 | + float tempYmax = ceil(c.y + radius + 1); | |
361 | + float tempZmin = floor(c.z - radius - 1); | |
362 | + float tempZmax = ceil(c.z + radius + 1); | |
363 | + | |
364 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | |
365 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | |
366 | + float ymin = max((float)tempYmin, (float)0.0); | |
367 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | |
368 | + float zmin = max((float)tempZmin, (float)0.0); | |
369 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | |
370 | + | |
371 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | |
372 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | |
373 | + return; | |
374 | + } | |
375 | + | |
376 | + | |
377 | + | |
378 | + | |
379 | + float R = radius; //simplify radius to R | |
380 | + float R3 = R * R * R; //calculate R^3 (radius cube) | |
381 | + int counter = 0; | |
382 | + float sumf = 0.0; | |
383 | + | |
384 | + for (int i = 0; i < sampleNum; i++) { | |
385 | + float sx = samples[i].x; | |
386 | + float sy = samples[i].y; | |
387 | + float sz = samples[i].z; | |
388 | + | |
389 | + float x = (R + 1.0f) * sx + c.x; | |
390 | + float y = (R + 1.0f) * sy + c.y; | |
391 | + float z = (R + 1.0f) * sz + c.z; | |
392 | + | |
393 | + int xi = (int)round(x); | |
394 | + int yi = (int)round(y); | |
395 | + int zi = (int)round(z); | |
396 | + | |
397 | + point<float> s(xi, yi, zi); // a sample inside the contour | |
398 | + float f; // image value in given position | |
399 | + | |
400 | + int position = (int)((w * h * zi) + (xi * h + yi)); | |
401 | + if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) | |
402 | + counter++; | |
403 | + f = (float)I[position]; | |
404 | + sumf += f; | |
405 | + //if (debug) | |
406 | + sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); | |
407 | + //sum_dE(dEdp, dEdq, c, s, R, f); | |
408 | + } | |
409 | + } | |
410 | + | |
411 | + | |
412 | + | |
413 | + | |
414 | + | |
415 | + float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); | |
416 | + | |
417 | + //if (debug) printf("volume_sphere=%f and volume=%f\n", volume_sphere, volume); | |
418 | + //if (debug) printf("sampleNum=%u and (float)counter= %f \n", sampleNum, (float)counter); | |
419 | + | |
420 | + dEdp = dEdp * volume / (float)counter; | |
421 | + | |
422 | + dEdq = dEdq * volume / (float)counter; | |
423 | + | |
424 | + | |
425 | + dEdp = dEdp / (8.0f * R3); | |
426 | + dEdq = dEdq / (8.0f * R3); | |
427 | + | |
428 | +} | |
429 | + | |
430 | + | |
431 | +// this function calculate gradient of energy with respect to two point p and q using Monte Carlo | |
432 | +__host__ __device__ void snake_Engrad_MC_parallel(point<float>&dEdp, point<float>&dEdq, int &counter, sphere snake, float* I, point<float> sample, size_t w, size_t h, size_t d, bool debug = false) { | |
433 | + | |
434 | + float radius = snake.r(); // radius of outer snake | |
435 | + point<float> c = snake.c(); // center of snake | |
436 | + | |
437 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | |
438 | + float tempXmax = ceil(c.x + radius + 1); | |
439 | + float tempYmin = floor(c.y - radius - 1); | |
440 | + float tempYmax = ceil(c.y + radius + 1); | |
441 | + float tempZmin = floor(c.z - radius - 1); | |
442 | + float tempZmax = ceil(c.z + radius + 1); | |
443 | + | |
444 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | |
445 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | |
446 | + float ymin = max((float)tempYmin, (float)0.0); | |
447 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | |
448 | + float zmin = max((float)tempZmin, (float)0.0); | |
449 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | |
450 | + | |
451 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | |
452 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | |
453 | + return; | |
454 | + } | |
455 | + | |
456 | + | |
457 | + | |
458 | + | |
459 | + float R = radius; //simplify radius to R | |
460 | + float sumf = 0.0; | |
461 | + | |
462 | + //for (int i = 0; i < sampleNum; i++) { | |
463 | + float sx = sample.x; | |
464 | + float sy = sample.y; | |
465 | + float sz = sample.z; | |
466 | + | |
467 | + float x = (R + 1.0f) * sx + c.x; | |
468 | + float y = (R + 1.0f) * sy + c.y; | |
469 | + float z = (R + 1.0f) * sz + c.z; | |
470 | + | |
471 | + int xi = (int)round(x); | |
472 | + int yi = (int)round(y); | |
473 | + int zi = (int)round(z); | |
474 | + | |
475 | + point<float> s(xi, yi, zi); // a sample inside the contour | |
476 | + float f; // image value in given position | |
477 | + | |
478 | + int position = (int)((w * h * zi) + (xi * h + yi)); | |
479 | + if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) | |
480 | + | |
481 | + f = (float)I[position]; | |
482 | + sumf += f; | |
483 | + sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); | |
484 | + //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | |
485 | + | |
486 | + } | |
487 | + | |
488 | + | |
489 | +} | |
490 | + | |
491 | + | |
492 | + | |
493 | + | |
494 | + | |
495 | +void snake_evolve(sphere &snakes, float* I, int w, int h, int d, float dt, int itr, point<float>* samples, size_t sampleNum, bool MC, bool debug = false) { | |
496 | + point<float> dEdp(0.0, 0.0, 0.0); // energy gradient wrt p | |
497 | + point<float> dEdq(0.0, 0.0, 0.0); // energy gradient wrt q | |
498 | + for (int numItr = 0; numItr < itr; numItr++) { | |
499 | + if (MC) | |
500 | + snake_Engrad_MC(dEdp, dEdq, snakes, I, samples, sampleNum, w, h, d, debug); | |
501 | + else | |
502 | + snake_Engrad(dEdp, dEdq, snakes, I, w, h, d, debug); | |
503 | + | |
504 | + if (debug) printf("dEdp.x=%f \t dEdq.x=%f \n dEdp.y=%f \t dEdp.z=%f \n\n", dEdp.x, dEdq.x, dEdp.y, dEdp.z); | |
505 | + float factor = sqrt(float(numItr + 1)); // step size in gradient descent decreasing by number of iterations | |
506 | + | |
507 | + snakes.update(dEdp, dEdq, dt / factor); | |
508 | + | |
509 | + } | |
510 | + | |
511 | + | |
512 | +} | |
513 | + | |
514 | + | |
515 | + | |
516 | +//-------------------------------------------Kernels-------------------------------------------------------------------------- | |
517 | + | |
518 | +//__global__ void kernel_snake_evolve_MC(sphere * snakes, float* I, point<float>* samples, int sampleNum, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { | |
519 | +// | |
520 | +// int idx = blockDim.x * blockIdx.x + threadIdx.x; | |
521 | +// | |
522 | +// if (idx >= snakeNum) // return if the number of threads is more than snakes | |
523 | +// return; | |
524 | +// | |
525 | +// | |
526 | +// if (idx == 0) | |
527 | +// printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); | |
528 | +// point<float> dEdp(0.0, 0.0, 0.0); // energy gradient wrt p | |
529 | +// point<float> dEdq(0.0, 0.0, 0.0); // energy gradient wrt q | |
530 | +// sphere s = snakes[idx]; | |
531 | +// | |
532 | +// for (int i = 0; i < itr; i++) { | |
533 | +// if (debug) printf("\n\n---------------->> iteration %u\n", i); | |
534 | +// snake_Engrad_MC(dEdp, dEdq, s, I, samples, sampleNum, w, h, d, debug); | |
535 | +// float factor = sqrtf(float(i + 1)); | |
536 | +// if (debug) | |
537 | +// printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", dEdp.x, dEdp.y, dEdp.z, dEdq.x); | |
538 | +// s.update(dEdp, dEdq, dt / factor); | |
539 | +// | |
540 | +// } | |
541 | +// | |
542 | +// snakes[idx] = s; | |
543 | +// | |
544 | +// | |
545 | +//} | |
546 | + | |
547 | +__global__ void kernel_snake_evolve_MC_parallel(sphere * snakes, float* I, point<float>* samples, int sampleNum, size_t snakeNum, size_t threads, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { | |
548 | + | |
549 | + extern __shared__ float sharedPtr[]; // define shared memory to save result of each thread there | |
550 | + int n = floorf(sampleNum / threads) ; //# given sample points to one thread | |
551 | + int idx = blockDim.x * blockIdx.x + threadIdx.x; | |
552 | + | |
553 | + if (idx >= (snakeNum * threads)) // return if the number of threads is more than snakes | |
554 | + return; | |
555 | + | |
556 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); //snake cannot be smaller than threshold | |
557 | + /*if (idx == 0) { | |
558 | + printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); | |
559 | + printf("number of samples per thread=%d\n", n); | |
560 | + printf("idx=%d\n", (snakeNum * threads)); | |
561 | + printf("blockdim.x=%d and threads=%u \n", blockDim.x, threads); | |
562 | + }*/ | |
563 | + | |
564 | + point<float> thread_sample; //sample goes to the thread | |
565 | + //sum_counter is real number of samples inside the contour averaged. some points of samples may round out of contour and did not contribute in averaging | |
566 | + for (int i = 0; i < itr; i++) { | |
567 | + //check the snake, if it pass the threshold | |
568 | + | |
569 | + if (snakes[blockIdx.x].r() < threshold) { | |
570 | + if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); | |
571 | + return; | |
572 | + } | |
573 | + | |
574 | + float radius = snakes[blockIdx.x].r(); // radius of outer snake | |
575 | + point<float> c = snakes[blockIdx.x].c(); // center of snake | |
576 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | |
577 | + float tempXmax = ceil(c.x + radius + 1); | |
578 | + float tempYmin = floor(c.y - radius - 1); | |
579 | + float tempYmax = ceil(c.y + radius + 1); | |
580 | + float tempZmin = floor(c.z - radius - 1); | |
581 | + float tempZmax = ceil(c.z + radius + 1); | |
582 | + | |
583 | + float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges | |
584 | + float xmax = min((float)tempXmax, (float)(w - 1.0)); | |
585 | + float ymin = max((float)tempYmin, (float)0.0); | |
586 | + float ymax = min((float)tempYmax, (float)(h - 1.0)); | |
587 | + float zmin = max((float)tempZmin, (float)0.0); | |
588 | + float zmax = min((float)tempZmax, (float)(d - 1.0)); | |
589 | + | |
590 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | |
591 | + if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | |
592 | + return; | |
593 | + } | |
594 | + | |
595 | + | |
596 | + | |
597 | + if (debug) printf("\n\n---------------->> iteration %u\n", i); | |
598 | + int counter = 0; | |
599 | + point<float> dEdp(0.0f, 0.0f, 0.0f); // energy gradient wrt p | |
600 | + point<float> dEdq(0.0f, 0.0f, 0.0f); // energy gradient wrt q | |
601 | + for (int j = 0; j < n; j++) { | |
602 | + //sphere single_snake = snakes[blockIdx.x]; // each block is assigned to one snake. all threads in a block are working for that snake | |
603 | + thread_sample = samples[threadIdx.x * n + j]; | |
604 | + snake_Engrad_MC_parallel(dEdp, dEdq, counter, snakes[blockIdx.x], I, thread_sample, w, h, d, debug); | |
605 | + | |
606 | + } | |
607 | + /*if (idx == 0) { | |
608 | + printf("points in the contour for thread0=%d\n", counter); | |
609 | + printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | |
610 | + } | |
611 | +*/ | |
612 | + | |
613 | + //copy the result of each thread in shared memory | |
614 | + sharedPtr[threadIdx.x * 7 + 0] = dEdp.x; | |
615 | + sharedPtr[threadIdx.x * 7 + 1] = dEdp.y; | |
616 | + sharedPtr[threadIdx.x * 7 + 2] = dEdp.z; | |
617 | + | |
618 | + sharedPtr[threadIdx.x * 7 + 3] = dEdq.x; | |
619 | + sharedPtr[threadIdx.x * 7 + 4] = dEdq.y; | |
620 | + sharedPtr[threadIdx.x * 7 + 5] = dEdq.z; | |
621 | + | |
622 | + sharedPtr[threadIdx.x * 7 + 6] = counter; | |
623 | + | |
624 | + __syncthreads(); | |
625 | + | |
626 | + //combine threads | |
627 | + dEdp = point<float>(0.0f, 0.0f, 0.0f); // energy gradient wrt p | |
628 | + dEdq = point<float>(0.0f, 0.0f, 0.0f); | |
629 | + counter = 0; | |
630 | + if (threadIdx.x == 0) { | |
631 | + float R = snakes[blockIdx.x].r(); // radius of outer snake | |
632 | + float R3 = R * R * R; //calculate R^3 (radius cube) | |
633 | + point<float> c = snakes[blockIdx.x].c(); // center of snake | |
634 | + | |
635 | + for (int i = 0; i < threads; i++) { | |
636 | + dEdp.x += sharedPtr[i * 7 + 0]; | |
637 | + dEdp.y += sharedPtr[i * 7 + 1]; | |
638 | + dEdp.z += sharedPtr[i * 7 + 2]; | |
639 | + | |
640 | + dEdq.x += sharedPtr[i * 7 + 3]; | |
641 | + dEdq.y += sharedPtr[i * 7 + 4]; | |
642 | + dEdq.z += sharedPtr[i * 7 + 5]; | |
643 | + | |
644 | + counter += sharedPtr[i * 7 + 6]; | |
645 | + | |
646 | + } | |
647 | + | |
648 | + float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); | |
649 | + //float volume = (4.0f / 3.0f) * pi * (R + 1.0f)*(R + 1.0f)*(R + 1.0f); //(4.0f / 3.0f) * pi * powf((R + 1), 3.0f); | |
650 | + dEdp = dEdp * volume / (float)counter; | |
651 | + dEdq = dEdq * volume / (float)counter; | |
652 | + | |
653 | + | |
654 | + dEdp = dEdp / (8.0f * R3); | |
655 | + dEdq = dEdq / (8.0f * R3); | |
656 | + | |
657 | + //if (idx == 0) | |
658 | + //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); | |
659 | + | |
660 | + float factor = sqrtf(float(i + 1)); | |
661 | + snakes[blockIdx.x].update(dEdp, dEdq, dt / factor); | |
662 | + | |
663 | + //printf("snakes[blockIdx.x].p.x=%f and snakes[blockIdx.x].p.y=%f \n snakes[blockIdx.x].q.x=%f and snakes[blockIdx.x].q.y=%f\n\n", snakes[blockIdx.x].p.x, snakes[blockIdx.x].p.y, snakes[blockIdx.x].q.x, snakes[blockIdx.x].q.y); | |
664 | + } | |
665 | + __syncthreads(); | |
666 | + | |
667 | + } | |
668 | + | |
669 | + | |
670 | + | |
671 | + | |
672 | + | |
673 | + | |
674 | +} | |
675 | + | |
676 | +__global__ void kernel_snake_evolve(sphere* snakes, float *I, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { | |
677 | + | |
678 | + //__launch_bounds__(1024, 1); | |
679 | + int idx = blockDim.x * blockIdx.x + threadIdx.x; | |
680 | + | |
681 | + if (idx >= snakeNum) // return if the number of threads is more than snakes | |
682 | + return; | |
683 | + | |
684 | + | |
685 | + if (idx == 0) | |
686 | + printf("\n\n \t\t=============>>>>we are in the kernel\n\n"); | |
687 | + point<float> dEdp(0.0, 0.0, 0.0); // energy gradient wrt p | |
688 | + point<float> dEdq(0.0, 0.0, 0.0); // energy gradient wrt q | |
689 | + sphere s = snakes[idx]; | |
690 | + | |
691 | + for (int i = 0; i < itr; i++) { | |
692 | + | |
693 | + snake_Engrad(dEdp, dEdq, s, I, w, h, d, debug); | |
694 | + if (debug) | |
695 | + printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", dEdp.x, dEdp.y, dEdp.z, dEdq.x); | |
696 | + float factor = sqrtf(float(i + 1)); | |
697 | + s.update(dEdp, dEdq, dt / factor); | |
698 | + } | |
699 | + | |
700 | + snakes[idx] = s; | |
701 | + | |
702 | +} | |
703 | + | |
704 | + | |
705 | +// -----------------------------------Energy computaion and compare hypersnakes------------------------------------------------------------------------ | |
706 | +__host__ __device__ void sum_E(float& E, point<float> c, point<float> s, float R, float f) { | |
707 | + | |
708 | + float r = R / cubeRoot2; // radius of inner snake | |
709 | + float dz2 = (s.z - c.z)*(s.z - c.z); | |
710 | + float dy2 = (s.y - c.y)*(s.y - c.y); | |
711 | + float dx2 = (s.x - c.x)*(s.x - c.x); | |
712 | + float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour | |
713 | + | |
714 | + | |
715 | + if (d < (r - 0.5f*deltar)) // mouth of snake | |
716 | + E -= f; | |
717 | + | |
718 | + else if (d < (r + 0.5f*deltar)) { // throat of snake | |
719 | + float S = (2.0f / deltar) * (d - r); // weight function value in the given point | |
720 | + E += (S * f); | |
721 | + } | |
722 | + | |
723 | + else if (d < (R - 0.5f*deltaR)) // coil of snake | |
724 | + E += f; | |
725 | + | |
726 | + else if (d < (R + 0.5f*deltaR)) { // fangs of snake | |
727 | + float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); | |
728 | + E += (S * f); | |
729 | + } | |
730 | + | |
731 | +} | |
732 | +__host__ __device__ void snake_energy(float &energy, sphere snake, float* I, size_t w, size_t h, size_t d) { | |
733 | + | |
734 | + float radius = snake.r(); // radius of outer snake | |
735 | + point<float> c = snake.c(); // center of snake | |
736 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | |
737 | + if (radius < threshold) { | |
738 | + //printf("radius is out of range\n"); | |
739 | + return; | |
740 | + } | |
741 | + | |
742 | + float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere | |
743 | + float tempXmax = ceil(c.x + radius + 1); | |
744 | + float tempYmin = floor(c.y - radius - 1); | |
745 | + float tempYmax = ceil(c.y + radius + 1); | |
746 | + float tempZmin = floor(c.z - radius - 1); | |
747 | + float tempZmax = ceil(c.z + radius + 1); | |
748 | + | |
749 | + float xmin = max((float)tempXmin, (float) 0.0); //clamp the bounding box to the image edges | |
750 | + float xmax = min((float)tempXmax, (float)(w - 1)); | |
751 | + float ymin = max((float)tempYmin, (float)0); | |
752 | + float ymax = min((float)tempYmax, (float)(h - 1)); | |
753 | + float zmin = max((float)tempZmin, (float)0); | |
754 | + float zmax = min((float)tempZmax, (float)(d - 1)); | |
755 | + | |
756 | + if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { | |
757 | + //printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); | |
758 | + return; | |
759 | + } | |
760 | + | |
761 | + float E = 0.0f; | |
762 | + float R = radius; //simplify radius to R | |
763 | + float R3 = R * R * R; //calculate R^2 (radius squared) | |
764 | + | |
765 | + for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) { // for each section | |
766 | + for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) { //for each row of section in the bounding box | |
767 | + for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) { //for each pixel p in the row | |
768 | + | |
769 | + point<float> s(x, y, z); // a sample inside the contour | |
770 | + float f; // image value in given position | |
771 | + int position = (int)((w * h * z) + (x * h + y)); | |
772 | + if (position < (w * h * d)) { | |
773 | + f = I[position]; | |
774 | + if (!f == 0) | |
775 | + sum_E(E, c, s, R, f); | |
776 | + } | |
777 | + | |
778 | + } | |
779 | + } | |
780 | + } | |
781 | + energy = E / (8 * R3); | |
782 | +} | |
783 | + | |
784 | +//compute energy of snakes | |
785 | +__global__ void kernel_snake_energy(float* energy, sphere* snakes, float* I, size_t snakeNum, size_t w, size_t h, size_t d) { | |
786 | + size_t i = blockDim.x * blockIdx.x + threadIdx.x; | |
787 | + | |
788 | + if (i >= snakeNum) return; // return if the number of threads is more than snakes | |
789 | + | |
790 | + if (i == 0) { | |
791 | + printf("we are in energy kernel\n"); | |
792 | + } | |
793 | + float energy_temp; | |
794 | + snake_energy(energy_temp, snakes[i], I, w, h, d); | |
795 | + energy[i] = energy_temp; | |
796 | + | |
797 | +} | |
798 | + | |
799 | + | |
800 | +// returns a set of snake indices that meet specific criteria(to be determined and refined by Mahsa) | |
801 | +std::vector<size_t> DetectValidSnakes_GPU(sphere* snakes, size_t snakeNum, float* I, size_t w, size_t h, size_t d, size_t threads, float energy_th) { | |
802 | + | |
803 | + ////calculate energy | |
804 | + float *gpu_energy; | |
805 | + HANDLE_ERROR(cudaMalloc(&gpu_energy, snakeNum * sizeof(float))); | |
806 | + size_t blocks = snakeNum / threads + 1; | |
807 | + | |
808 | + //// allocate memory and copy snakes to the device | |
809 | + sphere* gpu_snakes = new sphere[snakeNum]; | |
810 | + HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere))); | |
811 | + HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice)); | |
812 | + | |
813 | + //// allocate memory and copy image to device | |
814 | + float *gpu_I; | |
815 | + HANDLE_ERROR(cudaMalloc(&gpu_I, w * h * d * sizeof(float))); | |
816 | + HANDLE_ERROR(cudaMemcpy(gpu_I, I, w * h * d * sizeof(float), cudaMemcpyHostToDevice)); | |
817 | + //create an array to store the snake energies' on cpu | |
818 | + float *energy; | |
819 | + energy = (float*)malloc(snakeNum * sizeof(float)); | |
820 | + //memset(energy, 0, snakeNum * sizeof(float)); | |
821 | + kernel_snake_energy << < (unsigned int)blocks, (unsigned int)threads >> > (gpu_energy, gpu_snakes, gpu_I, snakeNum, w, h, d); | |
822 | + | |
823 | + HANDLE_ERROR(cudaMemcpy(energy, gpu_energy, snakeNum * sizeof(float), cudaMemcpyDeviceToHost)); | |
824 | + //std::ofstream outfile("energy.txt"); //open a file for writing | |
825 | + //for (size_t i = 0; i < snakeNum; i++) { | |
826 | + // outfile << energy[i] << std::endl; //output the energy | |
827 | + //} | |
828 | + //outfile.close(); | |
829 | + | |
830 | + cudaFree(gpu_energy); | |
831 | + cudaFree(gpu_I); | |
832 | + cudaFree(gpu_snakes); | |
833 | + | |
834 | + | |
835 | + // compare snakes in possible overlaps | |
836 | + std::vector<size_t> id; //store indices of snakes which have overlaps with snake i | |
837 | + std::vector<size_t> idx; //create a vector to store indices of snakes which must be deleted. | |
838 | + | |
839 | + float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); | |
840 | + | |
841 | + for (size_t i = 0; i < snakeNum; i++) { | |
842 | + if (snakes[i].r() < threshold) | |
843 | + idx.push_back(i); | |
844 | + | |
845 | + if (std::find(idx.begin(), idx.end(), i) == idx.end()) { // check if snake is already deleted | |
846 | + id.clear(); | |
847 | + for (size_t j = 0; j < snakeNum; j++) { | |
848 | + if (j != i) { | |
849 | + if (snakes[j].c().x > snakes[i].c().x - 2 * snakes[i].r() && snakes[j].c().x < snakes[i].c().x + 2 * snakes[i].r() && snakes[j].c().y > snakes[i].c().y - 2 * snakes[i].r() && snakes[j].c().y < snakes[i].c().y + 2 * snakes[i].r()) { | |
850 | + if (snakes[j].r() < threshold) | |
851 | + idx.push_back(j); | |
852 | + if (std::find(idx.begin(), idx.end(), j) == idx.end()) { | |
853 | + float centerDistance_x = snakes[i].c().x - snakes[j].c().x; // centers distance of snakes i and j- in x direction | |
854 | + float centerDistance_y = snakes[i].c().y - snakes[j].c().y; // centers distance of snakes i and j- in y direction | |
855 | + float centerDistance_z = snakes[i].c().z - snakes[j].c().z; // centers distance of snakes i and j- in z direction | |
856 | + float centerDistance = sqrt((centerDistance_x * centerDistance_x) + (centerDistance_y * centerDistance_y) + 4 * (centerDistance_z * centerDistance_z)); // euclidean distance bw center snakes i and j | |
857 | + float maxRadius = max(snakes[i].r(), snakes[j].r()); // maximum of radius of snake i and snake j | |
858 | + if (centerDistance < (maxRadius / cubeRoot2)) | |
859 | + id.push_back(j); // store indices of overlapped snakes with snake i | |
860 | + } | |
861 | + } | |
862 | + } | |
863 | + } | |
864 | + if (!id.empty()) { | |
865 | + id.push_back(i); | |
866 | + float smallest = energy[id[0]]; | |
867 | + size_t smallest_id = id[0]; // index of snake should be kept | |
868 | + for (int k = 0; k < id.size(); k++) { // find snake with smallest energy among id vector | |
869 | + if (energy[id[k]] < smallest) { | |
870 | + smallest = energy[id[k]]; | |
871 | + smallest_id = id[k]; | |
872 | + } | |
873 | + } | |
874 | + for (int m = 0; m < id.size(); m++) { | |
875 | + if (id[m] != smallest_id) // among snakes with overlaps, the one with smallest energy survies. | |
876 | + idx.push_back(id[m]); // idx stores snakes which should be deleted | |
877 | + | |
878 | + } | |
879 | + } | |
880 | + } | |
881 | + | |
882 | + } | |
883 | + std::vector<size_t> th_idx; //create a vector to store final indices exclusive idx (indices of snakes with large energy) and the ones with energy higher than threshold | |
884 | + for (size_t c = 0; c < snakeNum; c++) { | |
885 | + if (std::find(idx.begin(), idx.end(), c) == idx.end()) { | |
886 | + if (energy[c] < energy_th) | |
887 | + th_idx.push_back(c); //if the snake exceeds the energy threshold, store the index | |
888 | + } | |
889 | + } | |
890 | + free(energy); | |
891 | + return th_idx; //return the indices of valid snakes | |
892 | +} | |
893 | + | |
894 | + | |
895 | + | |
896 | + | |
897 | +///..................................................................main function............................................................................................. | |
898 | +void advertise() { | |
899 | + std::cout << "this is Hypersnakuscule implementation" << std::endl; | |
900 | + std::cout << "reference papre for 2D is (Snakuscules by Philippe Thévenaz and Michael Unser)" << std::endl; | |
901 | + std::cout << "implemented by Mahsa Lotfollahi" << std::endl << std::endl; | |
902 | + std::cout << "Usage: snakuscules input_image [options]" << std::endl; | |
903 | +} | |
904 | + | |
905 | +int main(int argc, char* argv[]) { | |
906 | + stim::arglist args; // create an argument list | |
907 | + | |
908 | + args.add("help", "prints this help"); | |
909 | + args.add("iter", "number of iteration for evolving contour", "400", "positive value"); | |
910 | + args.add("radius", "initial radius", "15", "real positive value"); | |
911 | + args.add("size", "specify size of image in 3 dimension", "", "[w h d]"); | |
912 | + args.add("dt", "gradient descend stepsize", "10", "real positive value"); | |
913 | + args.add("cuda", "specify the device used for CUDA calculations", "0", "device ID, -1 for CPU"); | |
914 | + args.add("mc", "specify using Monte Carlo sampling", "", "MC=1 for Monte Carlo sampling and 0 for original integration"); | |
915 | + args.add("single", "specify a single contour to evolve", "", "[x y z r]"); | |
916 | + args.add("filter", "specify filter type for preprocessing like log", "", "name of filter"); | |
917 | + args.add("energy_th", "specify energy threshold, snakes with energies less than that survive", "-1", "small negative value"); | |
918 | + args.add("debug", "output debugging information"); | |
919 | + args.parse(argc, argv); | |
920 | + | |
921 | + if (args["help"].is_set()) { //output help if requested by the user | |
922 | + advertise(); | |
923 | + std::cout << args.str() << std::endl; | |
924 | + return 1; | |
925 | + } | |
926 | + | |
927 | + if (args.nargs() < 1) { | |
928 | + std::cout << "ERROR: no input file specified" << std::endl; | |
929 | + return 1; | |
930 | + } | |
931 | + | |
932 | + std::string output_file = "output.txt"; //set the default output file name to "output.txt" | |
933 | + if (args.nargs() >= 2) output_file = args.arg(1); //if an output is specified by the user, use that instead | |
934 | + | |
935 | + int itr = args["iter"].as_int(); //get input parameters and set variables | |
936 | + float radius = (float)args["radius"].as_float(); | |
937 | + if (!args["size"]) { //get size of image | |
938 | + std::cout << "you should specify size of image in 3 dimension" << std::endl; | |
939 | + return 1; | |
940 | + } | |
941 | + | |
942 | + int w = args["size"].as_int(0); | |
943 | + int h = args["size"].as_int(1); | |
944 | + int d = args["size"].as_int(2); | |
945 | + | |
946 | + float energy_th = (float)args["energy_th"].as_float(); | |
947 | + float dt = (float)args["dt"].as_float(); | |
948 | + int cuda_device = args["cuda"].as_int(); //get the desired CUDA device | |
949 | + bool MC = false; | |
950 | + int sampleNum; | |
951 | + if (args["mc"]) MC = true; | |
952 | + sampleNum = 20000; | |
953 | + | |
954 | + if (args["mc"].nargs() > 0) | |
955 | + sampleNum = args["mc"].as_int(); | |
956 | + | |
957 | + bool swarm = true; | |
958 | + if (args["single"]) swarm = false; //if the user specifies a single snake parameter, don't use the swarm algorithm | |
959 | + | |
960 | + bool Filter = false; | |
961 | + std::string filter_name; | |
962 | + int kernel_size; // kernel size | |
963 | + if (args["filter"]) { | |
964 | + Filter = true; | |
965 | + filter_name = "log"; | |
966 | + kernel_size = 5; | |
967 | + if (args["filter"].nargs() > 0) | |
968 | + filter_name = args["filter"].as_string(0); | |
969 | + if (args["filter"].nargs() > 1) | |
970 | + kernel_size = args["filter"].as_int(1); | |
971 | + | |
972 | + } | |
973 | + | |
974 | + | |
975 | + //allocate memory in cpu for input image | |
976 | + size_t bytes = w * h * d * sizeof(float); //number of bytes needed to store image | |
977 | + float* I = (float*)malloc(bytes); | |
978 | + // load input image+ | |
979 | + std::ifstream inputfile(args.arg(0), std::ios::in | std::ios::binary); | |
980 | + if (!inputfile) { | |
981 | + std::cout << "cannot open specified input file" << std::endl; | |
982 | + return; | |
983 | + } | |
984 | + inputfile.read((char*)I, bytes); | |
985 | + inputfile.close(); | |
986 | + size_t N = w * h * d; // number of pixels (# array elements) | |
987 | + float* I_original = (float*)malloc(bytes); // keep the original image without pre-processing | |
988 | + memcpy(I_original, I, bytes); | |
989 | + stretch(I_original, N, 0, 255); | |
990 | + stretch(I, N, 0, 255); | |
991 | + | |
992 | + | |
993 | + if (Filter) { // compute log of image | |
994 | + if (filter_name == "log") { | |
995 | + for (int i = 0; i < N; i++) | |
996 | + I[i] = log(I[i] + 1); | |
997 | + } | |
998 | + if (filter_name == "median") { // apply median filter on each section | |
999 | + float* I_2D = (float*)malloc(w*h * sizeof(float)); // allocate memory to 2_D sections | |
1000 | + | |
1001 | + cv::Mat t_I_2D_mat(w, h, CV_32F); //image is stored column major but open cv read and write row major---matrix is transposed | |
1002 | + cv::Mat I2d_blurred(w, h, CV_32F); //allocate memory for blured image | |
1003 | + | |
1004 | + for (int zz = 0; zz < d; zz++) { | |
1005 | + memcpy(I_2D, I_original + (zz * w *h), w * h * sizeof(float)); // copy each section of 3D image(I) in an array | |
1006 | + cv::Mat I_2D_mat(h, w, CV_32F, I_2D); // create a Mat to copy data to that and be able to use open cv median filter | |
1007 | + cv::transpose(I_2D_mat, t_I_2D_mat); | |
1008 | + cv::medianBlur(t_I_2D_mat, I2d_blurred, kernel_size); // apply median filter | |
1009 | + cv::transpose(I2d_blurred, I_2D_mat); // transpose to be transfered to array | |
1010 | + I_2D = (float *)I_2D_mat.data; | |
1011 | + memcpy(I + (zz * w *h), I_2D, w * h * sizeof(float)); | |
1012 | + | |
1013 | + } | |
1014 | + free(I_2D); | |
1015 | + t_I_2D_mat.release(); | |
1016 | + I2d_blurred.release(); | |
1017 | + } | |
1018 | + | |
1019 | + } | |
1020 | + | |
1021 | + stretch(I, N, 0, 255); | |
1022 | + size_t snakeNum; | |
1023 | + if (swarm) { | |
1024 | + int D = int(sqrt(1.5)*radius); //distance between hypersnakes | |
1025 | + snakeNum = w * h * d / (D * D * D ); // approximate number of hypersnakes lying on image | |
1026 | + } | |
1027 | + else | |
1028 | + snakeNum = 1; | |
1029 | + sphere* snakes = new sphere[snakeNum]; //create an array of spheres.(one for each snake) | |
1030 | + memset(snakes, 0, snakeNum * sizeof(sphere)); | |
1031 | + | |
1032 | + if (swarm) { | |
1033 | + initialSwarmSnake(snakes, snakeNum, w, h, d, radius); | |
1034 | + std::cout << "number of snakes=" << snakeNum << std::endl; | |
1035 | + } | |
1036 | + | |
1037 | + | |
1038 | + else { | |
1039 | + point<float> center; | |
1040 | + center.x = (float)args["single"].as_float(0); | |
1041 | + center.y = (float)args["single"].as_float(1); | |
1042 | + center.z = (float)args["single"].as_float(2); | |
1043 | + //cout << "number of args" << args["single"].nargs() << endl; | |
1044 | + if (args["single"].nargs() >= 4) | |
1045 | + radius = (float)args["single"].as_float(3); | |
1046 | + | |
1047 | + snakes[0].p.x = center.x - radius; // define p and q | |
1048 | + snakes[0].q.x = center.x + radius; | |
1049 | + snakes[0].p.y = snakes[0].q.y = center.y; | |
1050 | + snakes[0].p.z = snakes[0].q.z = center.z; | |
1051 | + | |
1052 | + } | |
1053 | + | |
1054 | + point<float>* samples = (point<float>*)malloc(sampleNum * sizeof(point<float>)); | |
1055 | + memset(samples, 0, sampleNum * sizeof(point<float>)); | |
1056 | + if (MC) { | |
1057 | + if (args["debug"]) { | |
1058 | + randGenerator_cube(samples, sampleNum, true); | |
1059 | + | |
1060 | + } | |
1061 | + else randGenerator_cube(samples, sampleNum); | |
1062 | + } | |
1063 | + | |
1064 | + std::cout << "energy_th=" << energy_th << std::endl; | |
1065 | + std::cout << "initial radius=" << radius << std::endl; | |
1066 | + //-----------------------------------GPU implementation---------------------------------------------------------------------------- | |
1067 | + if (cuda_device >= 0) { | |
1068 | + | |
1069 | + cudaDeviceProp prop; | |
1070 | + HANDLE_ERROR(cudaGetDeviceProperties(&prop, 0)); | |
1071 | + size_t threads = (size_t)prop.maxThreadsPerBlock; | |
1072 | + size_t blocks = snakeNum; | |
1073 | + size_t nbyte_shared = 7 * 4 * threads; | |
1074 | + // allocate memory to snakes and copy them to device | |
1075 | + sphere* gpu_snakes; | |
1076 | + HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere))); | |
1077 | + HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice)); | |
1078 | + | |
1079 | + //allocate memory to image in device and copy from cpu to device | |
1080 | + float* gpu_I; | |
1081 | + HANDLE_ERROR(cudaMalloc(&gpu_I, bytes)); | |
1082 | + HANDLE_ERROR(cudaMemcpy(gpu_I, I, bytes, cudaMemcpyHostToDevice)); | |
1083 | + stim::gpuStartTimer(); | |
1084 | + if (MC) { | |
1085 | + // allocate memory to random samples on device | |
1086 | + point<float>* G_samples; | |
1087 | + HANDLE_ERROR(cudaMalloc(&G_samples, sampleNum * sizeof(point<float>))); | |
1088 | + HANDLE_ERROR(cudaMemset(G_samples, 0, sampleNum * sizeof(point<float>))); | |
1089 | + HANDLE_ERROR(cudaMemcpy(G_samples, samples, sampleNum * sizeof(point<float>), cudaMemcpyHostToDevice)); | |
1090 | + | |
1091 | + if (args["debug"]) | |
1092 | + kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt, true); | |
1093 | + else | |
1094 | + kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt); | |
1095 | + | |
1096 | + } | |
1097 | + else { | |
1098 | + if (args["debug"]) | |
1099 | + kernel_snake_evolve << <blocks, threads >> > (gpu_snakes, gpu_I, snakeNum, w, h, d, itr, dt, true); | |
1100 | + else | |
1101 | + kernel_snake_evolve << <blocks, threads >> > (gpu_snakes, gpu_I, snakeNum, w, h, d, itr, dt); | |
1102 | + } | |
1103 | + | |
1104 | + std::cout << "gpuruntime = " << stim::gpuStopTimer() << " ms" << std::endl; | |
1105 | + HANDLE_ERROR(cudaMemcpy(snakes, gpu_snakes, snakeNum * sizeof(sphere), cudaMemcpyDeviceToHost)); | |
1106 | + cudaFree(gpu_I); | |
1107 | + cudaFree(gpu_snakes); | |
1108 | + | |
1109 | + std::vector<size_t> idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, threads, energy_th); | |
1110 | + SaveSnakes(output_file, snakes, idx); | |
1111 | + | |
1112 | + | |
1113 | + | |
1114 | + } | |
1115 | + | |
1116 | + //--------------------------------------------CPU implementation---------------------------------------------------------------------------- | |
1117 | + else { | |
1118 | + unsigned int start = time(NULL); | |
1119 | + std::cout << "it is running on CPU" << std::endl; | |
1120 | + for (int i = 0; i < snakeNum; i++) { | |
1121 | + //printf("\n\n---------------->> iteration %u", numItr); | |
1122 | + if (args["debug"]) | |
1123 | + snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC, true); | |
1124 | + else | |
1125 | + snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC); | |
1126 | + | |
1127 | + std::cout << "Output Snakes------------------" << std::endl; | |
1128 | + std::cout << snakes[i].str() << std::endl; | |
1129 | + | |
1130 | + } | |
1131 | + unsigned int end = time(NULL); | |
1132 | + std::cout << "cpuRunTime=" << end - start << "s" << std::endl; | |
1133 | + | |
1134 | + | |
1135 | + std::vector<size_t> idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, 512, energy_th); | |
1136 | + SaveSnakes(output_file, snakes, idx); | |
1137 | + | |
1138 | + } | |
1139 | + | |
1140 | + free(I); | |
1141 | + free(I_original); | |
1142 | + if (args["debug"]) { | |
1143 | + std::vector<size_t> idx(snakeNum); | |
1144 | + std::iota(idx.begin(), idx.end(), 0); | |
1145 | + //SaveSnakes(debug_file, snakes, idx); //saves the snakes to an output file | |
1146 | + std::cout << "Output Snakes------------------" << std::endl; | |
1147 | + for (size_t i = 0; i < idx.size(); i++) { //for each snake | |
1148 | + std::cout << snakes[idx[i]].str() << std::endl; | |
1149 | + } | |
1150 | + } | |
1151 | + | |
1152 | + std::vector<size_t> idx(snakeNum); | |
1153 | + std::iota(idx.begin(), idx.end(), 0); | |
1154 | + SaveSnakes("no-delete.txt", snakes, idx); //saves the snakes to an output file | |
1155 | + return 0; | |
1156 | + | |
1157 | +} | ... | ... |