#include #include #include #include #include "device_launch_parameters.h" #include #include #include #include #include #include #include #include "opencv2/core/core.hpp" #include #include #include #define USING_OPENCV #include #include #include #include #include #include "stim/parser/arguments.h" #include #include // std::iota //#include #include "hypersnakuscule.h" //#include "median2.cuh" #define deltaR 2.0f #define deltar 1.5874f // deltar=deltaR/cubeRoot2 #define cubeRoot2 1.2599f #define pi 3.14159f //#define Energy_th -3 //----------------------------------------------functions-------------------------------------------------------------- void stretch(float* I, size_t size, int low, int high) { //size: number of image pixel float max_val = I[0]; float min_val = I[0]; for (int n = 0; n < size; n++) { if (I[n] > max_val) { max_val = I[n]; } if (I[n] < min_val) { min_val = I[n]; } } float range = max_val - min_val; float desired_range = (float)high - (float)low; for (size_t n = 0; n < size; n++) { //for each element in the image I[n] = desired_range * (I[n] - min_val) / range + low; } } /// random generator function: generate random numbers in a sphere with radius 1 in the center (0, 0, 0) void randGenerator(point* r, int sampleNum, bool debug = false) { for (size_t i = 0; i < sampleNum; i++) { double rn = (double)rand() / (double)(RAND_MAX); double theta = (double)rand() / (double)(RAND_MAX)* stim::TAU; double cosphi = 1.0 - 2.0 * ((double)rand() / (double)(RAND_MAX)); double phi = std::acos(cosphi); //double phi = std::acos(2.0 * v - 1.0); //double phi = (double)rand() / (double)(RAND_MAX)* stim::PI; //std::cout << "rn=" << rn << "\t theta=" << theta << "\tphi=" << phi << std::endl; r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); r[i].z = (float)(std::cbrt(rn) * cos(phi)); } if (debug) { std::ofstream outfile("randomSamples.txt"); //open a file for writing for (size_t i = 0; i < sampleNum; i++) { outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius } outfile.close(); } } ///create random numbers in a cube and delete the one outside the sphere void randGenerator_cube(point* r, int sampleNum, bool debug = false) { size_t counter = 0; while (counter < sampleNum) { double x = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; double y = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; double z = ((double)rand() / (double)(RAND_MAX) * 2.0) - 1.0; double d = sqrt((x * x) + (y * y) + (z * z)); if (d < 1.1) { r[counter].x = (float)x; r[counter].y = (float)y; r[counter].z = (float)z; counter++; } } if (debug) { std::ofstream outfile("randomSamples.txt"); //open a file for writing for (size_t i = 0; i < sampleNum; i++) { outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius } outfile.close(); } } /// random generator function: generate random numbers in a sphere with radius 1 in the center (0 , 0, 0) //void randGenerator1(point* r, int sampleNum, bool debug = false) { // for (int i = 0; i < sampleNum; i++) { // std::default_random_engine generator1(100 + i); // std::uniform_real_distribution distribution1(0.0f, 1.0f); // double rn = distribution1(generator1); // std::default_random_engine generator2(50.0f + 5.0f * i); // std::uniform_real_distribution distribution2(0.0, 2.0 * stim::PI); // double theta = distribution2(generator2); // std::default_random_engine generator3(30.0f + 3.0f * i); // std::uniform_real_distribution distribution3(0, stim::PI); // double phi = distribution3(generator3); // r[i].x = (float)(std::cbrt(rn) * cos(theta) * sin(phi)); // r[i].y = (float)(std::cbrt(rn) * sin(theta) * sin(phi)); // r[i].z = (float)(std::cbrt(rn) * cos(phi)); // } // if (debug) { // std::ofstream outfile("randomSamples.txt"); //open a file for writing // for (size_t i = 0; i < sampleNum; i++) { // outfile << r[i].x << " " << r[i].y << " " << r[i].z << std::endl; //output the center and radius // } // outfile.close(); // } //} /// saves the snakes specified by the idx array void SaveSnakes(std::string filename, sphere* snakes, std::vector idx) { std::ofstream outfile(filename); //open a file for writing for (size_t i = 0; i < idx.size(); i++) { point center = snakes[idx[i]].c(); //get the centerpoint of the snake outfile << center.x << " " << center.y << " " << center.z << " " << snakes[idx[i]].r() << std::endl; //output the center and radius } outfile.close(); } void initialSwarmSnake(sphere * snakes, size_t &counter, size_t w, size_t h, size_t d, float radius) { std::cout << "W=" << w << "\t h=" << h << "\td=" << d << std::endl; int D = int(sqrt(1.5)*radius); //distance between hypersnakes //int D = 30; //for phantom int k1 = 0; counter = 0; float startpoint = 0; for (float k = radius; k < (d - radius); k += D ) {// for phantom D for (float j = radius; j < (h - radius); j += D) { k1++; if (k1 % 2 == 1) startpoint = 0; //for phantom D else startpoint = (float)D / 2.0f;//for phantom D for (float i = startpoint; i < (w - radius); i += D) { point temp_p(i, j, k); snakes[counter].p = temp_p; point temp_q(i + (2 * radius), j, k); snakes[counter].q = temp_q; counter = counter + 1; } } } std::ofstream outfile("initials.txt"); for (int i = 0; i < counter; i++) { outfile << snakes[i].c().x << " " << snakes[i].c().y << " " << snakes[i].c().z << " " << snakes[i].r() << std::endl; } outfile.close(); } __host__ __device__ void sum_dE(point& dEdp, point& dEdq, point c, point s, float R, float f) { float r = R / cubeRoot2; // radius of inner snake float dz2 = (s.z - c.z)*(s.z - c.z); float dy2 = (s.y - c.y)*(s.y - c.y); float dx2 = (s.x - c.x)*(s.x - c.x); float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; float dx = (c.x - s.x) / d; float dy = (c.y - s.y) / d; float dz = (c.z - s.z) / d; if (d < (r - 0.5f*deltar)) { // mouth of snake dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake dEdq.x += 3.0f / gx * f; } else if (d < (r + 0.5f*deltar)) { // throat of snake float S = (2.0f / deltar) * (d - r); // weight function value in the given point dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); dEdp.y += f *(dy / deltar); dEdp.z += f *(dz / deltar); dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); dEdq.y = dEdp.y; dEdq.z = dEdp.z; } else if (d < (R - 0.5f*deltaR)) { // coil of snake dEdp.x += 3.0f / gx * f; dEdq.x -= 3.0f / gx * f; } else if (d < (R + 0.5f*deltaR)) { // fangs of snake float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); dEdp.y -= 0.5f *(dy / deltaR) * f; dEdp.z -= 0.5f *(dz / deltaR) * f; dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); dEdq.y = dEdp.y; dEdq.z = dEdp.z; } } //sum_dE in debug mode __host__ __device__ void sum_dE_debug(point& dEdp, point& dEdq, int &counter, point c, point s, float R, float f) { float r = R / cubeRoot2; // radius of inner snake float dz2 = (s.z - c.z)*(s.z - c.z); float dy2 = (s.y - c.y)*(s.y - c.y); float dx2 = (s.x - c.x)*(s.x - c.x); float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour float gx = 2.0f * R; // gx= snake.q.x - snake.p.x ; float dx = (c.x - s.x) / d; float dy = (c.y - s.y) / d; float dz = (c.z - s.z) / d; int trivial = 0; //to test if any point is out of the contour if (d < (r - 0.5f*deltar)) { // mouth of snake counter++; dEdp.x -= 3.0f / gx * f; //calculate the growth/shrinking term for the snake dEdq.x += 3.0f / gx * f; } else if (d < (r + 0.5f*deltar)) { // throat of snake counter++; float S = (2.0f / deltar) * (d - r); // weight function value in the given point dEdp.x += f * ((3.0f / gx) * S + (dx + (1.0f / cubeRoot2)) / deltar); dEdp.y += f *(dy / deltar); dEdp.z += f *(dz / deltar); dEdq.x += f * ((-3.0f / gx) * S + (dx - (1.0f / cubeRoot2)) / deltar); dEdq.y = dEdp.y; dEdq.z = dEdp.z; } else if (d < (R - 0.5f*deltaR)) { // coil of snake counter++; dEdp.x += 3.0f / gx * f; dEdq.x -= 3.0f / gx * f; } else if (d < (R + 0.5f*deltaR)) { // fangs of snake counter++; float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); dEdp.x += f * ((3.0f * S / gx) - (0.5f * (dx + 1.0f) / deltaR)); dEdp.y -= 0.5f *(dy / deltaR) * f; dEdp.z -= 0.5f *(dz / deltaR) * f; dEdq.x += f *((-3.0f * S / gx) - (0.5f * (dx - 1.0f) / deltaR)); dEdq.y = dEdp.y; dEdq.z = dEdp.z; } else trivial++; } // this function calculate gradient of energy with respect to two point p and q __host__ __device__ void snake_Engrad(point&dEdp, point&dEdq, sphere snake, float* I, size_t w, size_t h, size_t d, bool debug = false) { float radius = snake.r(); // radius of outer snake point c = snake.c(); // center of snake dEdp = point(0, 0, 0); //initialize dEdp and dEdq to zero dEdq = point(0, 0, 0); float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); if (radius < threshold) { if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); return; } float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere float tempXmax = ceil(c.x + radius + 1); float tempYmin = floor(c.y - radius - 1); float tempYmax = ceil(c.y + radius + 1); float tempZmin = floor(c.z - radius - 1); float tempZmax = ceil(c.z + radius + 1); float xmin = max((float)tempXmin, (float) 0.0); //clamp the bounding box to the image edges float xmax = min((float)tempXmax, (float)(w - 1)); float ymin = max((float)tempYmin, (float)0); float ymax = min((float)tempYmax, (float)(h - 1)); float zmin = max((float)tempZmin, (float)0); float zmax = min((float)tempZmax, (float)(d - 1)); if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); return; } float R = radius; //simplify radius to R float R3 = R * R * R; //calculate R^2 (radius squared) for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) { // for each section for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) { //for each column of section in the bounding box for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) { //for each pixel p in the column point s(x, y, z); // a sample inside the contour float f; // image value in given position int position = (int)((w * h * z) + (x * h + y)); if (position < (w * h * d)) { f = I[position]; if (!f == 0) sum_dE(dEdp, dEdq, c, s, R, f); } } } } dEdp = dEdp / (8 * R3); dEdq = dEdq / (8 * R3); } //// this function calculate gradient of energy with respect to two point p and q using Monte Carlo __host__ __device__ void snake_Engrad_MC(point&dEdp, point&dEdq, sphere snake, float* I, point* samples, size_t sampleNum, size_t w, size_t h, size_t d, bool debug = false) { float radius = snake.r(); // radius of outer snake point c = snake.c(); // center of snake dEdp = point(0, 0, 0); //initialize dEdp and dEdq to zero dEdq = point(0, 0, 0); float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); if (radius < threshold) { if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); return; } float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere float tempXmax = ceil(c.x + radius + 1); float tempYmin = floor(c.y - radius - 1); float tempYmax = ceil(c.y + radius + 1); float tempZmin = floor(c.z - radius - 1); float tempZmax = ceil(c.z + radius + 1); float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges float xmax = min((float)tempXmax, (float)(w - 1.0)); float ymin = max((float)tempYmin, (float)0.0); float ymax = min((float)tempYmax, (float)(h - 1.0)); float zmin = max((float)tempZmin, (float)0.0); float zmax = min((float)tempZmax, (float)(d - 1.0)); if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); return; } float R = radius; //simplify radius to R float R3 = R * R * R; //calculate R^3 (radius cube) int counter = 0; float sumf = 0.0; for (int i = 0; i < sampleNum; i++) { float sx = samples[i].x; float sy = samples[i].y; float sz = samples[i].z; float x = (R + 1.0f) * sx + c.x; float y = (R + 1.0f) * sy + c.y; float z = (R + 1.0f) * sz + c.z; int xi = (int)round(x); int yi = (int)round(y); int zi = (int)round(z); point s(xi, yi, zi); // a sample inside the contour float f; // image value in given position int position = (int)((w * h * zi) + (xi * h + yi)); if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) counter++; f = (float)I[position]; sumf += f; //if (debug) sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); //sum_dE(dEdp, dEdq, c, s, R, f); } } float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); //if (debug) printf("volume_sphere=%f and volume=%f\n", volume_sphere, volume); //if (debug) printf("sampleNum=%u and (float)counter= %f \n", sampleNum, (float)counter); dEdp = dEdp * volume / (float)counter; dEdq = dEdq * volume / (float)counter; dEdp = dEdp / (8.0f * R3); dEdq = dEdq / (8.0f * R3); } // this function calculate gradient of energy with respect to two point p and q using Monte Carlo __host__ __device__ void snake_Engrad_MC_parallel(point&dEdp, point&dEdq, int &counter, sphere snake, float* I, point sample, size_t w, size_t h, size_t d, bool debug = false) { float radius = snake.r(); // radius of outer snake point c = snake.c(); // center of snake float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere float tempXmax = ceil(c.x + radius + 1); float tempYmin = floor(c.y - radius - 1); float tempYmax = ceil(c.y + radius + 1); float tempZmin = floor(c.z - radius - 1); float tempZmax = ceil(c.z + radius + 1); float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges float xmax = min((float)tempXmax, (float)(w - 1.0)); float ymin = max((float)tempYmin, (float)0.0); float ymax = min((float)tempYmax, (float)(h - 1.0)); float zmin = max((float)tempZmin, (float)0.0); float zmax = min((float)tempZmax, (float)(d - 1.0)); if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); return; } float R = radius; //simplify radius to R float sumf = 0.0; //for (int i = 0; i < sampleNum; i++) { float sx = sample.x; float sy = sample.y; float sz = sample.z; float x = (R + 1.0f) * sx + c.x; float y = (R + 1.0f) * sy + c.y; float z = (R + 1.0f) * sz + c.z; int xi = (int)round(x); int yi = (int)round(y); int zi = (int)round(z); point s(xi, yi, zi); // a sample inside the contour float f; // image value in given position int position = (int)((w * h * zi) + (xi * h + yi)); if (position < (w * h * d) && xi >= xmin && xi <= xmax && yi >= ymin && yi <= ymax && zi >= zmin && zi <= zmax) { // && d1< (R + 1) f = (float)I[position]; sumf += f; sum_dE_debug(dEdp, dEdq, counter, c, s, R, f); //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); } } void snake_evolve(sphere &snakes, float* I, int w, int h, int d, float dt, int itr, point* samples, size_t sampleNum, bool MC, bool debug = false) { point dEdp(0.0, 0.0, 0.0); // energy gradient wrt p point dEdq(0.0, 0.0, 0.0); // energy gradient wrt q for (int numItr = 0; numItr < itr; numItr++) { if (MC) snake_Engrad_MC(dEdp, dEdq, snakes, I, samples, sampleNum, w, h, d, debug); else snake_Engrad(dEdp, dEdq, snakes, I, w, h, d, debug); if (debug) printf("dEdp.x=%f \t dEdq.x=%f \n dEdp.y=%f \t dEdp.z=%f \n\n", dEdp.x, dEdq.x, dEdp.y, dEdp.z); float factor = sqrt(float(numItr + 1)); // step size in gradient descent decreasing by number of iterations snakes.update(dEdp, dEdq, dt / factor); } } //-------------------------------------------Kernels-------------------------------------------------------------------------- //__global__ void kernel_snake_evolve_MC(sphere * snakes, float* I, point* samples, int sampleNum, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { // // int idx = blockDim.x * blockIdx.x + threadIdx.x; // // if (idx >= snakeNum) // return if the number of threads is more than snakes // return; // // // if (idx == 0) // printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); // point dEdp(0.0, 0.0, 0.0); // energy gradient wrt p // point dEdq(0.0, 0.0, 0.0); // energy gradient wrt q // sphere s = snakes[idx]; // // for (int i = 0; i < itr; i++) { // if (debug) printf("\n\n---------------->> iteration %u\n", i); // snake_Engrad_MC(dEdp, dEdq, s, I, samples, sampleNum, w, h, d, debug); // float factor = sqrtf(float(i + 1)); // if (debug) // printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", dEdp.x, dEdp.y, dEdp.z, dEdq.x); // s.update(dEdp, dEdq, dt / factor); // // } // // snakes[idx] = s; // // //} __global__ void kernel_snake_evolve_MC_parallel(sphere * snakes, float* I, point* samples, int sampleNum, size_t snakeNum, size_t threads, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { extern __shared__ float sharedPtr[]; // define shared memory to save result of each thread there int n = floorf(sampleNum / threads) ; //# given sample points to one thread int idx = blockDim.x * blockIdx.x + threadIdx.x; if (idx >= (snakeNum * threads)) // return if the number of threads is more than snakes return; float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); //snake cannot be smaller than threshold /*if (idx == 0) { printf("\n\n \t\t=============>>>>we are in the MC kernel\n\n"); printf("number of samples per thread=%d\n", n); printf("idx=%d\n", (snakeNum * threads)); printf("blockdim.x=%d and threads=%u \n", blockDim.x, threads); }*/ point thread_sample; //sample goes to the thread //sum_counter is real number of samples inside the contour averaged. some points of samples may round out of contour and did not contribute in averaging for (int i = 0; i < itr; i++) { //check the snake, if it pass the threshold if (snakes[blockIdx.x].r() < threshold) { if (debug) printf("\t RADIUS IS OUT OF RANGE\n"); return; } float radius = snakes[blockIdx.x].r(); // radius of outer snake point c = snakes[blockIdx.x].c(); // center of snake float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere float tempXmax = ceil(c.x + radius + 1); float tempYmin = floor(c.y - radius - 1); float tempYmax = ceil(c.y + radius + 1); float tempZmin = floor(c.z - radius - 1); float tempZmax = ceil(c.z + radius + 1); float xmin = max((float)tempXmin, (float) 0.0); //clsamples(amp the bounding box to the image edges float xmax = min((float)tempXmax, (float)(w - 1.0)); float ymin = max((float)tempYmin, (float)0.0); float ymax = min((float)tempYmax, (float)(h - 1.0)); float zmin = max((float)tempZmin, (float)0.0); float zmax = min((float)tempZmax, (float)(d - 1.0)); if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { if (debug) printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); return; } if (debug) printf("\n\n---------------->> iteration %u\n", i); int counter = 0; point dEdp(0.0f, 0.0f, 0.0f); // energy gradient wrt p point dEdq(0.0f, 0.0f, 0.0f); // energy gradient wrt q for (int j = 0; j < n; j++) { //sphere single_snake = snakes[blockIdx.x]; // each block is assigned to one snake. all threads in a block are working for that snake thread_sample = samples[threadIdx.x * n + j]; snake_Engrad_MC_parallel(dEdp, dEdq, counter, snakes[blockIdx.x], I, thread_sample, w, h, d, debug); } /*if (idx == 0) { printf("points in the contour for thread0=%d\n", counter); printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); } */ //copy the result of each thread in shared memory sharedPtr[threadIdx.x * 7 + 0] = dEdp.x; sharedPtr[threadIdx.x * 7 + 1] = dEdp.y; sharedPtr[threadIdx.x * 7 + 2] = dEdp.z; sharedPtr[threadIdx.x * 7 + 3] = dEdq.x; sharedPtr[threadIdx.x * 7 + 4] = dEdq.y; sharedPtr[threadIdx.x * 7 + 5] = dEdq.z; sharedPtr[threadIdx.x * 7 + 6] = counter; __syncthreads(); //combine threads dEdp = point(0.0f, 0.0f, 0.0f); // energy gradient wrt p dEdq = point(0.0f, 0.0f, 0.0f); counter = 0; if (threadIdx.x == 0) { float R = snakes[blockIdx.x].r(); // radius of outer snake float R3 = R * R * R; //calculate R^3 (radius cube) point c = snakes[blockIdx.x].c(); // center of snake for (int i = 0; i < threads; i++) { dEdp.x += sharedPtr[i * 7 + 0]; dEdp.y += sharedPtr[i * 7 + 1]; dEdp.z += sharedPtr[i * 7 + 2]; dEdq.x += sharedPtr[i * 7 + 3]; dEdq.y += sharedPtr[i * 7 + 4]; dEdq.z += sharedPtr[i * 7 + 5]; counter += sharedPtr[i * 7 + 6]; } float volume = pi * (((R + 1.0f) * (R + 1.0f) * (zmax - zmin)) - ((1.0f / 3.0f) * (powf((zmax - c.z), 3.0f) + powf((c.z - zmin), 3.0f)))); //float volume = (4.0f / 3.0f) * pi * (R + 1.0f)*(R + 1.0f)*(R + 1.0f); //(4.0f / 3.0f) * pi * powf((R + 1), 3.0f); dEdp = dEdp * volume / (float)counter; dEdq = dEdq * volume / (float)counter; dEdp = dEdp / (8.0f * R3); dEdq = dEdq / (8.0f * R3); //if (idx == 0) //printf("dEdp.x=%f and dEdq.x=%f \t dEdp.y=%f and dEdq.y=%f \t dEdp.z=%f and dEdq.z=%f", dEdp.x, dEdq.x, dEdp.y, dEdq.y, dEdp.z, dEdq.z); float factor = sqrtf(float(i + 1)); snakes[blockIdx.x].update(dEdp, dEdq, dt / factor); //printf("snakes[blockIdx.x].p.x=%f and snakes[blockIdx.x].p.y=%f \n snakes[blockIdx.x].q.x=%f and snakes[blockIdx.x].q.y=%f\n\n", snakes[blockIdx.x].p.x, snakes[blockIdx.x].p.y, snakes[blockIdx.x].q.x, snakes[blockIdx.x].q.y); } __syncthreads(); } } __global__ void kernel_snake_evolve(sphere* snakes, float *I, size_t snakeNum, size_t w, size_t h, size_t d, int itr, float dt, bool debug = false) { //__launch_bounds__(1024, 1); int idx = blockDim.x * blockIdx.x + threadIdx.x; if (idx >= snakeNum) // return if the number of threads is more than snakes return; if (idx == 0) printf("\n\n \t\t=============>>>>we are in the kernel\n\n"); point dEdp(0.0, 0.0, 0.0); // energy gradient wrt p point dEdq(0.0, 0.0, 0.0); // energy gradient wrt q sphere s = snakes[idx]; for (int i = 0; i < itr; i++) { snake_Engrad(dEdp, dEdq, s, I, w, h, d, debug); if (debug) printf("dEdp.x=%f and dEdp.y=%f and dEdp.z=%f and dEdq.x=%f\n", dEdp.x, dEdp.y, dEdp.z, dEdq.x); float factor = sqrtf(float(i + 1)); s.update(dEdp, dEdq, dt / factor); } snakes[idx] = s; } // -----------------------------------Energy computaion and compare hypersnakes------------------------------------------------------------------------ __host__ __device__ void sum_E(float& E, point c, point s, float R, float f) { float r = R / cubeRoot2; // radius of inner snake float dz2 = (s.z - c.z)*(s.z - c.z); float dy2 = (s.y - c.y)*(s.y - c.y); float dx2 = (s.x - c.x)*(s.x - c.x); float d = sqrt(dz2 + dy2 + dx2); //distance bw given sample and center of contour if (d < (r - 0.5f*deltar)) // mouth of snake E -= f; else if (d < (r + 0.5f*deltar)) { // throat of snake float S = (2.0f / deltar) * (d - r); // weight function value in the given point E += (S * f); } else if (d < (R - 0.5f*deltaR)) // coil of snake E += f; else if (d < (R + 0.5f*deltaR)) { // fangs of snake float S = -(1.0f / deltaR) * (d - (R + deltaR / 2.0f)); E += (S * f); } } __host__ __device__ void snake_energy(float &energy, sphere snake, float* I, size_t w, size_t h, size_t d) { float radius = snake.r(); // radius of outer snake point c = snake.c(); // center of snake float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); if (radius < threshold) { //printf("radius is out of range\n"); return; } float tempXmin = floor(c.x - radius - 1); //calculate a bounding box around the sphere float tempXmax = ceil(c.x + radius + 1); float tempYmin = floor(c.y - radius - 1); float tempYmax = ceil(c.y + radius + 1); float tempZmin = floor(c.z - radius - 1); float tempZmax = ceil(c.z + radius + 1); float xmin = max((float)tempXmin, (float) 0.0); //clamp the bounding box to the image edges float xmax = min((float)tempXmax, (float)(w - 1)); float ymin = max((float)tempYmin, (float)0); float ymax = min((float)tempYmax, (float)(h - 1)); float zmin = max((float)tempZmin, (float)0); float zmax = min((float)tempZmax, (float)(d - 1)); if ((xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)) { //printf("(xmax <= xmin) || (ymax <= ymin) || (zmax <= zmin)"); return; } float E = 0.0f; float R = radius; //simplify radius to R float R3 = R * R * R; //calculate R^2 (radius squared) for (unsigned int z = (unsigned int)zmin; z <= (unsigned int)zmax; z++) { // for each section for (unsigned int y = (unsigned int)ymin; y <= (unsigned int)ymax; y++) { //for each row of section in the bounding box for (unsigned int x = (unsigned int)xmin; x <= (unsigned int)xmax; x++) { //for each pixel p in the row point s(x, y, z); // a sample inside the contour float f; // image value in given position int position = (int)((w * h * z) + (x * h + y)); if (position < (w * h * d)) { f = I[position]; if (!f == 0) sum_E(E, c, s, R, f); } } } } energy = E / (8 * R3); } //compute energy of snakes __global__ void kernel_snake_energy(float* energy, sphere* snakes, float* I, size_t snakeNum, size_t w, size_t h, size_t d) { size_t i = blockDim.x * blockIdx.x + threadIdx.x; if (i >= snakeNum) return; // return if the number of threads is more than snakes if (i == 0) { printf("we are in energy kernel\n"); } float energy_temp; snake_energy(energy_temp, snakes[i], I, w, h, d); energy[i] = energy_temp; } // returns a set of snake indices that meet specific criteria(to be determined and refined by Mahsa) std::vector DetectValidSnakes_GPU(sphere* snakes, size_t snakeNum, float* I, size_t w, size_t h, size_t d, size_t threads, float energy_th) { ////calculate energy float *gpu_energy; HANDLE_ERROR(cudaMalloc(&gpu_energy, snakeNum * sizeof(float))); size_t blocks = snakeNum / threads + 1; //// allocate memory and copy snakes to the device sphere* gpu_snakes = new sphere[snakeNum]; HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere))); HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice)); //// allocate memory and copy image to device float *gpu_I; HANDLE_ERROR(cudaMalloc(&gpu_I, w * h * d * sizeof(float))); HANDLE_ERROR(cudaMemcpy(gpu_I, I, w * h * d * sizeof(float), cudaMemcpyHostToDevice)); //create an array to store the snake energies' on cpu float *energy; energy = (float*)malloc(snakeNum * sizeof(float)); //memset(energy, 0, snakeNum * sizeof(float)); kernel_snake_energy << < (unsigned int)blocks, (unsigned int)threads >> > (gpu_energy, gpu_snakes, gpu_I, snakeNum, w, h, d); HANDLE_ERROR(cudaMemcpy(energy, gpu_energy, snakeNum * sizeof(float), cudaMemcpyDeviceToHost)); //std::ofstream outfile("energy.txt"); //open a file for writing //for (size_t i = 0; i < snakeNum; i++) { // outfile << energy[i] << std::endl; //output the energy //} //outfile.close(); cudaFree(gpu_energy); cudaFree(gpu_I); cudaFree(gpu_snakes); // compare snakes in possible overlaps std::vector id; //store indices of snakes which have overlaps with snake i std::vector idx; //create a vector to store indices of snakes which must be deleted. float threshold = ((1 + cubeRoot2) / (cubeRoot2 - 1))*(deltar / pow(2.0f, (2.0f / 3.0f))); for (size_t i = 0; i < snakeNum; i++) { if (snakes[i].r() < threshold) idx.push_back(i); if (std::find(idx.begin(), idx.end(), i) == idx.end()) { // check if snake is already deleted id.clear(); for (size_t j = 0; j < snakeNum; j++) { if (j != i) { if (snakes[j].c().x > snakes[i].c().x - 2 * snakes[i].r() && snakes[j].c().x < snakes[i].c().x + 2 * snakes[i].r() && snakes[j].c().y > snakes[i].c().y - 2 * snakes[i].r() && snakes[j].c().y < snakes[i].c().y + 2 * snakes[i].r()) { if (snakes[j].r() < threshold) idx.push_back(j); if (std::find(idx.begin(), idx.end(), j) == idx.end()) { float centerDistance_x = snakes[i].c().x - snakes[j].c().x; // centers distance of snakes i and j- in x direction float centerDistance_y = snakes[i].c().y - snakes[j].c().y; // centers distance of snakes i and j- in y direction float centerDistance_z = snakes[i].c().z - snakes[j].c().z; // centers distance of snakes i and j- in z direction float centerDistance = sqrt((centerDistance_x * centerDistance_x) + (centerDistance_y * centerDistance_y) + 4 * (centerDistance_z * centerDistance_z)); // euclidean distance bw center snakes i and j float maxRadius = max(snakes[i].r(), snakes[j].r()); // maximum of radius of snake i and snake j if (centerDistance < (maxRadius / cubeRoot2)) id.push_back(j); // store indices of overlapped snakes with snake i } } } } if (!id.empty()) { id.push_back(i); float smallest = energy[id[0]]; size_t smallest_id = id[0]; // index of snake should be kept for (int k = 0; k < id.size(); k++) { // find snake with smallest energy among id vector if (energy[id[k]] < smallest) { smallest = energy[id[k]]; smallest_id = id[k]; } } for (int m = 0; m < id.size(); m++) { if (id[m] != smallest_id) // among snakes with overlaps, the one with smallest energy survies. idx.push_back(id[m]); // idx stores snakes which should be deleted } } } } std::vector th_idx; //create a vector to store final indices exclusive idx (indices of snakes with large energy) and the ones with energy higher than threshold for (size_t c = 0; c < snakeNum; c++) { if (std::find(idx.begin(), idx.end(), c) == idx.end()) { if (energy[c] < energy_th) th_idx.push_back(c); //if the snake exceeds the energy threshold, store the index } } free(energy); return th_idx; //return the indices of valid snakes } ///..................................................................main function............................................................................................. void advertise() { std::cout << "this is Hypersnakuscule implementation" << std::endl; std::cout << "reference papre for 2D is (Snakuscules by Philippe Thévenaz and Michael Unser)" << std::endl; std::cout << "implemented by Mahsa Lotfollahi" << std::endl << std::endl; std::cout << "Usage: snakuscules input_image [options]" << std::endl; } int main(int argc, char* argv[]) { stim::arglist args; // create an argument list args.add("help", "prints this help"); args.add("iter", "number of iteration for evolving contour", "400", "positive value"); args.add("radius", "initial radius", "15", "real positive value"); args.add("size", "specify size of image in 3 dimension", "", "[w h d]"); args.add("dt", "gradient descend stepsize", "10", "real positive value"); args.add("cuda", "specify the device used for CUDA calculations", "0", "device ID, -1 for CPU"); args.add("mc", "specify using Monte Carlo sampling", "", "MC=1 for Monte Carlo sampling and 0 for original integration"); args.add("single", "specify a single contour to evolve", "", "[x y z r]"); args.add("filter", "specify filter type for preprocessing like log", "", "name of filter"); args.add("energy_th", "specify energy threshold, snakes with energies less than that survive", "-1", "small negative value"); args.add("debug", "output debugging information"); args.parse(argc, argv); if (args["help"].is_set()) { //output help if requested by the user advertise(); std::cout << args.str() << std::endl; return 1; } if (args.nargs() < 1) { std::cout << "ERROR: no input file specified" << std::endl; return 1; } std::string output_file = "output.txt"; //set the default output file name to "output.txt" if (args.nargs() >= 2) output_file = args.arg(1); //if an output is specified by the user, use that instead int itr = args["iter"].as_int(); //get input parameters and set variables float radius = (float)args["radius"].as_float(); if (!args["size"]) { //get size of image std::cout << "you should specify size of image in 3 dimension" << std::endl; return 1; } int w = args["size"].as_int(0); int h = args["size"].as_int(1); int d = args["size"].as_int(2); float energy_th = (float)args["energy_th"].as_float(); float dt = (float)args["dt"].as_float(); int cuda_device = args["cuda"].as_int(); //get the desired CUDA device bool MC = false; int sampleNum; if (args["mc"]) MC = true; sampleNum = 20000; if (args["mc"].nargs() > 0) sampleNum = args["mc"].as_int(); bool swarm = true; if (args["single"]) swarm = false; //if the user specifies a single snake parameter, don't use the swarm algorithm bool Filter = false; std::string filter_name; int kernel_size; // kernel size if (args["filter"]) { Filter = true; filter_name = "log"; kernel_size = 5; if (args["filter"].nargs() > 0) filter_name = args["filter"].as_string(0); if (args["filter"].nargs() > 1) kernel_size = args["filter"].as_int(1); } //allocate memory in cpu for input image size_t bytes = w * h * d * sizeof(float); //number of bytes needed to store image float* I = (float*)malloc(bytes); // load input image+ std::ifstream inputfile(args.arg(0), std::ios::in | std::ios::binary); if (!inputfile) { std::cout << "cannot open specified input file" << std::endl; return; } inputfile.read((char*)I, bytes); inputfile.close(); size_t N = w * h * d; // number of pixels (# array elements) float* I_original = (float*)malloc(bytes); // keep the original image without pre-processing memcpy(I_original, I, bytes); stretch(I_original, N, 0, 255); stretch(I, N, 0, 255); if (Filter) { // compute log of image if (filter_name == "log") { for (int i = 0; i < N; i++) I[i] = log(I[i] + 1); } if (filter_name == "median") { // apply median filter on each section float* I_2D = (float*)malloc(w*h * sizeof(float)); // allocate memory to 2_D sections cv::Mat t_I_2D_mat(w, h, CV_32F); //image is stored column major but open cv read and write row major---matrix is transposed cv::Mat I2d_blurred(w, h, CV_32F); //allocate memory for blured image for (int zz = 0; zz < d; zz++) { memcpy(I_2D, I_original + (zz * w *h), w * h * sizeof(float)); // copy each section of 3D image(I) in an array cv::Mat I_2D_mat(h, w, CV_32F, I_2D); // create a Mat to copy data to that and be able to use open cv median filter cv::transpose(I_2D_mat, t_I_2D_mat); cv::medianBlur(t_I_2D_mat, I2d_blurred, kernel_size); // apply median filter cv::transpose(I2d_blurred, I_2D_mat); // transpose to be transfered to array I_2D = (float *)I_2D_mat.data; memcpy(I + (zz * w *h), I_2D, w * h * sizeof(float)); } free(I_2D); t_I_2D_mat.release(); I2d_blurred.release(); } } stretch(I, N, 0, 255); size_t snakeNum; if (swarm) { int D = int(sqrt(1.5)*radius); //distance between hypersnakes snakeNum = w * h * d / (D * D * D ); // approximate number of hypersnakes lying on image } else snakeNum = 1; sphere* snakes = new sphere[snakeNum]; //create an array of spheres.(one for each snake) memset(snakes, 0, snakeNum * sizeof(sphere)); if (swarm) { initialSwarmSnake(snakes, snakeNum, w, h, d, radius); std::cout << "number of snakes=" << snakeNum << std::endl; } else { point center; center.x = (float)args["single"].as_float(0); center.y = (float)args["single"].as_float(1); center.z = (float)args["single"].as_float(2); //cout << "number of args" << args["single"].nargs() << endl; if (args["single"].nargs() >= 4) radius = (float)args["single"].as_float(3); snakes[0].p.x = center.x - radius; // define p and q snakes[0].q.x = center.x + radius; snakes[0].p.y = snakes[0].q.y = center.y; snakes[0].p.z = snakes[0].q.z = center.z; } point* samples = (point*)malloc(sampleNum * sizeof(point)); memset(samples, 0, sampleNum * sizeof(point)); if (MC) { if (args["debug"]) { randGenerator_cube(samples, sampleNum, true); } else randGenerator_cube(samples, sampleNum); } std::cout << "energy_th=" << energy_th << std::endl; std::cout << "initial radius=" << radius << std::endl; //-----------------------------------GPU implementation---------------------------------------------------------------------------- if (cuda_device >= 0) { cudaDeviceProp prop; HANDLE_ERROR(cudaGetDeviceProperties(&prop, 0)); size_t threads = (size_t)prop.maxThreadsPerBlock; size_t blocks = snakeNum; size_t nbyte_shared = 7 * 4 * threads; // allocate memory to snakes and copy them to device sphere* gpu_snakes; HANDLE_ERROR(cudaMalloc(&gpu_snakes, snakeNum * sizeof(sphere))); HANDLE_ERROR(cudaMemcpy(gpu_snakes, snakes, snakeNum * sizeof(sphere), cudaMemcpyHostToDevice)); //allocate memory to image in device and copy from cpu to device float* gpu_I; HANDLE_ERROR(cudaMalloc(&gpu_I, bytes)); HANDLE_ERROR(cudaMemcpy(gpu_I, I, bytes, cudaMemcpyHostToDevice)); stim::gpuStartTimer(); if (MC) { // allocate memory to random samples on device point* G_samples; HANDLE_ERROR(cudaMalloc(&G_samples, sampleNum * sizeof(point))); HANDLE_ERROR(cudaMemset(G_samples, 0, sampleNum * sizeof(point))); HANDLE_ERROR(cudaMemcpy(G_samples, samples, sampleNum * sizeof(point), cudaMemcpyHostToDevice)); if (args["debug"]) kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt, true); else kernel_snake_evolve_MC_parallel << < blocks, threads, nbyte_shared >> > (gpu_snakes, gpu_I, G_samples, sampleNum, snakeNum, threads, w, h, d, itr, dt); } else { if (args["debug"]) kernel_snake_evolve << > > (gpu_snakes, gpu_I, snakeNum, w, h, d, itr, dt, true); else kernel_snake_evolve << > > (gpu_snakes, gpu_I, snakeNum, w, h, d, itr, dt); } std::cout << "gpuruntime = " << stim::gpuStopTimer() << " ms" << std::endl; HANDLE_ERROR(cudaMemcpy(snakes, gpu_snakes, snakeNum * sizeof(sphere), cudaMemcpyDeviceToHost)); cudaFree(gpu_I); cudaFree(gpu_snakes); std::vector idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, threads, energy_th); SaveSnakes(output_file, snakes, idx); } //--------------------------------------------CPU implementation---------------------------------------------------------------------------- else { unsigned int start = time(NULL); std::cout << "it is running on CPU" << std::endl; for (int i = 0; i < snakeNum; i++) { //printf("\n\n---------------->> iteration %u", numItr); if (args["debug"]) snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC, true); else snake_evolve(snakes[i], I, w, h, d, dt, itr, samples, sampleNum, MC); std::cout << "Output Snakes------------------" << std::endl; std::cout << snakes[i].str() << std::endl; } unsigned int end = time(NULL); std::cout << "cpuRunTime=" << end - start << "s" << std::endl; std::vector idx = DetectValidSnakes_GPU(snakes, snakeNum, I_original, w, h, d, 512, energy_th); SaveSnakes(output_file, snakes, idx); } free(I); free(I_original); if (args["debug"]) { std::vector idx(snakeNum); std::iota(idx.begin(), idx.end(), 0); //SaveSnakes(debug_file, snakes, idx); //saves the snakes to an output file std::cout << "Output Snakes------------------" << std::endl; for (size_t i = 0; i < idx.size(); i++) { //for each snake std::cout << snakes[idx[i]].str() << std::endl; } } std::vector idx(snakeNum); std::iota(idx.begin(), idx.end(), 0); SaveSnakes("no-delete.txt", snakes, idx); //saves the snakes to an output file return 0; }