#ifndef STIM_IVOTE2_CUH #define STIM_IVOTE2_CUH #include #include #include #include #include #include #include #include #include #include namespace stim { // this function precomputes the atan2 values template void atan_2(T* cpuTable, unsigned int rmax) { int xsize = 2 * rmax + 1; //initialize the width and height of the window which atan2 are computed in. int ysize = 2 * rmax + 1; int yi = rmax; // assign the center coordinates of the atan2 window to yi and xi int xi = rmax; for (int xt = 0; xt < xsize; xt++) { //for each element in the atan2 table for (int yt = 0; yt < ysize; yt++) { int id = yt * xsize + xt; //convert the current 2D coordinates to 1D int xd = xi - xt; // calculate the distance between the pixel and the center of the atan2 window int yd = yi - yt; T atan_2d = atan2((T)yd, (T)xd); // calculate the angle between the pixel and the center of the atan2 window and store the result. cpuTable[id] = atan_2d; } } } //this kernel invert the 2D image template __global__ void cuda_invert(T* gpuI, size_t x, size_t y) { // calculate the 2D coordinates for this current thread. size_t xi = blockIdx.x * blockDim.x + threadIdx.x; size_t yi = blockIdx.y * blockDim.y + threadIdx.y; if (xi >= x || yi >= y) return; size_t i = yi * x + xi; // convert 2D coordinates to 1D gpuI[i] = 255 - gpuI[i]; //invert the pixel intensity } //this function calculate the threshold using OTSU method template T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) { T Imax = pts[0]; //initialize the maximum value to the first one T Imin = pts[0]; //initialize the maximum value to the first on for (size_t n = 0; n < pixels; n++) { //for every value if (pts[n] > Imax) { //if the value is higher than the current max Imax = pts[n]; } } for (size_t n = 0; n< pixels; n++) { //for every value if (pts[n] < Imin) { //if the value is higher than the current max Imin = pts[n]; } } T th_step = ((Imax - Imin) / th_num); vector var_b; for (unsigned int t0 = 0; t0 < th_num; t0++) { T th = t0 * th_step + Imin; unsigned int n_b(0), n_o(0); //these variables save the number of elements that are below and over the threshold T m_b(0), m_o(0); //these variables save the mean value for each cluster for (unsigned int idx = 0; idx < pixels; idx++) { if (pts[idx] <= th) { m_b += pts[idx]; n_b += 1; } else { m_o += pts[idx]; n_o += 1; } } m_b = m_b / n_b; //calculate the mean value for the below threshold cluster m_o = m_o / n_o; //calculate the mean value for the over threshold cluster var_b.push_back(n_b * n_o * pow((m_b - m_o), 2)); } vector::iterator max_var = std::max_element(var_b.begin(), var_b.end()); //finding maximum elements in the vector size_t th_idx = std::distance(var_b.begin(), max_var); T threshold = Imin + (T)(th_idx * th_step); return threshold; } //this function performs the 2D iterative voting algorithm on the image stored in the gpu template void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { size_t pixels = x * y; //compute the size of input image // if (invert) { //if inversion is required call the kernel to invert the image unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads)); dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1); cuda_invert << > > (gpuI, x, y); } // size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T)); // create the atan2 table T* cpuTable = (T*)malloc(table_bytes); //assign memory on the cpu for atan2 table atan_2(cpuTable, rmax); //call the function to precompute the atan2 table T* gpuTable; HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes)); HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice)); //copy atan2 table to the gpu size_t bytes = pixels* sizeof(T); //calculate the bytes of the input float dphi = phi / iter; //change in phi for each iteration float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2)); //allocate space to store the 2D gradient float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image stim::cuda::gpu_gradient_2d(gpuGrad, gpuI, x, y); //calculate the 2D gradient stim::cuda::gpu_cart2polar(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar for (int i = 0; i < iter; i++) { //for each iteration cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 stim::cuda::gpu_vote(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //perform voting stim::cuda::gpu_update_dir(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //update the voter directions phi = phi - dphi; //decrement phi } stim::cuda::gpu_local_max(gpuI, gpuVote, conn, x, y); //calculate the local maxima if (t > 0) { T* pts = (T*)malloc(bytes); //allocate memory on the cpu to store the output of iterative voting HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output from gpu to the cpu memory T threshold; threshold = t; size_t ind; for (size_t ix = 0; ix < x; ix++) { for (size_t iy = 0; iy < y; iy++) { ind = iy * x + ix; if (pts[ind] > threshold) { pts[ind] = 1; } else pts[ind] = 0; } } HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice)); //copy the points to the gpu } } template void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { size_t bytes = x*y * sizeof(T); T* gpuI; //allocate space on the gpu to save the input image HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu stim::gpu_ivote2(gpuI, rmax, x, y, invert, t, iter, phi, conn, debug); //call the gpu version of the ivote HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu } } #endif