ivote2.cuh 7.32 KB
``````#ifndef STIM_IVOTE2_CUH
#define STIM_IVOTE2_CUH

#include <iostream>
#include <fstream>
#include <stim/cuda/cudatools/error.h>
#include <stim/cuda/arraymath.cuh>
#include <stim/iVote/ivote2/iter_vote2.cuh>
#include <stim/iVote/ivote2/local_max.cuh>
#include <stim/math/constants.h>
#include <stim/math/vector.h>
#include <stim/visualization/colormap.h>

namespace stim {
// this function precomputes the atan2 values
template<typename T>
void atan_2(T* cpuTable, unsigned int rmax) {
int xsize = 2 * rmax + 1;						//initialize the width and height of the window which atan2 are computed in.
int ysize = 2 * rmax + 1;
int yi = rmax;									// assign the center coordinates of the atan2 window to yi and xi
int xi = rmax;
for (int xt = 0; xt < xsize; xt++) {			//for each element in the atan2 table
for (int yt = 0; yt < ysize; yt++) {
int id = yt * xsize + xt;				//convert the current 2D coordinates to 1D
int xd = xi - xt;						// calculate the distance between the pixel and the center of the atan2 window
int yd = yi - yt;
T atan_2d = atan2((T)yd, (T)xd);	// calculate the angle between the pixel and the center of the atan2 window and store the result.
cpuTable[id] = atan_2d;
}
}
}

//this kernel invert the 2D image
template<typename T>
__global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
// calculate the 2D coordinates for this current thread.
size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
size_t yi = blockIdx.y * blockDim.y + threadIdx.y;

if (xi >= x || yi >= y) return;
size_t i = yi * x + xi;					// convert 2D coordinates to 1D
gpuI[i] = 255 - gpuI[i];				//invert the pixel intensity
}

//this function calculate the threshold using OTSU method
template<typename T>
T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
T Imax = pts[0];				//initialize the maximum value to the first one
T Imin = pts[0];				//initialize the maximum value to the first on

for (size_t n = 0; n < pixels; n++) {		//for every value
if (pts[n] > Imax) {			//if the value is higher than the current max
Imax = pts[n];
}
}
for (size_t n = 0; n< pixels; n++) {		//for every value
if (pts[n] < Imin) {			//if the value is higher than the current max
Imin = pts[n];
}
}

T th_step = ((Imax - Imin) / th_num);
vector<T> var_b;
for (unsigned int t0 = 0; t0 < th_num; t0++) {
T th = t0 * th_step + Imin;
unsigned int n_b(0), n_o(0);		//these variables save the number of elements that are below and over the threshold
T m_b(0), m_o(0);				//these variables save the mean value for each cluster
for (unsigned int idx = 0; idx < pixels; idx++) {
if (pts[idx] <= th) {
m_b += pts[idx];
n_b += 1;
}
else {
m_o += pts[idx];
n_o += 1;
}
}

m_b = m_b / n_b;		//calculate the mean value for the below threshold cluster
m_o = m_o / n_o;		//calculate the mean value for the over threshold cluster

var_b.push_back(n_b * n_o * pow((m_b - m_o), 2));
}

vector<float>::iterator max_var = std::max_element(var_b.begin(), var_b.end());	//finding maximum elements in the vector
size_t th_idx = std::distance(var_b.begin(), max_var);
T threshold = Imin + (T)(th_idx * th_step);
return threshold;
}

//this function performs the 2D iterative voting algorithm on the image stored in the gpu
template<typename T>
void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {

size_t pixels = x * y;				//compute the size of input image
//
if (invert) {						//if inversion is required call the kernel to invert the image
dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
cuda_invert << <blocks, threads >> > (gpuI, x, y);
}
//
size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T));				// create the atan2 table
T* cpuTable = (T*)malloc(table_bytes);											//assign memory on the cpu for atan2 table
atan_2<T>(cpuTable, rmax);														//call the function to precompute the atan2 table
T* gpuTable;  HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice));	//copy atan2 table to the gpu

size_t bytes = pixels* sizeof(T);													//calculate the bytes of the input
float dphi = phi / iter;															//change in phi for each iteration

float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes));										//allocate space to store the vote image

for (int i = 0; i < iter; i++) {														//for each iteration
cudaMemset(gpuVote, 0, bytes);													//reset the vote image to 0
stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);		//perform voting
stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);	//update the voter directions
phi = phi - dphi;																//decrement phi
}
stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y);				//calculate the local maxima

T* pts = (T*)malloc(bytes);													//allocate memory on the cpu to store the output of iterative voting
HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost));			//copy the output from gpu to the cpu memory

T threshold;
if (t == 0) threshold = stim::th_otsu<T>(pts, pixels);	//if threshold value is not set call the function to compute the threshold
else threshold = t;

std::ofstream output;		//save the thresholded detected seeds in a text file
output.open(outname_txt);
output << "X" << " " << "Y" << " " << "threshold" << "\n";
size_t ind;
for (size_t ix = 0; ix < x; ix++) {
for (size_t iy = 0; iy < y; iy++) {
ind = iy * x + ix;
if (pts[ind] > threshold) {
output << ix << " " << iy << " " << pts[ind] << "\n";
pts[ind] = 1;
}
else pts[ind] = 0;
}
}
output.close();

HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice));		//copy the points to the gpu
stim::cpu2image(pts, outname_img, x, y); //output the image

}

template<typename T>
void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
size_t bytes = x*y * sizeof(T);
T* gpuI;						//allocate space on the gpu to save the input image
HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice));		//copy the image to the gpu
stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn, debug);				//call the gpu version of the ivote
HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost));		//copy the output to the cpu
}
}
#endif``````