From 3f0e43cee10e36ee424753707555ba8e23b3273f Mon Sep 17 00:00:00 2001 From: laila Saadatifard Date: Fri, 8 Jul 2016 12:14:52 -0500 Subject: [PATCH] compute the local max without thresholding --- cpp/CMakeLists.txt | 2 ++ cpp/local_max3.cuh | 14 ++++++++------ cpp/local_max3_threshold.cuh | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cpp/main.cpp | 54 ++++++++++++++++++++++++++++-------------------------- 4 files changed, 133 insertions(+), 32 deletions(-) create mode 100644 cpp/local_max3_threshold.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d35d37b..76dc842 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,3 +58,5 @@ target_link_libraries(ivote3 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) + +configure_file(nissl-raw-data/nissl-float-128.128.128.vol ${CMAKE_CURRENT_BINARY_DIR}/nissl-float-128.128.128_at.vol @ONLY) \ No newline at end of file diff --git a/cpp/local_max3.cuh b/cpp/local_max3.cuh index c94f875..a13bd56 100644 --- a/cpp/local_max3.cuh +++ b/cpp/local_max3.cuh @@ -6,7 +6,7 @@ #include template -__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){ +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, int conn_x, int conn_y, int conn_z, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int //initialize the max value to the vote value for this pixle. T lv_i = gpu_vote[i]; - //check the vote value is greater than threshold - if (lv_i < t) return; - for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ @@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int int i_l = zl * x * y + yl * x + xl; if (gpu_vote[i_l] > lv_i) return; + if (gpu_vote[i_l] == lv_i){ + if( i_l > i){ + return; + } + } } } } } - gpu_center[i] = 1; + gpu_center[i] = lv_i; } template @@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); //call the kernel to find the local max - cuda_local_max3<<>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z); + cuda_local_max3<<>>(gpu_output, gpu_vote, conn[0], conn[1], conn[2], x, y, z); diff --git a/cpp/local_max3_threshold.cuh b/cpp/local_max3_threshold.cuh new file mode 100644 index 0000000..4bcd28c --- /dev/null +++ b/cpp/local_max3_threshold.cuh @@ -0,0 +1,95 @@ +#ifndef STIM_CUDA_LOCAL_MAX3_THRESHOLD_H +#define STIM_CUDA_LOCAL_MAX3_THRESHOLD_H + +#include +#include +#include + +template +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){ + + //calculate x,y,z coordinates for this thread + int xi = blockIdx.x * blockDim.x + threadIdx.x; + //find the grid size along y + int grid_y = y / blockDim.y; + int blockidx_y = blockIdx.y % grid_y; + int yi = blockidx_y * blockDim.y + threadIdx.y; + int zi = blockIdx.y / grid_y; + int i = zi * x * y + yi * x + xi; + + if (xi>=x || yi>=y || zi>=z) return; + + //initilize the center value for this pixel to zero + gpu_center[i] = 0; + + //initialize the max value to the vote value for this pixle. + T lv_i = gpu_vote[i]; + + //check the vote value is greater than threshold + if (lv_i < t) return; + + for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ + + for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ + + for (int zl = zi - conn_z ; zl <= zi + conn_z; zl++){ + + if (xl>=0 && yl>=0 && zl>=0 && xl lv_i) return; + if (gpu_vote[i_l] == lv_i){ + if( i_l > i){ + return; + } + } + } + } + } + } + + gpu_center[i] = 1; +} + +template +void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ + + //find the max number of threads per block. + unsigned int max_threads = stim::maxThreadsPerBlock(); + dim3 threads(sqrt (max_threads),sqrt (max_threads)); + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); + + //call the kernel to find the local max + cuda_local_max3<<>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z); + + + +} + +template +void cpu_local_max3(T* cpu_output, T* cpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ + + //calculate the number of bytes in the data set. + unsigned int bytes = x * y * z * sizeof(T); + + //allocate space on the gpu for the vote data and the output. + T* gpu_vote; + cudaMalloc(&gpu_vote, bytes); + + T* gpu_output; + cudaMalloc(&gpu_output, bytes); + + //copy the vote data to the gpu. + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice); + + //call the gpu version of local max function. + gpu_local_max3(gpu_output, gpu_vote, t, conn, x ,y, z); + + //copy the detected result to the cpu. + cudaMemcpy(cpu_output, gpu_output, bytes, cudaMemcpyDeviceToHost); + + cudaFree(gpu_vote); + cudaFree(gpu_output); +} + +#endif \ No newline at end of file diff --git a/cpp/main.cpp b/cpp/main.cpp index dbb6ae8..928d0b4 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -215,7 +215,7 @@ int main(int argc, char** argv){ unsigned int r[3] = { 12, rmax, rmax}; float std = 5; float sigma[3] = { std, std, std}; - unsigned int nlmax = 1; + unsigned int nlmax = 5; unsigned int conn[3] = { nlmax, nlmax, nlmax}; float phi_deg = 25.0; float phi = phi_deg * pi /180; @@ -269,31 +269,33 @@ int main(int argc, char** argv){ // creat a file for saving the list centers - //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); - //// set the number of detected cells to zero. - //int nod = 0; - //if (list.is_open()){ - - // for (int iz=0; iz0){ + nod++; + list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << "\t" << cpu_out[idx] << '\n' ; + + } + } + } + } + list << "p" << "\t"; + for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ + list << i_nod << "\t"; + } + + list.close(); + } + + //} cudaDeviceReset(); -- libgit2 0.21.4