Commit 3f0e43cee10e36ee424753707555ba8e23b3273f

Authored by Laila Saadatifard
1 parent c986ebb7

compute the local max without thresholding

cpp/CMakeLists.txt
... ... @@ -58,3 +58,5 @@ target_link_libraries(ivote3
58 58 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
59 59 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
60 60 configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY)
  61 +
  62 +configure_file(nissl-raw-data/nissl-float-128.128.128.vol ${CMAKE_CURRENT_BINARY_DIR}/nissl-float-128.128.128_at.vol @ONLY)
61 63 \ No newline at end of file
... ...
cpp/local_max3.cuh
... ... @@ -6,7 +6,7 @@
6 6 #include <stim/cuda/cudatools.h>
7 7  
8 8 template<typename T>
9   -__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){
  9 +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, int conn_x, int conn_y, int conn_z, int x, int y, int z){
10 10  
11 11 //calculate x,y,z coordinates for this thread
12 12 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int
25 25 //initialize the max value to the vote value for this pixle.
26 26 T lv_i = gpu_vote[i];
27 27  
28   - //check the vote value is greater than threshold
29   - if (lv_i < t) return;
30   -
31 28 for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){
32 29  
33 30 for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){
... ... @@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int
38 35  
39 36 int i_l = zl * x * y + yl * x + xl;
40 37 if (gpu_vote[i_l] > lv_i) return;
  38 + if (gpu_vote[i_l] == lv_i){
  39 + if( i_l > i){
  40 + return;
  41 + }
  42 + }
41 43 }
42 44 }
43 45 }
44 46 }
45 47  
46   - gpu_center[i] = 1;
  48 + gpu_center[i] = lv_i;
47 49 }
48 50  
49 51 template<typename T>
... ... @@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign
55 57 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
56 58  
57 59 //call the kernel to find the local max
58   - cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z);
  60 + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, conn[0], conn[1], conn[2], x, y, z);
59 61  
60 62  
61 63  
... ...
cpp/local_max3_threshold.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_LOCAL_MAX3_THRESHOLD_H
  2 +#define STIM_CUDA_LOCAL_MAX3_THRESHOLD_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +
  8 +template<typename T>
  9 +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){
  10 +
  11 + //calculate x,y,z coordinates for this thread
  12 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  13 + //find the grid size along y
  14 + int grid_y = y / blockDim.y;
  15 + int blockidx_y = blockIdx.y % grid_y;
  16 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  17 + int zi = blockIdx.y / grid_y;
  18 + int i = zi * x * y + yi * x + xi;
  19 +
  20 + if (xi>=x || yi>=y || zi>=z) return;
  21 +
  22 + //initilize the center value for this pixel to zero
  23 + gpu_center[i] = 0;
  24 +
  25 + //initialize the max value to the vote value for this pixle.
  26 + T lv_i = gpu_vote[i];
  27 +
  28 + //check the vote value is greater than threshold
  29 + if (lv_i < t) return;
  30 +
  31 + for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){
  32 +
  33 + for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){
  34 +
  35 + for (int zl = zi - conn_z ; zl <= zi + conn_z; zl++){
  36 +
  37 + if (xl>=0 && yl>=0 && zl>=0 && xl<x && yl<y && zl<z){
  38 +
  39 + int i_l = zl * x * y + yl * x + xl;
  40 + if (gpu_vote[i_l] > lv_i) return;
  41 + if (gpu_vote[i_l] == lv_i){
  42 + if( i_l > i){
  43 + return;
  44 + }
  45 + }
  46 + }
  47 + }
  48 + }
  49 + }
  50 +
  51 + gpu_center[i] = 1;
  52 +}
  53 +
  54 +template<typename T>
  55 +void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  56 +
  57 + //find the max number of threads per block.
  58 + unsigned int max_threads = stim::maxThreadsPerBlock();
  59 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  60 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  61 +
  62 + //call the kernel to find the local max
  63 + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z);
  64 +
  65 +
  66 +
  67 +}
  68 +
  69 +template<typename T>
  70 +void cpu_local_max3(T* cpu_output, T* cpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  71 +
  72 + //calculate the number of bytes in the data set.
  73 + unsigned int bytes = x * y * z * sizeof(T);
  74 +
  75 + //allocate space on the gpu for the vote data and the output.
  76 + T* gpu_vote;
  77 + cudaMalloc(&gpu_vote, bytes);
  78 +
  79 + T* gpu_output;
  80 + cudaMalloc(&gpu_output, bytes);
  81 +
  82 + //copy the vote data to the gpu.
  83 + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice);
  84 +
  85 + //call the gpu version of local max function.
  86 + gpu_local_max3<T>(gpu_output, gpu_vote, t, conn, x ,y, z);
  87 +
  88 + //copy the detected result to the cpu.
  89 + cudaMemcpy(cpu_output, gpu_output, bytes, cudaMemcpyDeviceToHost);
  90 +
  91 + cudaFree(gpu_vote);
  92 + cudaFree(gpu_output);
  93 +}
  94 +
  95 +#endif
0 96 \ No newline at end of file
... ...
cpp/main.cpp
... ... @@ -215,7 +215,7 @@ int main(int argc, char** argv){
215 215 unsigned int r[3] = { 12, rmax, rmax};
216 216 float std = 5;
217 217 float sigma[3] = { std, std, std};
218   - unsigned int nlmax = 1;
  218 + unsigned int nlmax = 5;
219 219 unsigned int conn[3] = { nlmax, nlmax, nlmax};
220 220 float phi_deg = 25.0;
221 221 float phi = phi_deg * pi /180;
... ... @@ -269,31 +269,33 @@ int main(int argc, char** argv){
269 269  
270 270 // creat a file for saving the list centers
271 271  
272   - //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");
273   - //// set the number of detected cells to zero.
274   - //int nod = 0;
275   - //if (list.is_open()){
276   -
277   - // for (int iz=0; iz<z; iz++){
278   - // for (int iy=0; iy<y; iy++){
279   - // for (int ix=0; ix<x; ix++){
280   -
281   - // int idx = iz * x * y + iy * x + ix;
282   - // if (cpu_out[idx]==1){
283   - // nod++;
284   - // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;
285   - //
286   - // }
287   - // }
288   - // }
289   - // }
290   - // list << "p" << "\t";
291   - // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
292   - // list << i_nod << "\t";
293   - // }
294   -
295   - //list.close();
296   - //}
  272 + std::ofstream list(OutName.str()+std::to_string(t0)+".obj");
  273 + // set the number of detected cells to zero.
  274 + int nod = 0;
  275 + if (list.is_open()){
  276 +
  277 + for (int iz=0; iz<z; iz++){
  278 + for (int iy=0; iy<y; iy++){
  279 + for (int ix=0; ix<x; ix++){
  280 +
  281 + int idx = iz * x * y + iy * x + ix;
  282 + if (cpu_out[idx]>0){
  283 + nod++;
  284 + list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << "\t" << cpu_out[idx] << '\n' ;
  285 +
  286 + }
  287 + }
  288 + }
  289 + }
  290 + list << "p" << "\t";
  291 + for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
  292 + list << i_nod << "\t";
  293 + }
  294 +
  295 + list.close();
  296 + }
  297 +
  298 +
297 299 //}
298 300 cudaDeviceReset();
299 301  
... ...