Commit 3f0e43cee10e36ee424753707555ba8e23b3273f

Authored by Laila Saadatifard
1 parent c986ebb7

compute the local max without thresholding

cpp/CMakeLists.txt
@@ -58,3 +58,5 @@ target_link_libraries(ivote3 @@ -58,3 +58,5 @@ target_link_libraries(ivote3
58 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) 58 configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY)
59 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) 59 configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY)
60 configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) 60 configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY)
  61 +
  62 +configure_file(nissl-raw-data/nissl-float-128.128.128.vol ${CMAKE_CURRENT_BINARY_DIR}/nissl-float-128.128.128_at.vol @ONLY)
61 \ No newline at end of file 63 \ No newline at end of file
cpp/local_max3.cuh
@@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
6 #include <stim/cuda/cudatools.h> 6 #include <stim/cuda/cudatools.h>
7 7
8 template<typename T> 8 template<typename T>
9 -__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){ 9 +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, int conn_x, int conn_y, int conn_z, int x, int y, int z){
10 10
11 //calculate x,y,z coordinates for this thread 11 //calculate x,y,z coordinates for this thread
12 int xi = blockIdx.x * blockDim.x + threadIdx.x; 12 int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int @@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int
25 //initialize the max value to the vote value for this pixel. 25 //initialize the max value to the vote value for this pixel.
26 T lv_i = gpu_vote[i]; 26 T lv_i = gpu_vote[i];
27 27
28 - //check the vote value is greater than threshold  
29 - if (lv_i < t) return;  
30 -  
31 for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ 28 for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){
32 29
33 for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ 30 for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){
@@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int @@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int
38 35
39 int i_l = zl * x * y + yl * x + xl; 36 int i_l = zl * x * y + yl * x + xl;
40 if (gpu_vote[i_l] > lv_i) return; 37 if (gpu_vote[i_l] > lv_i) return;
  38 + if (gpu_vote[i_l] == lv_i){
  39 + if( i_l > i){
  40 + return;
  41 + }
  42 + }
41 } 43 }
42 } 44 }
43 } 45 }
44 } 46 }
45 47
46 - gpu_center[i] = 1; 48 + gpu_center[i] = lv_i;
47 } 49 }
48 50
49 template<typename T> 51 template<typename T>
@@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign @@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign
55 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); 57 dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
56 58
57 //call the kernel to find the local max 59 //call the kernel to find the local max
58 - cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z); 60 + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, conn[0], conn[1], conn[2], x, y, z);
59 61
60 62
61 63
cpp/local_max3_threshold.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_LOCAL_MAX3_THRESHOLD_H
  2 +#define STIM_CUDA_LOCAL_MAX3_THRESHOLD_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +
  8 +template<typename T>
  9 +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){
  10 +
  11 + //calculate x,y,z coordinates for this thread
  12 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  13 + //find the grid size along y
  14 + int grid_y = y / blockDim.y;
  15 + int blockidx_y = blockIdx.y % grid_y;
  16 + int yi = blockidx_y * blockDim.y + threadIdx.y;
  17 + int zi = blockIdx.y / grid_y;
  18 + int i = zi * x * y + yi * x + xi;
  19 +
  20 + if (xi>=x || yi>=y || zi>=z) return;
  21 +
  22 + //initialize the center value for this pixel to zero
  23 + gpu_center[i] = 0;
  24 +
  25 + //initialize the max value to the vote value for this pixel.
  26 + T lv_i = gpu_vote[i];
  27 +
  28 + //check that the vote value is at least the threshold
  29 + if (lv_i < t) return;
  30 +
  31 + for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){
  32 +
  33 + for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){
  34 +
  35 + for (int zl = zi - conn_z ; zl <= zi + conn_z; zl++){
  36 +
  37 + if (xl>=0 && yl>=0 && zl>=0 && xl<x && yl<y && zl<z){
  38 +
  39 + int i_l = zl * x * y + yl * x + xl;
  40 + if (gpu_vote[i_l] > lv_i) return;
  41 + if (gpu_vote[i_l] == lv_i){
  42 + if( i_l > i){
  43 + return;
  44 + }
  45 + }
  46 + }
  47 + }
  48 + }
  49 + }
  50 +
  51 + gpu_center[i] = 1;
  52 +}
  53 +
  54 +template<typename T>
  55 +void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  56 +
  57 + //find the max number of threads per block.
  58 + unsigned int max_threads = stim::maxThreadsPerBlock();
  59 + dim3 threads(sqrt (max_threads),sqrt (max_threads));
  60 + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  61 +
  62 + //call the kernel to find the local max
  63 + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z);
  64 +
  65 +
  66 +
  67 +}
  68 +
  69 +template<typename T>
  70 +void cpu_local_max3(T* cpu_output, T* cpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  71 +
  72 + //calculate the number of bytes in the data set.
  73 + unsigned int bytes = x * y * z * sizeof(T);
  74 +
  75 + //allocate space on the gpu for the vote data and the output.
  76 + T* gpu_vote;
  77 + cudaMalloc(&gpu_vote, bytes);
  78 +
  79 + T* gpu_output;
  80 + cudaMalloc(&gpu_output, bytes);
  81 +
  82 + //copy the vote data to the gpu.
  83 + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice);
  84 +
  85 + //call the gpu version of local max function.
  86 + gpu_local_max3<T>(gpu_output, gpu_vote, t, conn, x ,y, z);
  87 +
  88 + //copy the detected result to the cpu.
  89 + cudaMemcpy(cpu_output, gpu_output, bytes, cudaMemcpyDeviceToHost);
  90 +
  91 + cudaFree(gpu_vote);
  92 + cudaFree(gpu_output);
  93 +}
  94 +
  95 +#endif
0 \ No newline at end of file 96 \ No newline at end of file
@@ -215,7 +215,7 @@ int main(int argc, char** argv){ @@ -215,7 +215,7 @@ int main(int argc, char** argv){
215 unsigned int r[3] = { 12, rmax, rmax}; 215 unsigned int r[3] = { 12, rmax, rmax};
216 float std = 5; 216 float std = 5;
217 float sigma[3] = { std, std, std}; 217 float sigma[3] = { std, std, std};
218 - unsigned int nlmax = 1; 218 + unsigned int nlmax = 5;
219 unsigned int conn[3] = { nlmax, nlmax, nlmax}; 219 unsigned int conn[3] = { nlmax, nlmax, nlmax};
220 float phi_deg = 25.0; 220 float phi_deg = 25.0;
221 float phi = phi_deg * pi /180; 221 float phi = phi_deg * pi /180;
@@ -269,31 +269,33 @@ int main(int argc, char** argv){ @@ -269,31 +269,33 @@ int main(int argc, char** argv){
269 269
270 // create a file for saving the list of centers 270 // create a file for saving the list of centers
271 271
272 - //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj");  
273 - //// set the number of detected cells to zero.  
274 - //int nod = 0;  
275 - //if (list.is_open()){  
276 -  
277 - // for (int iz=0; iz<z; iz++){  
278 - // for (int iy=0; iy<y; iy++){  
279 - // for (int ix=0; ix<x; ix++){  
280 -  
281 - // int idx = iz * x * y + iy * x + ix;  
282 - // if (cpu_out[idx]==1){  
283 - // nod++;  
284 - // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ;  
285 - //  
286 - // }  
287 - // }  
288 - // }  
289 - // }  
290 - // list << "p" << "\t";  
291 - // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){  
292 - // list << i_nod << "\t";  
293 - // }  
294 -  
295 - //list.close();  
296 - //} 272 + std::ofstream list(OutName.str()+std::to_string(t0)+".obj");
  273 + // set the number of detected cells to zero.
  274 + int nod = 0;
  275 + if (list.is_open()){
  276 +
  277 + for (int iz=0; iz<z; iz++){
  278 + for (int iy=0; iy<y; iy++){
  279 + for (int ix=0; ix<x; ix++){
  280 +
  281 + int idx = iz * x * y + iy * x + ix;
  282 + if (cpu_out[idx]>0){
  283 + nod++;
  284 + list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << "\t" << cpu_out[idx] << '\n' ;
  285 +
  286 + }
  287 + }
  288 + }
  289 + }
  290 + list << "p" << "\t";
  291 + for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){
  292 + list << i_nod << "\t";
  293 + }
  294 +
  295 + list.close();
  296 + }
  297 +
  298 +
297 //} 299 //}
298 cudaDeviceReset(); 300 cudaDeviceReset();
299 301