Commit 3f0e43cee10e36ee424753707555ba8e23b3273f
1 parent
c986ebb7
compute the local max without thresholding
Showing
4 changed files
with
133 additions
and
32 deletions
Show diff stats
cpp/CMakeLists.txt
@@ -58,3 +58,5 @@ target_link_libraries(ivote3 | @@ -58,3 +58,5 @@ target_link_libraries(ivote3 | ||
58 | configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) | 58 | configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) |
59 | configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) | 59 | configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) |
60 | configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) | 60 | configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) |
61 | + | ||
62 | +configure_file(nissl-raw-data/nissl-float-128.128.128.vol ${CMAKE_CURRENT_BINARY_DIR}/nissl-float-128.128.128_at.vol @ONLY) | ||
61 | \ No newline at end of file | 63 | \ No newline at end of file |
cpp/local_max3.cuh
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | #include <stim/cuda/cudatools.h> | 6 | #include <stim/cuda/cudatools.h> |
7 | 7 | ||
8 | template<typename T> | 8 | template<typename T> |
9 | -__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){ | 9 | +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, int conn_x, int conn_y, int conn_z, int x, int y, int z){ |
10 | 10 | ||
11 | //calculate x,y,z coordinates for this thread | 11 | //calculate x,y,z coordinates for this thread |
12 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 12 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
@@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int | @@ -25,9 +25,6 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int | ||
25 | //initialize the max value to the vote value for this pixle. | 25 | //initialize the max value to the vote value for this pixle. |
26 | T lv_i = gpu_vote[i]; | 26 | T lv_i = gpu_vote[i]; |
27 | 27 | ||
28 | - //check the vote value is greater than threshold | ||
29 | - if (lv_i < t) return; | ||
30 | - | ||
31 | for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ | 28 | for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ |
32 | 29 | ||
33 | for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ | 30 | for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ |
@@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int | @@ -38,12 +35,17 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int | ||
38 | 35 | ||
39 | int i_l = zl * x * y + yl * x + xl; | 36 | int i_l = zl * x * y + yl * x + xl; |
40 | if (gpu_vote[i_l] > lv_i) return; | 37 | if (gpu_vote[i_l] > lv_i) return; |
38 | + if (gpu_vote[i_l] == lv_i){ | ||
39 | + if( i_l > i){ | ||
40 | + return; | ||
41 | + } | ||
42 | + } | ||
41 | } | 43 | } |
42 | } | 44 | } |
43 | } | 45 | } |
44 | } | 46 | } |
45 | 47 | ||
46 | - gpu_center[i] = 1; | 48 | + gpu_center[i] = lv_i; |
47 | } | 49 | } |
48 | 50 | ||
49 | template<typename T> | 51 | template<typename T> |
@@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign | @@ -55,7 +57,7 @@ void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsign | ||
55 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | 57 | dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); |
56 | 58 | ||
57 | //call the kernel to find the local max | 59 | //call the kernel to find the local max |
58 | - cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z); | 60 | + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, conn[0], conn[1], conn[2], x, y, z); |
59 | 61 | ||
60 | 62 | ||
61 | 63 |
1 | +#ifndef STIM_CUDA_LOCAL_MAX3_THRESHOLD_H | ||
2 | +#define STIM_CUDA_LOCAL_MAX3_THRESHOLD_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | + | ||
8 | +template<typename T> | ||
9 | +__global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int conn_y, int conn_z, int x, int y, int z){ | ||
10 | + | ||
11 | + //calculate x,y,z coordinates for this thread | ||
12 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
13 | + //find the grid size along y | ||
14 | + int grid_y = y / blockDim.y; | ||
15 | + int blockidx_y = blockIdx.y % grid_y; | ||
16 | + int yi = blockidx_y * blockDim.y + threadIdx.y; | ||
17 | + int zi = blockIdx.y / grid_y; | ||
18 | + int i = zi * x * y + yi * x + xi; | ||
19 | + | ||
20 | + if (xi>=x || yi>=y || zi>=z) return; | ||
21 | + | ||
22 | + //initilize the center value for this pixel to zero | ||
23 | + gpu_center[i] = 0; | ||
24 | + | ||
25 | + //initialize the max value to the vote value for this pixle. | ||
26 | + T lv_i = gpu_vote[i]; | ||
27 | + | ||
28 | + //check the vote value is greater than threshold | ||
29 | + if (lv_i < t) return; | ||
30 | + | ||
31 | + for (int xl = xi - conn_x; xl <= xi + conn_x; xl++){ | ||
32 | + | ||
33 | + for (int yl = yi - conn_y; yl <= yi + conn_y; yl++){ | ||
34 | + | ||
35 | + for (int zl = zi - conn_z ; zl <= zi + conn_z; zl++){ | ||
36 | + | ||
37 | + if (xl>=0 && yl>=0 && zl>=0 && xl<x && yl<y && zl<z){ | ||
38 | + | ||
39 | + int i_l = zl * x * y + yl * x + xl; | ||
40 | + if (gpu_vote[i_l] > lv_i) return; | ||
41 | + if (gpu_vote[i_l] == lv_i){ | ||
42 | + if( i_l > i){ | ||
43 | + return; | ||
44 | + } | ||
45 | + } | ||
46 | + } | ||
47 | + } | ||
48 | + } | ||
49 | + } | ||
50 | + | ||
51 | + gpu_center[i] = 1; | ||
52 | +} | ||
53 | + | ||
54 | +template<typename T> | ||
55 | +void gpu_local_max3(T* gpu_output, T* gpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | ||
56 | + | ||
57 | + //find the max number of threads per block. | ||
58 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | ||
59 | + dim3 threads(sqrt (max_threads),sqrt (max_threads)); | ||
60 | + dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); | ||
61 | + | ||
62 | + //call the kernel to find the local max | ||
63 | + cuda_local_max3<T><<<blocks, threads>>>(gpu_output, gpu_vote, t, conn[0], conn[1], conn[2], x, y, z); | ||
64 | + | ||
65 | + | ||
66 | + | ||
67 | +} | ||
68 | + | ||
69 | +template<typename T> | ||
70 | +void cpu_local_max3(T* cpu_output, T* cpu_vote, T t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ | ||
71 | + | ||
72 | + //calculate the number of bytes in the data set. | ||
73 | + unsigned int bytes = x * y * z * sizeof(T); | ||
74 | + | ||
75 | + //allocate space on the gpu for the vote data and the output. | ||
76 | + T* gpu_vote; | ||
77 | + cudaMalloc(&gpu_vote, bytes); | ||
78 | + | ||
79 | + T* gpu_output; | ||
80 | + cudaMalloc(&gpu_output, bytes); | ||
81 | + | ||
82 | + //copy the vote data to the gpu. | ||
83 | + cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice); | ||
84 | + | ||
85 | + //call the gpu version of local max function. | ||
86 | + gpu_local_max3<T>(gpu_output, gpu_vote, t, conn, x ,y, z); | ||
87 | + | ||
88 | + //copy the detected result to the cpu. | ||
89 | + cudaMemcpy(cpu_output, gpu_output, bytes, cudaMemcpyDeviceToHost); | ||
90 | + | ||
91 | + cudaFree(gpu_vote); | ||
92 | + cudaFree(gpu_output); | ||
93 | +} | ||
94 | + | ||
95 | +#endif | ||
0 | \ No newline at end of file | 96 | \ No newline at end of file |
cpp/main.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv){ | @@ -215,7 +215,7 @@ int main(int argc, char** argv){ | ||
215 | unsigned int r[3] = { 12, rmax, rmax}; | 215 | unsigned int r[3] = { 12, rmax, rmax}; |
216 | float std = 5; | 216 | float std = 5; |
217 | float sigma[3] = { std, std, std}; | 217 | float sigma[3] = { std, std, std}; |
218 | - unsigned int nlmax = 1; | 218 | + unsigned int nlmax = 5; |
219 | unsigned int conn[3] = { nlmax, nlmax, nlmax}; | 219 | unsigned int conn[3] = { nlmax, nlmax, nlmax}; |
220 | float phi_deg = 25.0; | 220 | float phi_deg = 25.0; |
221 | float phi = phi_deg * pi /180; | 221 | float phi = phi_deg * pi /180; |
@@ -269,31 +269,33 @@ int main(int argc, char** argv){ | @@ -269,31 +269,33 @@ int main(int argc, char** argv){ | ||
269 | 269 | ||
270 | // creat a file for saving the list centers | 270 | // creat a file for saving the list centers |
271 | 271 | ||
272 | - //std::ofstream list("shared2D-v8/" + OutName.str()+std::to_string(t0)+".obj"); | ||
273 | - //// set the number of detected cells to zero. | ||
274 | - //int nod = 0; | ||
275 | - //if (list.is_open()){ | ||
276 | - | ||
277 | - // for (int iz=0; iz<z; iz++){ | ||
278 | - // for (int iy=0; iy<y; iy++){ | ||
279 | - // for (int ix=0; ix<x; ix++){ | ||
280 | - | ||
281 | - // int idx = iz * x * y + iy * x + ix; | ||
282 | - // if (cpu_out[idx]==1){ | ||
283 | - // nod++; | ||
284 | - // list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << '\n' ; | ||
285 | - // | ||
286 | - // } | ||
287 | - // } | ||
288 | - // } | ||
289 | - // } | ||
290 | - // list << "p" << "\t"; | ||
291 | - // for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | ||
292 | - // list << i_nod << "\t"; | ||
293 | - // } | ||
294 | - | ||
295 | - //list.close(); | ||
296 | - //} | 272 | + std::ofstream list(OutName.str()+std::to_string(t0)+".obj"); |
273 | + // set the number of detected cells to zero. | ||
274 | + int nod = 0; | ||
275 | + if (list.is_open()){ | ||
276 | + | ||
277 | + for (int iz=0; iz<z; iz++){ | ||
278 | + for (int iy=0; iy<y; iy++){ | ||
279 | + for (int ix=0; ix<x; ix++){ | ||
280 | + | ||
281 | + int idx = iz * x * y + iy * x + ix; | ||
282 | + if (cpu_out[idx]>0){ | ||
283 | + nod++; | ||
284 | + list << "v" << "\t" << ix << "\t" << iy << "\t"<< iz << "\t" << cpu_out[idx] << '\n' ; | ||
285 | + | ||
286 | + } | ||
287 | + } | ||
288 | + } | ||
289 | + } | ||
290 | + list << "p" << "\t"; | ||
291 | + for (unsigned int i_nod =1 ; i_nod <=nod; i_nod++){ | ||
292 | + list << i_nod << "\t"; | ||
293 | + } | ||
294 | + | ||
295 | + list.close(); | ||
296 | + } | ||
297 | + | ||
298 | + | ||
297 | //} | 299 | //} |
298 | cudaDeviceReset(); | 300 | cudaDeviceReset(); |
299 | 301 |