Commit d8c97a2b80e8bfefa80c8cea42ede50d2077d301

Authored by David Mayerich
2 parents ef1f6218 daacc99c

Merge branch 'local_max' into 'master'

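Change the local max kernel so that it no longer thresholds the output: the final_t parameter is removed from cuda_local_max, gpu_local_max, and cpu_local_max, and the kernel now writes the vote value (gpuVote[i]) to gpuCenters[i] instead of 1.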

See merge request !32
stim/cuda/ivote/local_max.cuh
... ... @@ -10,7 +10,7 @@ namespace stim{
10 10  
11 11 // this kernel calculates the local maximum for finding the cell centers
12 12 template<typename T>
13   - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){
  13 + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
14 14  
15 15 // calculate the 2D coordinates for this current thread.
16 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -27,7 +27,7 @@ namespace stim{
27 27 T val = gpuVote[i];
28 28  
29 29 //compare to the threshold
30   - if(val < final_t) return;
  30 + //if(val < final_t) return;
31 31  
32 32 //define an array to store indices with same vote value
33 33 /*int * IdxEq;
... ... @@ -56,11 +56,12 @@ namespace stim{
56 56 return;
57 57 }
58 58 } */
59   - gpuCenters[i] = 1;
  59 + //gpuCenters[i] = 1;
  60 + gpuCenters[i] = gpuVote[i];
60 61 }
61 62  
62 63 template<typename T>
63   - void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
  64 + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){
64 65  
65 66 unsigned int max_threads = stim::maxThreadsPerBlock();
66 67 /*dim3 threads(max_threads, 1);
... ... @@ -69,11 +70,11 @@ namespace stim{
69 70 dim3 blocks(x/threads.x + 1, y/threads.y + 1);
70 71  
71 72 //call the kernel to find the local maximum.
72   - cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y);
  73 + cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y);
73 74 }
74 75  
75 76 template<typename T>
76   - void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
  77 + void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){
77 78  
78 79 //calculate the number of bytes in the array
79 80 unsigned int bytes = x * y * sizeof(T);
... ... @@ -90,7 +91,7 @@ namespace stim{
90 91 HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice));
91 92  
92 93 //call the GPU version of the local max function
93   - gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y);
  94 + gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y);
94 95  
95 96 //copy the cell centers data to the CPU
96 97 cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ;
... ...
stim/cuda/ivote/update_dir_bb.cuh
... ... @@ -40,8 +40,9 @@ namespace stim{
40 40 bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box
41 41  
42 42 int x_table = 2*rmax +1;
43   - int lut_i;
44 43 T rmax_sq = rmax * rmax;
  44 +
  45 + int lut_i;
45 46 T dx_sq, dy_sq;
46 47  
47 48 bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image
... ... @@ -49,11 +50,12 @@ namespace stim{
49 50  
50 51 int by, bx;
51 52 int dx, dy; //coordinate relative to (xi, yi)
  53 +
52 54 T v;
53 55 T max_v = 0; //initialize the maximum vote value to zero
54 56 T alpha;
55   - int max_dx = bb.low[0];
56   - int max_dy = bb.low[1];
  57 + int max_dx = bb.low[0] - xi;
  58 + int max_dy = bb.low[1] - yi;
57 59 for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box
58 60 dy = by - yi; //calculate the y coordinate of the current point relative to yi
59 61 dy_sq = dy * dy;
... ...