Blame view

cpp/vote3.cuh 3.78 KB
5c079506   Laila Saadatifard   upload the ivote ...
1
2
3
4
5
6
7
8
9
10
11
12
  #ifndef STIM_CUDA_VOTE3_H
  #define STIM_CUDA_VOTE3_H
  
  #include <iostream>
  #include <cuda.h>
  #include <stim/cuda/cudatools.h>
  #include <stim/cuda/sharedmem.cuh>
  #include <stim/cuda/cudatools/error.h>
  
  
  		// this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  		template<typename T>
02fb26b3   Laila Saadatifard   change the vote a...
13
  		__global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){
5c079506   Laila Saadatifard   upload the ivote ...
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  
  			//calculate x,y,z coordinates for this thread
  			int xi = blockIdx.x * blockDim.x + threadIdx.x;
  			//find the grid size along y
  			int grid_y = y / blockDim.y;
  			int blockidx_y = blockIdx.y % grid_y;
  			int yi = blockidx_y * blockDim.y + threadIdx.y;
  			int zi = blockIdx.y / grid_y;
  			int i = zi * x * y + yi * x + xi;			
  			
  			if(xi>=x || yi>=y || zi>=z) return;
  			
  			// define a local variable to sum the votes from the voters
  			float sum = 0;
  			
  			int rx_sq = rx * rx;
  			int ry_sq = ry * ry;
  			int rz_sq = rz * rz;
  
  			for (int z_v = -rz; z_v<=rz; z_v++){
  							
  				for(int y_v = -ry; y_v <= ry; y_v++){
  
  					for(int x_v = -rx; x_v <= rx; x_v++){
  
  						//calculate the x, y ,z indices for the current voter
  						int xi_v = (xi + x_v) ;
  						int yi_v = (yi + y_v) ;
  						int zi_v = (zi + z_v) ; 
  						if (zi_v >=0 && zi_v < z && yi_v >=0 && yi_v < y && xi_v >=0 && xi_v < x){
  														
  							//calculate the 1D index for the current voter
  							unsigned int id_v = (zi_v) * x * y + (yi_v) * x + (xi_v);
  
  							//find the gradient values along the x, y ,z axis, and the gradient magnitude for this voter
02fb26b3   Laila Saadatifard   change the vote a...
49
  							
5c079506   Laila Saadatifard   upload the ivote ...
50
51
52
  							float g_v_x = gpu_grad[id_v * 3 + 0];
  							float g_v_y = gpu_grad[id_v * 3 + 1];
  							float g_v_z = gpu_grad[id_v * 3 + 2];
02fb26b3   Laila Saadatifard   change the vote a...
53
  							float mag_v = sqrt( g_v_x * g_v_x + g_v_y * g_v_y + g_v_z * g_v_z);
5c079506   Laila Saadatifard   upload the ivote ...
54
55
56
57
58
59
60
  							//calculate the distance between the pixel and the current voter.
  							float x_sq = x_v * x_v;
  							float y_sq = y_v * y_v;
  							float z_sq = z_v * z_v;
  							float d_pv = sqrt(x_sq + y_sq + z_sq);
  
  							// calculate the angle between the pixel and the current voter.
02fb26b3   Laila Saadatifard   change the vote a...
61
  							float cos_diff = (g_v_x * (-x_v) + g_v_y * (-y_v) + g_v_z * (-z_v))/(d_pv * mag_v);
5c079506   Laila Saadatifard   upload the ivote ...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  						
  							// check if the current voter is located in the voting area of this pixel.
  							if ((((x_sq)/rx_sq + (y_sq)/ry_sq + (z_sq)/rz_sq)<= 1) && (cos_diff >= cos_phi)){
  												
  								sum += mag_v;	
  							}
  						}
  					}	
  				}
  			}
  						
  			gpu_vote[i] = sum;			
  		}
  
  		template<typename T>
02fb26b3   Laila Saadatifard   change the vote a...
77
  		void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
5c079506   Laila Saadatifard   upload the ivote ...
78
  
02fb26b3   Laila Saadatifard   change the vote a...
79
  			
5c079506   Laila Saadatifard   upload the ivote ...
80
81
82
83
84
  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads(sqrt (max_threads),sqrt (max_threads));
  			dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z);
  						
  			//call the kernel to do the voting
02fb26b3   Laila Saadatifard   change the vote a...
85
  			vote3 <T> <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z);
5c079506   Laila Saadatifard   upload the ivote ...
86
87
88
89
90
  
  		}
  
  
  		template<typename T>
02fb26b3   Laila Saadatifard   change the vote a...
91
  		void cpu_vote3(T* cpu_vote, T* cpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){
5c079506   Laila Saadatifard   upload the ivote ...
92
93
94
95
96
97
98
99
100
101
102
103
104
  
  			//calculate the number of bytes in the array
  			unsigned int bytes = x * y * z * sizeof(T);
  
  			
  			//allocate space on the GPU for the Vote Image
  			T* gpu_vote;
  			cudaMalloc(&gpu_vote, bytes);		
  
  			//allocate space on the GPU for the input Gradient image
  			T* gpu_grad;
  			cudaMalloc(&gpu_grad, bytes*3);
  			
5c079506   Laila Saadatifard   upload the ivote ...
105
  			
5c079506   Laila Saadatifard   upload the ivote ...
106
107
108
  			//copy the Gradient data to the GPU
  			cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);
  			
02fb26b3   Laila Saadatifard   change the vote a...
109
  					
5c079506   Laila Saadatifard   upload the ivote ...
110
  			//call the GPU version of the vote calculation function
02fb26b3   Laila Saadatifard   change the vote a...
111
  			gpu_vote3<T>(gpu_vote, gpu_grad, cos_phi, r, x , y, z);
5c079506   Laila Saadatifard   upload the ivote ...
112
113
114
115
116
117
118
  							
  			//copy the Vote Data back to the CPU
  			cudaMemcpy(cpu_vote, gpu_vote, bytes, cudaMemcpyDeviceToHost) ;
  
  			//free allocated memory
  			cudaFree(gpu_vote);
  			cudaFree(gpu_grad);
02fb26b3   Laila Saadatifard   change the vote a...
119
  			
5c079506   Laila Saadatifard   upload the ivote ...
120
121
122
123
  		}
  
  
  #endif