update_dir3_aabb.cuh 6.88 KB
#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H
#define STIM_CUDA_UPDATE_DIR3_AABB_H

# include <iostream>
# include <cuda.h>
#include <stim/cuda/cudatools.h>
#include "cpyToshare.cuh"
#define M_PI	3.14159
#include <stim/math/circle.h>
#include <stim/math/vec3.h>
#include <stim/math/plane.h>
#include <stim/math/vector.h>
#include <stim/visualization/aabb3.h>

		// this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
		template<typename T>
		__global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz,  int x,  int y, int z){
			
			int xi = blockIdx.x * blockDim.x + threadIdx.x;			//calculate x,y,z coordinates for this thread
			
			int grid_y = y / blockDim.y;						//find the grid size along y
			int blockidx_y = blockIdx.y % grid_y;
			int yi = blockidx_y * blockDim.y + threadIdx.y;
			int zi = blockIdx.y / grid_y;
			if(xi >= x|| yi >= y || zi>= z) return;
			int i = zi * x * y + yi * x + xi;				//compute the global 1D index for this pixel

			float rx_sq = rx * rx;        // compute the square for rmax 
			float ry_sq = ry * ry;
			float rz_sq = rz * rz;
			
			stim::vec3<float> g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]);   // form a vec3 variable for the gradient vector
			stim::vec3<float> g_sph = g.cart2sph();			//convert cartesian coordinate to spherical for the gradient vector
			float n =8;										//set the number of points to find the boundaries of the conical voting area
			float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ;			//calculate the center point of the surface of the voting area for the voter
			float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ;
			float zc = rz * cos(g_sph[2]) ;
			float r = sqrt(xc*xc + yc*yc + zc*zc);
			xc+=xi;
			yc+=yi;
			zc+=zi;
			stim::vec3<float> center(xc,yc,zc);

			float d = 2 * r * tan(phi);		//find the diameter of the conical voting area
			stim::vec3<float> norm = g.norm();			//compute the normalize gradient vector
			float step = 360.0/n;
			stim::circle<float>  cir(center, d, norm);
			stim::aabb3<int> bb(xi,yi,zi);
			bb.insert((int) xc, (int)yc, (int)zc);
			for(float j = 0; j <360.0; j += step){
				stim::vec3<float> out = cir.p(j);
				bb.insert(out[0], out[1], out[2]);
			}
			bb.trim_low(xi-rx, yi-ry, zi-rz);
			bb.trim_low(0,0,0);
			bb.trim_high(xi+rx, yi+ry, zi+rz);
			bb.trim_high(x-1, y-1, z-1);
			int bx,by,bz;
			int dx, dy, dz;
			float dx_sq, dy_sq, dz_sq;
			float dist, cos_diff;
			int idx_c;

			float max = 0;					// define a local variable to maximum value of the vote image in the voting area for this voter
			float l_vote = 0;
			
			float id_x = g[0];				// define local variables for the x, y, and z coordinations point to the vote direction
			float id_y = g[1];
			float id_z = g[2];
			
			for (bz=bb.low[2]; bz<=bb.high[2]; bz++){
				dz = bz - zi;							//compute the distance bw the voter and the current counter along z axis
				dz_sq = dz * dz;
				for (by=bb.low[1]; by<=bb.high[1]; by++){
					dy = by - yi;								//compute the distance bw the voter and the current counter along y axis
					dy_sq = dy * dy;
					for (bx=bb.low[0]; bx<=bb.high[0]; bx++){
						dx = bx - xi;								//compute the distance bw the voter and the current counter along x axis
						dx_sq = dx * dx;

						dist = sqrt(dx_sq + dy_sq + dz_sq);			//calculate the distance between the voter and the current counter
						cos_diff = (norm[0] * dx + norm[1] * dy +  norm[2] * dz)/dist;			 // calculate the cosine of angle between the voter and the current counter
						if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){			//check if the current counter located in the voting area of the voter    
							idx_c = (bz* y + by) * x + bx;			//calculate the 1D index for the current counter
							l_vote = gpu_vote[idx_c];
							if  (l_vote>max) {
								max = l_vote;
								id_x = (float)dx;
								id_y = (float)dy;
								id_z = (float)dz;
							}
						}								
					}						
				}				
			}
			float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z);
			gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id);
			gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id);
			gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id);
		}



		// this kernel updates the gradient direction by the calculated voting direction.
		template<typename T>
		__global__ void update_grad3(T* gpu_grad, T* gpu_dir, size_t x, size_t y, size_t z){

			size_t xi = blockIdx.x * blockDim.x + threadIdx.x;				//calculate x,y,z coordinates for this thread
			
			size_t grid_y = y / blockDim.y;							//find the grid size along y
			size_t blockidx_y = blockIdx.y % grid_y;
			size_t yi = blockidx_y * blockDim.y + threadIdx.y;
			size_t zi = blockIdx.y / grid_y;
			
			if(xi >= x || yi >= y || zi >= z) return;
			
			size_t i = zi * x * y + yi * x + xi;

			gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0];			//update the gradient image with the new direction direction
			gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1];
			gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2];
		}
		
		template<typename T>
		void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], size_t x, size_t y, size_t z){

			unsigned int max_threads = stim::maxThreadsPerBlock();
			dim3 threads((unsigned int)sqrt (max_threads), (unsigned int)sqrt (max_threads));
			dim3 blocks((unsigned int)x / threads.x + 1, ((unsigned int)y / threads.y + 1) * (unsigned int)z);
				
			T* gpu_dir;												// allocate space on the GPU for the updated vote direction
			cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3);	

			//call the kernel to calculate the new voting direction
			update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, (int)r[0], (int)r[1], (int)r[2], (int)x , (int)y, (int)z);
			
			//call the kernel to update the gradient direction
			update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z);
						
			cudaFree(gpu_dir);														//free allocated memory

		}
		
		template<typename T>
		void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){

			//calculate the number of bytes in the array
			unsigned int bytes = x * y * z * sizeof(T);

			//allocate space on the GPU for the Vote data
			T* gpu_vote;
			cudaMalloc(&gpu_vote, bytes);

			//copy the input vote data to the GPU
			cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice);	

			//allocate space on the GPU for the Gradient data
			T* gpu_grad;
			cudaMalloc(&gpu_grad, bytes*3);

			//copy the Gradient data to the GPU
			cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice);

			//call the GPU version of the update direction function
			gpu_update_dir3<T>(gpu_grad, gpu_vote, cos_phi, r, x , y, z);
							
			//copy the new gradient image back to the CPU
			cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ;

			//free allocated memory
			cudaFree(gpu_vote);
			cudaFree(gpu_grad);
		}
		


#endif