#ifndef STIM_CUDA_UPDATE_DIR3_AABB_H #define STIM_CUDA_UPDATE_DIR3_AABB_H # include # include #include #include "cpyToshare.cuh" #define M_PI 3.14159 #include #include #include #include #include // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. template __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread int grid_y = y / blockDim.y; //find the grid size along y int blockidx_y = blockIdx.y % grid_y; int yi = blockidx_y * blockDim.y + threadIdx.y; int zi = blockIdx.y / grid_y; if(xi >= x|| yi >= y || zi>= z) return; int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel float rx_sq = rx * rx; // compute the square for rmax float ry_sq = ry * ry; float rz_sq = rz * rz; stim::vec3 g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector stim::vec3 g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector float n =8; //set the number of points to find the boundaries of the conical voting area float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; float zc = rz * cos(g_sph[2]) ; float r = sqrt(xc*xc + yc*yc + zc*zc); xc+=xi; yc+=yi; zc+=zi; stim::vec3 center(xc,yc,zc); float d = 2 * r * tan(phi); //find the diameter of the conical voting area stim::vec3 norm = g.norm(); //compute the normalize gradient vector float step = 360.0/n; stim::circle cir(center, d, norm); stim::aabb3 bb(xi,yi,zi); bb.insert((int) xc, (int)yc, (int)zc); for(float j = 0; j <360.0; j += step){ stim::vec3 out = cir.p(j); bb.insert(out[0], out[1], out[2]); } bb.trim_low(xi-rx, yi-ry, zi-rz); bb.trim_low(0,0,0); bb.trim_high(xi+rx, yi+ry, zi+rz); bb.trim_high(x-1, y-1, z-1); int bx,by,bz; int dx, dy, dz; float dx_sq, dy_sq, dz_sq; float dist, cos_diff; int idx_c; float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter float l_vote = 0; float id_x = g[0]; // define local variables for the x, y, and z coordinations point to the vote direction float id_y = g[1]; float id_z = g[2]; for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ dz = bz - zi; //compute the distance bw the voter and the current counter along z axis dz_sq = dz * dz; for (by=bb.low[1]; by<=bb.high[1]; by++){ dy = by - yi; //compute the distance bw the voter and the current counter along y axis dy_sq = dy * dy; for (bx=bb.low[0]; bx<=bb.high[0]; bx++){ dx = bx - xi; //compute the distance bw the voter and the current counter along x axis dx_sq = dx * dx; dist = sqrt(dx_sq + dy_sq + dz_sq); //calculate the distance between the voter and the current counter cos_diff = (norm[0] * dx + norm[1] * dy + norm[2] * dz)/dist; // calculate the cosine of angle between the voter and the current counter if ( ( (dx_sq/rx_sq + dy_sq/ry_sq + dz_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ //check if the current counter located in the voting area of the voter idx_c = (bz* y + by) * x + bx; //calculate the 1D index for the current counter l_vote = gpu_vote[idx_c]; if (l_vote>max) { max = l_vote; id_x = (float)dx; id_y = (float)dy; id_z = (float)dz; } } } } } float m_id = sqrt (id_x*id_x + id_y*id_y + id_z*id_z); gpu_dir[i * 3 + 0] = g_sph[0] * (id_x/m_id); gpu_dir[i * 3 + 1] = g_sph[0] * (id_y/m_id); gpu_dir[i * 3 + 2] = g_sph[0] * (id_z/m_id); } // this kernel updates the gradient direction by the calculated voting direction. template __global__ void update_grad3(T* gpu_grad, T* gpu_dir, size_t x, size_t y, size_t z){ size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread size_t grid_y = y / blockDim.y; //find the grid size along y size_t blockidx_y = blockIdx.y % grid_y; size_t yi = blockidx_y * blockDim.y + threadIdx.y; size_t zi = blockIdx.y / grid_y; if(xi >= x || yi >= y || zi >= z) return; size_t i = zi * x * y + yi * x + xi; gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction direction gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; } template void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], size_t x, size_t y, size_t z){ unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads((unsigned int)sqrt (max_threads), (unsigned int)sqrt (max_threads)); dim3 blocks((unsigned int)x / threads.x + 1, ((unsigned int)y / threads.y + 1) * (unsigned int)z); T* gpu_dir; // allocate space on the GPU for the updated vote direction cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); //call the kernel to calculate the new voting direction update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, (int)r[0], (int)r[1], (int)r[2], (int)x , (int)y, (int)z); //call the kernel to update the gradient direction update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); cudaFree(gpu_dir); //free allocated memory } template void cpu_update_dir3(T* cpu_grad, T* cpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ //calculate the number of bytes in the array unsigned int bytes = x * y * z * sizeof(T); //allocate space on the GPU for the Vote data T* gpu_vote; cudaMalloc(&gpu_vote, bytes); //copy the input vote data to the GPU cudaMemcpy(gpu_vote, cpu_vote, bytes, cudaMemcpyHostToDevice); //allocate space on the GPU for the Gradient data T* gpu_grad; cudaMalloc(&gpu_grad, bytes*3); //copy the Gradient data to the GPU cudaMemcpy(gpu_grad, cpu_grad, bytes*3, cudaMemcpyHostToDevice); //call the GPU version of the update direction function gpu_update_dir3(gpu_grad, gpu_vote, cos_phi, r, x , y, z); //copy the new gradient image back to the CPU cudaMemcpy(cpu_grad, gpu_grad, bytes*3, cudaMemcpyDeviceToHost) ; //free allocated memory cudaFree(gpu_vote); cudaFree(gpu_grad); } #endif