#include #include #include #include
// NOTE(review): the four header names on the line above are missing from this
// text (they appear to have been stripped during extraction). Restore them
// from the original file before compiling.

// Fills cpuTable with atan2 angles (radians) for a square window of side
// 2*rmax + 1 whose center is at (rmax, rmax).
//
// cpuTable  host buffer written by this function; indices 0 through
//           (2*rmax+1)*(2*rmax+1) - 1 are written, so it must hold at least
//           that many floats.
// rmax      half-width of the window; the window spans 2*rmax + 1 entries
//           along each axis.
//
// The entry for window coordinate (xt, yt) is stored at yt * xsize + xt and
// holds atan2(rmax - yt, rmax - xt), i.e. the offsets are measured from the
// current entry towards the center.
void atan_2(float* cpuTable, unsigned int rmax){

    // width and height of the window in which the atan2 values are computed
    int xsize = 2*rmax +1;
    int ysize = 2*rmax +1;

    // coordinates of the window center
    int yi = rmax;
    int xi = rmax;

    for (int xt = 0; xt < xsize; xt++){

        for(int yt = 0; yt < ysize; yt++){

            // convert the current 2D coordinates to a 1D index (row stride is xsize)
            int id = yt * xsize + xt;

            // signed offset from the current entry to the center of the window
            float xd = xi - xt;
            float yd = yi - yt;

            // angle of that offset; stored directly in the table
            float atan_2d_vote = atan2(yd, xd);
            cpuTable[id] = atan_2d_vote;
        }
    }
}

// Host-side driver: uploads the atan2 table, allocates device buffers sized
// for a down-sampled x-by-y image, then runs down-sample, blur, gradient,
// cart2polar, a voting loop and a local-maximum step through stim::cuda
// helpers, copying intermediate results back to host buffers along the way.
//
// NOTE(review): this definition is damaged in the visible text:
//   - the template arguments of the std::vector return type are missing;
//   - text is missing between "(float" and "unsigned int pixels";
//   - the voting loop header is cut off after "i" (loop bound, opening brace
//     and the name of the first call in the loop body are gone);
//   - rmax, resize, gpuI0, sigma, t, phi, dphi, conn and every cpu* buffer
//     other than cpuTable are used without a visible declaration;
//   - no return statement and no cudaFree call appear in the visible text.
// Confirm all of the above against the original file.
//
// NOTE(review): none of the CUDA runtime return codes below are inspected.
std::vector > find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) {

    // NOTE(review): the right-hand side of this initializer is truncated here.
    float* cpuTable = (float
    unsigned int pixels = x * y;
    // size of the full-resolution image in bytes
    // (pixels and bytes are not read again in the visible text)
    unsigned int bytes = sizeof(float) * pixels;

    // number of bytes in the (2*rmax+1) x (2*rmax+1) atan2 table
    unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);

    // allocate space on the GPU for the atan2 table and upload the host copy
    float* gpuTable;
    cudaMalloc(&gpuTable, bytes_table);
    cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);

    // down-sampling factor; the value of 1/resize is stored in an unsigned int
    // NOTE(review): the type of resize is not visible here. If it is
    // fractional the result is truncated; if the result is 0 the divisions
    // and modulo operations below divide by zero. Confirm.
    unsigned int sigma_ds = 1/resize;
    // down-sampled dimensions: x and y divided by sigma_ds, rounded up
    unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
    unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
    // size of one down-sampled float image in bytes
    unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;

    // device buffer for the down-sampled image
    float* gpuI;
    cudaMalloc(&gpuI, bytes_ds);

    // device buffer for the gradient: two floats per down-sampled pixel
    float* gpuGrad;
    cudaMalloc(&gpuGrad, bytes_ds*2);

    // device buffer for the vote image
    float* gpuVote;
    cudaMalloc(&gpuVote, bytes_ds);

    // allocate space on the GPU for the detected cell centers
    float* gpuCenters;
    cudaMalloc(&gpuCenters, bytes_ds);

    // down-sample gpuI0 into gpuI and keep a host copy of the result
    stim::cuda::gpu_down_sample(gpuI, gpuI0, resize, x , y);
    cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);

    // from here on x and y hold the down-sampled dimensions, and t is scaled
    // by resize
    x = x_ds;
    y = y_ds;
    t = t * resize;
    //sigma = sigma * resize;

    cudaDeviceSynchronize();
    // presumably blurs gpuI in place with parameter sigma (inferred from the
    // helper's name and arguments; confirm against the stim library)
    stim::cuda::gpu_gaussian_blur2(gpuI,sigma, x, y);

    cudaDeviceSynchronize();
    // host copy of the blurred image
    cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);

    cudaDeviceSynchronize();
    // gradient of gpuI written to gpuGrad
    stim::cuda::gpu_gradient_2d(gpuGrad, gpuI, x, y);

    cudaDeviceSynchronize();
    // host copy of the gradient (twice the size of a single image)
    cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);

    // presumably converts gpuGrad from Cartesian to polar form in place
    // (inferred from the helper's name; confirm)
    stim::cuda::gpu_cart2polar(gpuGrad, x, y);

    cudaDeviceSynchronize();
    // host copy of the converted gradient
    cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);

    // multiply the gradient by a constant and calculate the absolute value (to save an image)
    // both calls receive the host copy cpuCart2Polar, not gpuGrad
    stim::cuda::cpu_multiply(cpuCart2Polar, 40, x * y * 2);
    cudaDeviceSynchronize();
    stim::cuda::cpu_abs(cpuCart2Polar, x * y * 2);
    cudaDeviceSynchronize();

    // voting loop
    // NOTE(review): the loop condition, increment, opening brace and the name
    // of the call that takes the argument list below are missing from this text.
    for (int i =0; i(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
    cudaDeviceSynchronize();
    stim::cuda::gpu_update_dir(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
    cudaDeviceSynchronize();

    // keep a host copy of the vote image for this iteration; iterations 0-3
    // go to cpuVote1-cpuVote4, and iteration 4 and every later one go to
    // cpuVote5
    switch (i){
        case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
        case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
        case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
        case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
        case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
        default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost); break;
    }

    // phi is reduced by dphi after every iteration
    phi = phi - dphi;
    }

    // local-maximum step on the final vote image, called with t and conn;
    // the result is copied to the host buffer cpuCenters
    stim::cuda::gpu_local_max(gpuCenters, gpuVote, t, conn, x, y);
    cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
}