#include #include #include #include //#include #include #include #include #include #include #include #include typedef unsigned int uint; typedef unsigned int uchar; stim::cuda::cuda_texture t; float* gpuTable; float* gpuGrad; float* gpuVote; float* gpuI; float* gpuCenters; void atan_2d(float* cpuTable, unsigned int rmax) { //initialize the width and height of the window which atan2 are computed in. int xsize = 2*rmax +1; int ysize = 2*rmax +1; // assign the center coordinates of the atan2 window to yi and xi int yi = rmax; int xi = rmax; for (int xt = 0; xt < xsize; xt++){ for(int yt = 0; yt < ysize; yt++){ //convert the current 2D coordinates to 1D int id = yt * xsize + xt; // calculate the distance between the pixel and the center of the atan2 window float xd = xi - xt; float yd = yi - yt; // calculate the angle between the pixel and the center of the atan2 window and store the result. float atan_2d_vote = atan2(yd, xd); cpuTable[id] = atan_2d_vote; } } } void initCuda(unsigned int bytes_table, unsigned int bytes_ds) { HANDLE_ERROR( cudaMalloc((void**) &gpuTable, bytes_table) ); HANDLE_ERROR( cudaMalloc((void**) &gpuI, bytes_ds) ); HANDLE_ERROR( cudaMalloc((void**) &gpuGrad, bytes_ds*2) ); HANDLE_ERROR( cudaMalloc((void**) &gpuVote, bytes_ds) ); HANDLE_ERROR( cudaMalloc((void**) &gpuCenters, bytes_ds) ); } void cleanCuda() { HANDLE_ERROR( cudaFree(gpuTable) ); HANDLE_ERROR( cudaFree(gpuGrad) ); HANDLE_ERROR( cudaFree(gpuVote) ); HANDLE_ERROR( cudaFree(gpuCenters) ); HANDLE_ERROR( cudaFree(gpuI) ); } std::vector< stim::vec > find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) { float phi = 15.1*M_PI/180; int iter = 5; float dphi = phi/iter; float rmax = 10; float sigma = 4; unsigned int pixels = x * y; unsigned int bytes = sizeof(float) * pixels; unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1); unsigned int x_ds = (x + (x % 1 == 0 ? 0:1)); unsigned int y_ds = (y + (x % 1 == 0 ? 0:1)); unsigned int bytes_ds = sizeof(float) * x_ds * y_ds; unsigned int conn = 5; float final_t = 200.0; float* cpuTable = (float*) malloc(bytes_table); float* cpuCenters = (float*) malloc(bytes_ds); stringstream name; std::vector > output; initCuda(bytes_table, bytes_ds); atan_2d(cpuTable, rmax); cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice); t.MapCudaTexture(texbufferID, texType); cudaDeviceSynchronize(); stim::cuda::tex_gaussian_blur2( gpuI, sigma, x, y, t.getTexture(), t.getArray() ); cudaDeviceSynchronize(); stim::cuda::gpu_gradient_2d( gpuGrad, gpuI, x, y ); cudaDeviceSynchronize(); stim::cuda::gpu_cart2polar(gpuGrad, x, y); cudaDeviceSynchronize(); cudaDeviceSynchronize(); for (int i = 0; i < iter; i++) { stim::cuda::gpu_vote(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); cudaDeviceSynchronize(); stim::cuda::gpu_update_dir(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); cudaDeviceSynchronize(); phi = phi - dphi; } cudaDeviceSynchronize(); stim::cuda::gpu_local_max(gpuCenters, gpuVote, final_t, conn, x, y); cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost); for(int i = 0; i < pixels; i++) { int ix = (i % x); int iy = (i / x); if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4)) { float x_v = (float) ix; float y_v = (float) iy; output.push_back(stim::vec((x_v/(float)x), (y_v/(float)y), 0.0)); } } t.UnmapCudaTexture(); cleanCuda(); free(cpuTable); free(cpuCenters); return output; }