diff --git a/Matlab_3D/gt-128.vol b/Matlab_3D/gt-128.vol new file mode 100644 index 0000000..1399c1a Binary files /dev/null and b/Matlab_3D/gt-128.vol differ diff --git a/Matlab_3D/gt2.vol b/Matlab_3D/gt2.vol deleted file mode 100644 index 1399c1a..0000000 Binary files a/Matlab_3D/gt2.vol and /dev/null differ diff --git a/Matlab_3D/nissl-float-64.64.64.vol b/Matlab_3D/nissl-float-64.64.64.vol new file mode 100644 index 0000000..020b564 Binary files /dev/null and b/Matlab_3D/nissl-float-64.64.64.vol differ diff --git a/Matlab_3D/validation.m b/Matlab_3D/validation.m index 6e0056c..4c45605 100644 --- a/Matlab_3D/validation.m +++ b/Matlab_3D/validation.m @@ -1,6 +1,6 @@ clear all; %for t=100:100:5000 -t=2350; +t=2200; X = 128; Y = 128; Z = 128; @@ -12,7 +12,7 @@ r2=10; itr=5; vote=7; std = [5 5]; -gt_filename = 'gt2.vol'; +gt_filename = 'gt-128.vol'; % out_filename = sprintf('128-128-128/0-nissl-std%d.%d-t0%d-r%d.%d-t%d-out%d.%d.vol',std(1), std(2),t0,r1,r2,t,itr,vote); out_filename = sprintf('D:/build/ivote3-bld/0-out.%d.vol',t); % txt_filename = sprintf('128-128-128/0-validation-nissl-std%d.%d-r%d.%d-t%d-out%d.%d-D%d.txt',std(1), std(2),r1,r2,t,itr,vote,D); diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 387d47e..ed32ace 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -51,3 +51,4 @@ target_link_libraries(ivote3 #copy an image test case configure_file(nissl-raw-data/nissl-float-256.256.256.vol nissl-float-256.256.256.vol COPYONLY) configure_file(nissl-raw-data/nissl-float-128.128.128.vol nissl-float-128.128.128.vol COPYONLY) +configure_file(nissl-raw-data/nissl-float-64.64.64.vol nissl-float-64.64.64.vol COPYONLY) diff --git a/cpp/cudafunc.cu b/cpp/cudafunc.cu index fe5734d..8ecbe66 100644 --- a/cpp/cudafunc.cu +++ b/cpp/cudafunc.cu @@ -10,7 +10,7 @@ gpu_test3(gpu_out, gpu_grad, rmax, phi, n, x, y, z); #include "gradient3.cuh" #include "mag3.cuh" #include "vote3_atomic_aabb.cuh" -#include "update_dir3.cuh" +#include "update_dir3_aabb.cuh" #include "local_max3.cuh" @@ -19,27 +19,21 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, cudaSetDevice(1); - // compute the number of bytes in the input data - unsigned int bytes = x * y * z * sizeof(float); + + unsigned int bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data - //assign memory on gpu for the input data.z - float* gpuI0; + float* gpuI0; //assign memory on gpu for the input data cudaMalloc(&gpuI0, bytes); + cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. - //copy the image data to the GPU. - cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); - - //call the blurring function from the gpu. - gpu_gaussian_blur3(gpuI0, sigma, x, y, z); - //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); + + gpu_gaussian_blur3(gpuI0, sigma, x, y, z); //call the blurring function from the gpu. cudaDeviceSynchronize(); - //assign memory on the gpu for the gradient along the X, y, z. - float* gpu_grad; + float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z. cudaMalloc(&gpu_grad, bytes*3); - //call the gradient function from the gpu. - gpu_gradient3(gpu_grad, gpuI0, anisotropy, x, y, z); + gpu_gradient3(gpu_grad, gpuI0, anisotropy, x, y, z); //call the gradient function from the gpu. cudaFree(gpuI0); float* gpu_vote; @@ -51,11 +45,11 @@ void ivote3(float* img, float sigma[], float anisotropy, float phi, float d_phi, for (int i = 0; i < iter; i++){ cudaMemset(gpu_vote, 0, bytes); - gpu_vote3(gpu_vote, gpu_grad, cos_phi, r, x, y, z); + gpu_vote3(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z); cudaDeviceSynchronize(); //if (phi >= d_phi){ - gpu_update_dir3(gpu_grad, gpu_vote, cos_phi, r, x, y, z); + gpu_update_dir3(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z); cudaDeviceSynchronize(); phi = phi - d_phi; cos_phi = cos(phi); @@ -87,21 +81,18 @@ void lmax(float* out, float* in, float t, unsigned int conn[], unsigned int x, u cudaSetDevice(1); - //assign memory on gpu for the input data. - float* gpuV; + + float* gpuV; //assign memory on gpu for the input data. cudaMalloc(&gpuV, bytes); - //copy the image data to the GPU. - cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); + cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU. float* gpuOut; cudaMalloc(&gpuOut, bytes); - //call the local max function - gpu_local_max3(gpuOut, gpuV, t, conn, x, y, z); + gpu_local_max3(gpuOut, gpuV, t, conn, x, y, z); //call the local max function - //copy the final result to the cpu. - cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); + cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu. cudaFree(gpuV); cudaFree(gpuOut); diff --git a/cpp/main.cpp b/cpp/main.cpp index d2a9636..dbb6ae8 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -9,14 +9,13 @@ #include #define pi 3.14159 -#define M_PI 3.14159 -#include -#include -#include -#include -//#include -//#include -//#include +//#define M_PI 3.14159 +//#include +//#include +//#include +//#include +//#include + /*void test_3(float* gpu_out, float* gpu_grad, float rmax, float phi, int n, int x, int y, int z); @@ -52,42 +51,78 @@ int main(){ } list.close(); */ - /* - int n=10; - stim::circle cir; - float* c0= (float*) malloc(3*sizeof(float)); - c0[0] =-4; - c0[1]=0; - c0[2] = 3; - stim::vec3 c(c0[0],c0[1],c0[2]); - float len = c.len(); - stim::vec3 norm(c0[0]/len,c0[1]/len,c0[2]/len); - std::cout<< len << '\n'; - std::cout<< norm << '\n'; - cir.center(c); - cir.normal(norm); - cir.scale(2); - stim::vec3 out = cir.p(45); - std::vector> out2 = cir.getPoints(n); +/* + int main(){ + + + stim::vec3 g(-44,-3.4,-0.005); // form a vec3 variable for the gradient vector + stim::vec3 g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector + int n =36; //set the number of points to find the boundaries of the conical voting area + int xi = 105; + int yi = 17; + int zi = 23; + float xc = 12 * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter + float yc = 10 * sin(g_sph[1]) * sin(g_sph[2]) ; + float zc = 10 * cos(g_sph[2]) ; + float r = sqrt(xc*xc + yc*yc + zc*zc); + xc+=xi; + yc+=yi; + zc+=zi; + stim::vec3 center(xc,yc,zc); + + float d = 2 * r * tan(25*pi/180 ); //find the diameter of the conical voting area + stim::vec3 norm = g.norm(); //compute the normalize gradient vector + float step = 360.0/(float) n; + stim::circle cir(center, d, norm); + stim::aabb3 bb(xi,yi,zi); + bb.insert(xc,yc,zc); + for(float j = 0; j <360.0; j += step){ + stim::vec3 out = cir.p(j); + bb.insert(out[0], out[1], out[2]); + } + + bb.trim_low(0,0,0); + bb.trim_high(128-1, 128-1, 128-1); + + std::cout<< bb.low[0] << '\t' << bb.low[1] << '\t' << bb.low[2] << '\n'; + std::cout<< bb.high[0] << '\t' << bb.high[1] << '\t' << bb.high[2] << '\n'; + std::cin >> n; +*/ + /*int n=10; + stim::circle cir; + float* c0= (float*) malloc(3*sizeof(float)); + c0[0] =-4; + c0[1]=0; + c0[2] = 3; + stim::vec3 c(c0[0],c0[1],c0[2]); + float len = c.len(); + stim::vec3 norm(c0[0]/len,c0[1]/len,c0[2]/len); + std::cout<< len << '\n'; + std::cout<< norm << '\n'; + cir.center(c); + cir.normal(norm); + cir.scale(2); + stim::vec3 out = cir.p(45); + std::vector> out2 = cir.getPoints(n); - std::cout<< out << '\n'; - std::cout <>::const_iterator i = out2.begin(); i != out2.end(); ++i) - std::cout << *i << '\n'; - std::ofstream list("circle_check.txt"); - if (list.is_open()){ - for (std::vector>::const_iterator j = out2.begin(); j != out2.end(); ++j) - list << *j << '\n'; - } - list.close(); - std::cin >> n; + for (std::vector>::const_iterator i = out2.begin(); i != out2.end(); ++i) + std::cout << *i << '\n'; + std::ofstream list("circle_check.txt"); + if (list.is_open()){ + for (std::vector>::const_iterator j = out2.begin(); j != out2.end(); ++j) + list << *j << '\n'; + } + list.close(); + std::cin >> n; } - */ + void ivote3(float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); void lmax(float* center, float* vote, float t1, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); @@ -140,7 +175,7 @@ int main(int argc, char** argv){ args.add("z", "size of the dataset along Z axis", "positive value"); args.add("t", "threshold value for the final result", "positive valu"); args.add("invert", "to invert the input data set", "string"); - args.add("anisotropy", "anisotropy value of the imaging", "positive value"); + args.add("anisotropy", "anisotropy value of the imaging", "1"); //parse the command line arguments. args.parse(argc, argv); diff --git a/cpp/nissl-raw-data/nissl-float-64.64.64.vol b/cpp/nissl-raw-data/nissl-float-64.64.64.vol new file mode 100644 index 0000000..020b564 Binary files /dev/null and b/cpp/nissl-raw-data/nissl-float-64.64.64.vol differ diff --git a/cpp/update_dir3.cuh b/cpp/update_dir3.cuh index da9837b..54104c2 100644 --- a/cpp/update_dir3.cuh +++ b/cpp/update_dir3.cuh @@ -129,7 +129,7 @@ } template - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads(sqrt (max_threads),sqrt (max_threads)); diff --git a/cpp/update_dir3_aabb.cuh b/cpp/update_dir3_aabb.cuh index 1d533c5..bd64026 100644 --- a/cpp/update_dir3_aabb.cuh +++ b/cpp/update_dir3_aabb.cuh @@ -14,8 +14,7 @@ // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. template - __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ - //extern __shared__ float s_vote[]; + __global__ void update_dir3(T* gpu_dir, T* gpu_grad, T* gpu_vote, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread @@ -26,26 +25,13 @@ if(xi >= x|| yi >= y || zi>= z) return; int i = zi * x * y + yi * x + xi; //compute the global 1D index for this pixel - - // find the starting points for this block along the x and y directions - //int bxi = blockIdx.x * blockDim.x; - //int byi = blockidx_y * blockDim.y; - //find the starting points and the size of the window, which will be copied to the 2D-shared memory - //int bxs = bxi - rx; - //int bys = byi - ry; - //int xwidth = 2 * rx + blockDim.x; - //int ywidth = 2 * ry + blockDim.y; - //compute the coordinations of this pixel in the 2D-shared memory. - //int sx_rx = threadIdx.x + rx; - //int sy_ry = threadIdx.y + ry; - float rx_sq = rx * rx; // compute the square for rmax float ry_sq = ry * ry; float rz_sq = rz * rz; stim::vec3 g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector stim::vec3 g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector - int n =4; //set the number of points to find the boundaries of the conical voting area + float n =8; //set the number of points to find the boundaries of the conical voting area float xc = rx * cos(g_sph[1]) * sin(g_sph[2]) ; //calculate the center point of the surface of the voting area for the voter float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; float zc = rz * cos(g_sph[2]) ; @@ -54,9 +40,10 @@ yc+=yi; zc+=zi; stim::vec3 center(xc,yc,zc); - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area + + float d = 2 * r * tan(phi); //find the diameter of the conical voting area stim::vec3 norm = g.norm(); //compute the normalize gradient vector - float step = 360.0/(float) n; + float step = 360.0/n; stim::circle cir(center, d, norm); stim::aabb3 bb(xi,yi,zi); bb.insert(xc,yc,zc); @@ -64,13 +51,13 @@ stim::vec3 out = cir.p(j); bb.insert(out[0], out[1], out[2]); } - + bb.trim_low(xi-rx, yi-ry, zi-rz); bb.trim_low(0,0,0); + bb.trim_high(xi+rx, yi+ry, zi+rz); bb.trim_high(x-1, y-1, z-1); int bx,by,bz; int dx, dy, dz; float dx_sq, dy_sq, dz_sq; - float dist, cos_diff; int idx_c; @@ -118,42 +105,38 @@ template __global__ void update_grad3(T* gpu_grad, T* gpu_dir, int x, int y, int z){ - //calculate x,y,z coordinates for this thread - int xi = blockIdx.x * blockDim.x + threadIdx.x; - //find the grid size along y - int grid_y = y / blockDim.y; + int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread + + int grid_y = y / blockDim.y; //find the grid size along y int blockidx_y = blockIdx.y % grid_y; int yi = blockidx_y * blockDim.y + threadIdx.y; int zi = blockIdx.y / grid_y; int i = zi * x * y + yi * x + xi; if(xi >= x || yi >= y || zi >= z) return; - //update the gradient image with the new direction direction - gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; + + gpu_grad[i * 3 + 0] = gpu_dir [i * 3 + 0]; //update the gradient image with the new direction direction gpu_grad[i * 3 + 1] = gpu_dir [i * 3 + 1]; gpu_grad[i * 3 + 2] = gpu_dir [i * 3 + 2]; } template - void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_update_dir3(T* gpu_grad, T* gpu_vote, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads(sqrt (max_threads),sqrt (max_threads)); dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); - //unsigned int shared_bytes = (threads.x + 2*r[0])*(threads.y + 2*r[1])*sizeof(T); - // allocate space on the GPU for the updated vote direction - T* gpu_dir; + + T* gpu_dir; // allocate space on the GPU for the updated vote direction cudaMalloc(&gpu_dir, x * y * z * sizeof(T) * 3); //call the kernel to calculate the new voting direction - update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, cos_phi, r[0], r[1], r[2], x , y, z); - + update_dir3 <<< blocks, threads >>>(gpu_dir, gpu_grad, gpu_vote, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to update the gradient direction update_grad3 <<< blocks, threads >>>(gpu_grad, gpu_dir, x , y, z); - - //free allocated memory - cudaFree(gpu_dir); + + cudaFree(gpu_dir); //free allocated memory } diff --git a/cpp/vote3.cuh b/cpp/vote3.cuh index 5af3cef..6b06fb7 100644 --- a/cpp/vote3.cuh +++ b/cpp/vote3.cuh @@ -101,7 +101,7 @@ } template - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ unsigned int max_threads = stim::maxThreadsPerBlock(); diff --git a/cpp/vote3_atomic.cuh b/cpp/vote3_atomic.cuh index 8d7f609..96e40c3 100644 --- a/cpp/vote3_atomic.cuh +++ b/cpp/vote3_atomic.cuh @@ -27,14 +27,14 @@ float mag_v = sqrt(gx_v*gx_v + gy_v*gy_v + gz_v*gz_v); // compute the gradient magnitude for the voter - float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter - float gy_v_n = gy_v/mag_v; - float gz_v_n = gz_v/mag_v; + //float gx_v_n = gx_v/mag_v; // normalize the gradient vector for the voter + //float gy_v_n = gy_v/mag_v; + //float gz_v_n = gz_v/mag_v; float rx_sq = rx * rx; // compute the square for rmax float ry_sq = ry * ry; float rz_sq = rz * rz; - float x_sq, y_sq, z_sq, d_c, cos_diff; + float x_sq, y_sq, z_sq, dist, cos_diff; int xi_c, yi_c, zi_c, idx_c; for (int z_c=-rz; z_c<=rz; z_c++){ @@ -49,8 +49,8 @@ xi_c = xi + x_c; if (xi_c < x && xi_c>=0){ x_sq = x_c * x_c; - d_c = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter - cos_diff = (gx_v_n * x_c + gy_v_n * y_c + gz_v_n * z_c)/(d_c); // calculate the cosine of angle between the voter and the current counter + dist = sqrt(x_sq + y_sq + z_sq); //calculate the distance between the voter and the current counter + cos_diff = (gx_v * x_c + gy_v * y_c + gz_v * z_c)/(dist * mag_v); // calculate the cosine of angle between the voter and the current counter if ( ( (x_sq/rx_sq + y_sq/ry_sq + z_sq/rz_sq) <=1 ) && (cos_diff >=cos_phi) ){ idx_c = (zi_c * y + yi_c) * x + xi_c; //calculate the 1D index for the current counter atomicAdd (&gpu_vote[idx_c] , mag_v); @@ -64,7 +64,7 @@ } template - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ unsigned int max_threads = stim::maxThreadsPerBlock(); diff --git a/cpp/vote3_atomic_aabb.cuh b/cpp/vote3_atomic_aabb.cuh index ab0b802..25d263f 100644 --- a/cpp/vote3_atomic_aabb.cuh +++ b/cpp/vote3_atomic_aabb.cuh @@ -15,7 +15,7 @@ // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area template - __global__ void vote3(T* gpu_vote, T* gpu_grad, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ + __global__ void vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, int rx, int ry, int rz, int x, int y, int z){ int xi = blockIdx.x * blockDim.x + threadIdx.x; //calculate x,y,z coordinates for this thread @@ -32,13 +32,10 @@ float rx_sq = rx * rx; // compute the square for rmax float ry_sq = ry * ry; float rz_sq = rz * rz; - float dist, cos_diff; - int idx_c; - - //float rmax = sqrt(rx_sq + ry_sq + rz_sq); + stim::vec3 g(gpu_grad[3*i],gpu_grad[3*i+1],gpu_grad[3*i+2]); // form a vec3 variable for the gradient vector stim::vec3 g_sph = g.cart2sph(); //convert cartesian coordinate to spherical for the gradient vector - int n =4; //set the number of points to find the boundaries of the conical voting area + float n =8; //set the number of points to find the boundaries of the conical voting area float xc = rx * cos(g_sph[1]) * sin(g_sph[2]); //calculate the center point of the surface of the voting area for the voter float yc = ry * sin(g_sph[1]) * sin(g_sph[2]) ; float zc = rz * cos(g_sph[2]) ; @@ -48,9 +45,9 @@ zc+=zi; stim::vec3 center(xc,yc,zc); - float d = 2 * r * tan(acos(cos_phi) ); //find the diameter of the conical voting area + float d = 2 * r * tan(phi); //find the diameter of the conical voting area stim::vec3 norm = g.norm(); //compute the normalize gradient vector - float step = 360.0/(float) n; + float step = 360.0/n; stim::circle cir(center, d, norm); stim::aabb3 bb(xi,yi,zi); bb.insert(xc,yc,zc); @@ -58,12 +55,15 @@ stim::vec3 out = cir.p(j); bb.insert(out[0], out[1], out[2]); } - + bb.trim_low(xi-rx, yi-ry, zi-rz); bb.trim_low(0,0,0); + bb.trim_high(xi+rx, yi+ry, zi+rz); bb.trim_high(x-1, y-1, z-1); int bx,by,bz; int dx, dy, dz; float dx_sq, dy_sq, dz_sq; + float dist, cos_diff; + int idx_c; for (bz=bb.low[2]; bz<=bb.high[2]; bz++){ dz = bz - zi; //compute the distance bw the voter and the current counter along z axis dz_sq = dz * dz; @@ -86,13 +86,13 @@ } template - void gpu_vote3(T* gpu_vote, T* gpu_grad, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ + void gpu_vote3(T* gpu_vote, T* gpu_grad, T phi, T cos_phi, unsigned int r[], unsigned int x, unsigned int y, unsigned int z){ unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads(sqrt (max_threads),sqrt (max_threads)); dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); - vote3 <<< blocks, threads >>>(gpu_vote, gpu_grad, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting + vote3 <<< blocks, threads >>>(gpu_vote, gpu_grad, phi, cos_phi, r[0], r[1], r[2], x , y, z); //call the kernel to do the voting } -- libgit2 0.21.4