From 94d437dd6ed950af9ba7b5f1d3596800697adccf Mon Sep 17 00:00:00 2001 From: Laila Saadatifard Date: Fri, 22 Jan 2016 11:54:11 -0600 Subject: [PATCH] ivote3 code compiling on the gpu and use three channels for voting direction and gradient magnitude --- Matlab_3D/main.m | 229 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Matlab_3D/main_rmax.m | 246 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ cpp/cudafunc.cu | 14 ++++++++------ cpp/gradient3.cuh | 12 +++++++----- cpp/local_max3.cuh | 2 +- cpp/main.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------ cpp/update_dir3.cuh | 2 +- cpp/vote3.cuh | 1 + 8 files changed, 81 insertions(+), 506 deletions(-) delete mode 100644 Matlab_3D/main.m delete mode 100644 Matlab_3D/main_rmax.m diff --git a/Matlab_3D/main.m b/Matlab_3D/main.m deleted file mode 100644 index a45f0fa..0000000 --- a/Matlab_3D/main.m +++ /dev/null @@ -1,229 +0,0 @@ - -clc; -clear; -disp('***************** NEW RUN *********************'); -total = tic; - - -% ******* Initialize voting parameters ************************************** -rmax = 9; %maximum radius of the cell -ang_deg = 15.1; %half the angular range of the voting area -ang = ang_deg * pi / 180; -iter = 6; %number of voting iterations -t0 = 1.0; %threshold color -sigma = [2, 2, 1]; -% t = 0.1; -iter = iter-1; -d_ang= ang / (iter); - -% ******** Testing parameters ****************************************** -p = [100, 50, 100]; -ps = [100, 100, 50]; -% ps = [100, 50, 40]; -% I = syn_Img(rmax , ps); -volfile = 'nissl-rat.vol'; -fid = fopen(volfile); % open the file that include the image -S = fread(fid, 3, 'int32'); -X = S(1); -Y = S(2); -Z = S(3); - -% load the VOL data into a 2D matrix -I = fread(fid,[X Y*Z], 'uint8'); -fclose(fid); - -%change this to a 3D matrix -I = (reshape(I, [X, Y, Z])); - -% invert the intensity -I = 255 - I; - -%perform a gaussian blur -Iblur = gauss_blur3d(I, sigma); - -%crop out a small subregion of I and Iblur -Iblur = Iblur(p(1):p(1)+ps(1)-1, p(2):p(2)+ps(2)-1, p(3):p(3)+ps(3)-1); -I = I(p(1):p(1)+ps(1)-1, p(2):p(2)+ps(2)-1, p(3):p(3)+ps(3)-1); - -% compute the gradient -[Igrad_y, Igrad_x, Igrad_z] = gradient(Iblur); - -%calculate the gradient magnitude -Imag = sqrt(Igrad_x .^ 2 + Igrad_y .^ 2 + Igrad_z .^2); - -%set a threshold for the gradient magnitude -It = Imag > t0; - -%Set the boundaries of the threshold image to zero -It(1:rmax, :, :) = 0; -It(ps(1) - rmax:ps(1), :,:) = 0; -It(:, 1:rmax, :) = 0; -It(:, ps(2) - rmax:ps(2),:) = 0; -It(:, :, 1:rmax) = 0; -It(:,:, ps(3) - rmax:ps(3)) = 0; - -%get the indices of all of the nonzero values in the threshold image -% (voter positions) -[Itx,Ity,Itz] = ind2sub(size(It),find(It)); -Vi = find(It); -nV = nnz(It); - -% create a meshgrid describing coordinates relative to the voter position -range = -rmax:rmax; %create an array of values between -rmax and rmax -[mx, my, mz] = meshgrid(range, range, range); %create a template describing local pixel position in a small cube -m_mag = sqrt(mx.^2 + my.^2 + mz.^2); %create a template describing the distance from the center of a small cube - -% create a mask for the voting area -M_dist = m_mag <= rmax; %mask for the voting area distance (all values < rmax from the center) - -%calculate the direction vector between a pixel and voter -LV_x = mx./m_mag; -LV_y = my./m_mag; -LV_z = mz./m_mag; - -%number of pixels in the voting area of each voter (initialize to zero) -validPixels = (zeros(nV,1)); - -%indices of pixels in the voting area of each voter -% indices reference the 3D image -g_v_prime = (zeros(nV, (rmax^3))); - - -%% vote -tic; -%for each iteration (in iterative voting) -for itr = 1 : iter+1 - - %initialize the vote image to zero - Ivote = (zeros(size(I))); - - %for each voter (nonzero pixels in the threshold image It) - for v = 1: nV - - %get the cartesian coordinates of the voter v in the main image I - vx = Itx(v); - vy = Ity(v); - vz = Itz(v); - vi = Vi(v); - - %retreive the gradient magnitude at the voter position - %vmag = Imag(vx,vy,vz); - vmag = Imag(vi); - - %retrieve the gradient - gx = Igrad_x(vi); - gy = Igrad_y(vi); - gz = Igrad_z(vi); - - %calculate the gradient magnitude - dmag = sqrt (gx^2 + gy^2 + gz^2); - - %calculate the normalized gradient direction - dx = gx / dmag; - dy = gy / dmag; - dz = gz / dmag; - - %calculate the angle between the voter direction and the pixel direction - cos_diff = LV_x .* dx + LV_y .* dy + LV_z .* dz; - ang_diff = acos(cos_diff); - - %create an angular mask for the voting area - M_angle = cos_diff > cos(ang); - - %combine the two masks to mask out the voting angle - M = M_angle .* M_dist; - - % get the coordinates of each pixel in the final voter mask M - pi = find(M); - - %calculate the number of pixels in the voting region - npts = nnz(M); - validPixels(v) = npts; - - %convert every index in the voting area from a local 3D index to a global 3D index (into the original image I) - global_px = vx + mx(pi); - global_py = vy + my(pi); - global_pz = vz + mz(pi); - - %convert the global 3D index of each point into a global 1D index - global_pi = sub2ind(ps, global_px, global_py, global_pz); - - g_v_prime (v, 1:npts) = global_pi; - - - Ivote( global_pi ) = Ivote( global_pi ) + vmag; - - end - - if itr ==1 - Ivote1 = Ivote; - - elseif itr ==2 - Ivote2 = Ivote; - - elseif itr ==3 - Ivote3 = Ivote; - - elseif itr ==4 - Ivote4 = Ivote; - - elseif itr == 5 - Ivote5 = Ivote; - end - t_v1 = toc; - disp(['voting done. time =',num2str(t_v1)]); - - % update the voting direction - if ang>0 - tic; - for v = 1: nV - % coordinates of the current voter - vx = Itx(v); - vy = Ity(v); - vz = Itz(v); - - %get the local value of the voting image - local_Ivote = Ivote(g_v_prime(v,1:validPixels(v))); - - %find the index of the maximum value - [~, local_max_idx] = max(local_Ivote); - - %convert this into a global subscript - [g_px, g_py, g_pz] = ind2sub(size(Ivote), g_v_prime(v,local_max_idx)); - - %compute the vector from the voter position to this position - Igrad_x(vx, vy, vz) = g_px - vx; - Igrad_y(vx, vy, vz) = g_py - vy; - Igrad_z(vx, vy, vz) = g_pz - vz; - - end - - - tdir1 = toc; - display (['updating dir done. time = ', num2str(tdir1)]); - ang = ang - d_ang; - end - end - - -%% -% t = 50; -% center = Ivote; -% % center(center t0; - -%Set the boundaries of the threshold image to zero -It(1:rmax(1), :, :) = 0; -It(ps(1) - rmax(1):ps(1), :,:) = 0; -It(:, 1:rmax(2), :) = 0; -It(:, ps(2) - rmax(2):ps(2),:) = 0; -It(:, :, 1:rmax(3)) = 0; -It(:,:, ps(3) - rmax(3):ps(3)) = 0; -% -%get the indices of all of the nonzero values in the threshold image -% (voter positions) -[Itx,Ity,Itz] = ind2sub(size(It),find(It)); -Vi =(find(It)); -nV = nnz(It); -% -% create a meshgrid describing coordinates relative to the voter position -rangex = -rmax(1):rmax(1); %create an array of values between -rmax and rmax -rangey = -rmax(2):rmax(2); -rangez = -rmax(3):rmax(3); -[mx, my, mz] = meshgrid(rangex, rangey, rangez); %create a template describing local pixel position in a small cube -m_mag = (sqrt(mx.^2 + my.^2 + mz.^2)); %create a template describing the distance from the center of a small cube - -% create a mask for the voting area -M_dist = (mx.^2/rmax(1)^2 + my.^2/rmax(2)^2 + mz.^2/rmax(3)^2) <= 1; %mask for the voting area distance (all values < rmax from the center) - -% calculate the direction vector between a pixel and voter -LV_x = mx./m_mag; -LV_y = my./m_mag; -LV_z = mz./m_mag; - -%number of pixels in the voting area of each voter (initialize to zero) -validPixels = (zeros(nV,1)); -%% -%indices of pixels in the voting area of each voter -% indices reference the 3D image -g_v_prime = zeros(nV, ceil(rmax(1)*rmax(2)*rmax(3)*ang)); - - -%% vote -tic; -%for each iteration (in iterative voting) -for itr = 1 : iter+1 - - %initialize the vote image to zero - Ivote = zeros(Isize); - - %for each voter (nonzero pixels in the threshold image It) - for v = 1: nV - - %get the cartesian coordinates of the voter v in the main image I - vx = Itx(v); - vy = Ity(v); - vz = Itz(v); - vi = Vi(v); - - %retreive the gradient magnitude at the voter position - vmag = Imag(vi); - - %retrieve the gradient - gx = Igrad_x(vi); - gy = Igrad_y(vi); - gz = Igrad_z(vi); - - %calculate the gradient magnitude - dmag = sqrt (gx^2 + gy^2 + gz^2); - - %calculate the normalized gradient direction - dx = gx / dmag; - dy = gy / dmag; - dz = gz / dmag; - - %calculate the angle between the voter direction and the pixel direction - cos_diff = LV_x .* dx + LV_y .* dy + LV_z .* dz; - ang_diff = acos(cos_diff); - - %create an angular mask for the voting area - M_angle = cos_diff >= cos(ang); - - %combine the two masks to mask out the voting angle - M = M_angle .* M_dist; - - % get the coordinates of each pixel in the final voter mask M - pi = find(M); - - %calculate the number of pixels in the voting region - npts = nnz(M); - validPixels(v) = npts; - - %convert every index in the voting area from a local 3D index to a global 3D index (into the original image I) - global_px = vx + mx(pi); - global_py = vy + my(pi); - global_pz = vz + mz(pi); - - %convert the global 3D index of each point into a global 1D index - global_pi = sub2ind(ps, global_px, global_py, global_pz); - - g_v_prime (v, 1:npts) = global_pi; - - - Ivote( global_pi ) = Ivote( global_pi ) + vmag; - - end - - if itr ==1 - Ivote1 = single(Ivote); - - elseif itr ==2 - Ivote2 = single(Ivote); - - elseif itr ==3 - Ivote3 = single(Ivote); - - elseif itr ==4 - Ivote4 = single(Ivote); - - elseif itr == 5 - Ivote5 = single(Ivote); - end - t_v1 = toc; - disp(['voting done. time =',num2str(t_v1)]); - - % update the voting direction - if ang>0 - tic; - for v = 1: nV - % coordinates of the current voter - vx = Itx(v); - vy = Ity(v); - vz = Itz(v); - - %get the local value of the voting image - local_Ivote = Ivote(g_v_prime(v,1:validPixels(v))); - - %find the index of the maximum value - [~, local_max_idx] = max(local_Ivote); - - %convert this into a global subscript - [g_px, g_py, g_pz] = ind2sub(size(Ivote), g_v_prime(v,local_max_idx)); - - %compute the vector from the voter position to this position - Igrad_x(vx, vy, vz) = g_px - vx; - Igrad_y(vx, vy, vz) = g_py - vy; - Igrad_z(vx, vy, vz) = g_pz - vz; - - end - - - tdir1 = toc; - display (['updating dir done. time = ', num2str(tdir1)]); - ang = ang - d_ang; - end - end - - -%% -t = 350; -conn = [5 5 3]; -Icenter = local_max(Ivote, conn, t); -% center = Ivote1; -% center(center(gpuI0, sigma, x, y, z); - + //cudaMemcpy(img, gpuI0, bytes, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); //assign memory on the gpu for the gradient along the X, y, z. @@ -32,7 +34,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un cudaMalloc(&gpu_grad, bytes*3); //call the gradient function from the gpu. - gpu_gradient3(gpu_grad, gpuI0, x, y, z); + gpu_gradient3(gpu_grad, gpuI0, anisotropy, x, y, z); cudaFree(gpuI0); float* gpu_vote; @@ -45,7 +47,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un gpu_vote3(gpu_vote, gpu_grad, cos_phi, r, x, y, z); cudaDeviceSynchronize(); - if (i==0) + if (i==7) cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost); if (phi >= d_phi){ @@ -58,7 +60,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un } cudaFree(gpu_grad); - cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); + //cudaMemcpy(center, gpu_grad, bytes, cudaMemcpyDeviceToHost); //allocate space on the gpu for the final detected cells. float* gpu_output; @@ -68,7 +70,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un gpu_local_max3(gpu_output, gpu_vote, t, conn, x, y, z); //copy the final result to the cpu. - //cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); + cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); cudaFree(gpu_vote); diff --git a/cpp/gradient3.cuh b/cpp/gradient3.cuh index bcdf4e3..e5407d2 100644 --- a/cpp/gradient3.cuh +++ b/cpp/gradient3.cuh @@ -7,7 +7,7 @@ #include template -__global__ void gradient3(T* out, T* in, int x, int y, int z){ +__global__ void gradient3(T* out, T* in, float anisotropy, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -55,11 +55,13 @@ __global__ void gradient3(T* out, T* in, int x, int y, int z){ if(zi > 0 && zi < z-1) out[i * 3 + 2] = (in[i_zp] - in[i_zn]) / 2; + out[i * 3 + 2] *= 1/anisotropy; + } template -void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned int z){ +void gpu_gradient3(T* gpuGrad, T* gpuI, float anisotropy, unsigned int x, unsigned int y, unsigned int z){ int max_threads = stim::maxThreadsPerBlock(); @@ -67,12 +69,12 @@ void gpu_gradient3(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y, unsigned dim3 blocks(x / threads.x + 1, (y / threads.y + 1) * z); //call the GPU kernel to determine the gradient - gradient3 <<< blocks, threads >>>(gpuGrad, gpuI, x, y, z); + gradient3 <<< blocks, threads >>>(gpuGrad, gpuI, anisotropy, x, y, z); } template -void cpu_gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z){ +void cpu_gradient3(T* out, T* in, float anisotropy, unsigned int x, unsigned int y, unsigned int z){ //get the number of pixels in the image unsigned int pixels = x * y * z; @@ -90,7 +92,7 @@ void cpu_gradient3(T* out, T* in, unsigned int x, unsigned int y, unsigned int z cudaMalloc(&gpuOut, bytes * 3); //the output image will have two channels (x, y) //call the GPU version of this function - gpu_gradient3(gpuOut, gpuIn, x, y, z); + gpu_gradient3(gpuOut, gpuIn, anisotropy, x, y, z); //copy the results to the CPU cudaMemcpy(out, gpuOut, bytes * 3, cudaMemcpyDeviceToHost); diff --git a/cpp/local_max3.cuh b/cpp/local_max3.cuh index d189c4f..c94f875 100644 --- a/cpp/local_max3.cuh +++ b/cpp/local_max3.cuh @@ -36,7 +36,7 @@ __global__ void cuda_local_max3(T* gpu_center, T* gpu_vote, T t, int conn_x, int if (xl>=0 && yl>=0 && zl>=0 && xl lv_i) return; } } diff --git a/cpp/main.cpp b/cpp/main.cpp index cb012cc..4438624 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -11,7 +11,7 @@ #define pi 3.14159 -void ivote3(float* center, float* img, float std[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], +void ivote3(float* center, float* img, float std[], float anisotropy, float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z); void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){ @@ -24,9 +24,23 @@ void invert_data(float* cpuI, unsigned int x, unsigned int y, unsigned int z){ } } } - + int main(int argc, char** argv){ + + cudaDeviceProp prop; + int count; + cudaGetDeviceCount(&count); + //printf("cudadevicecount: %i\n", count); + for (int i=0; iimgrad3; + imgrad3.set_interleaved3(cpu_out, 128,128,128,3); + std::ofstream fgx("syn/gx-128.vol", std::ofstream::out | std::ofstream::binary); + fgx.write((char*)imgrad3.channel(0).data(), bytes); + fgx.close(); + */ //write the output file. - std::ofstream fo("output/" + OutName.str(), std::ofstream::out | std::ofstream::binary); + std::ofstream fo("std5.5-r10.10-v8/" + OutName.str(), std::ofstream::out | std::ofstream::binary); fo.write((char*)cpu_out, bytes); fo.close(); - + // creat a file for saving the list centers + + std::ofstream list("std5.5-r10.10-v8/center.txt"); + if (list.is_open()){ + + for (int ix=0; ix