Fix one bug in the gaussian_blur3 code; this ivote3 project works well for data image sizes smaller than 512 by 512 by 256.

Fix one bug in the gaussian_blur3 code; this ivote3 project works well for data image sizes smaller than 512 by 512 by 256.
Laila Saadatifard
1 parent 5c079506
Showing 5 changed files with 18 additions and 15 deletions Show diff stats
cpp/cudafunc.cu
cpp/float_to_half.cuh
cpp/gaussian_blur3.cuh
cpp/main.cpp
cpp/update_dir3.cuh
+#include "cuda_fp16.h"
+#include "float_to_half.cuh"
+#include "half_to_float.cuh"
 #include "gaussian_blur3.cuh"
 #include "gradient3.cuh"
 #include "mag3.cuh"
@@ -9,7 +12,6 @@
 void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[],
 			int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
  
-
 	// compute the number of bytes in the input data
 	unsigned int bytes = x * y * z * sizeof(float);
  
@@ -68,7 +70,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
  
 	cudaFree(gpu_grad);
 	cudaFree(gpu_mag);
-	//cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
+	cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  
 	//allocate space on the gpu for the final detected cells.
 	float* gpu_output;
@@ -78,7 +80,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
 	gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z);
  
 	//copy the final result to the cpu.
-	cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
+	//cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
  
  
 	cudaFree(gpu_vote);
@@ -6,8 +6,8 @@
 #include <stim/cuda/cudatools.h>
 #include <stim/cuda/sharedmem.cuh>
 #include <stim/cuda/cudatools/error.h>
-#include "cuda_fp16.h"
-
+#include <cuda_fp16.h>
+#include <stdio.h>
 		__global__ void cuda_f2h(half* gpu_half, float* gpu_float, int x, int y, int z){
  
  
@@ -24,7 +24,8 @@
  
  
 			gpu_half[i] = __float2half(gpu_float[i]);
-							
+			
+			
 		}
  
  
@@ -12,7 +12,7 @@
  
  
 		template<typename T>
-		__global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
+		__global__ void blur_x(T* out, T* in, T sigma, int x, int y, int z){
  
 			//calculate x,y,z coordinates for this thread
 			int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -55,7 +55,7 @@
  
  
 		template<typename T>
-		__global__ void blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
+		__global__ void blur_y(T* out, T* in, T sigma, int x, int y, int z){
  
 			//calculate x,y,z coordinates for this thread
 			int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -98,7 +98,7 @@
 		}
  
 		template<typename T>
-		__global__ void blur_z(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
+		__global__ void blur_z(T* out, T* in, T sigma, int x, int y, int z){
  
 			//calculate x,y,z coordinates for this thread
 			int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -104,7 +104,7 @@ int main(int argc, char** argv){
 	invert_data(cpuI, x, y, z);
  
 	//write a new file from the cpuI.
-	std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary);
+	std::ofstream original("output/original_invert--512.vol", std::ofstream::out | std::ofstream::binary);
 	original.write((char*)cpuI, bytes);
 	original.close();
  
@@ -115,7 +115,7 @@ int main(int argc, char** argv){
 	ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z);
  
 	//write the blurred file from the cpuI.
-	std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary);
+	std::ofstream fblur("output/v1--512.vol", std::ofstream::out | std::ofstream::binary);
 	fblur.write((char*)cpuI, bytes);
 	fblur.close();
  
@@ -5,7 +5,7 @@
 # include <cuda.h>
 #include <stim/cuda/cudatools.h>
 #include <stim/cuda/sharedmem.cuh>
-
+#include <cuda_fp16.h>
  
 		// this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
 		template<typename T>
@@ -33,9 +33,9 @@
 			float max = 0;
 			float l_vote = 0;
 			// define local variables for the x, y, and z coordinates where the maximum occurred
-			int id_x = g_v_x;
-			int id_y = g_v_y;
-			int id_z = g_v_z;
+			float id_x = g_v_x;
+			float id_y = g_v_y;
+			float id_z = g_v_z;
  
 			int rx_sq = rx * rx;
 			int ry_sq = ry * ry;