Commit 6ef1dab9e7cfa8e30d297ba698ffab696b112dc7

Authored by Laila Saadatifard
1 parent 5c079506

fix one bug in the gaussian_blur3 code; this ivote3 project works well for data image sizes less than 512 by 512 by 256
cpp/cudafunc.cu
  1 +#include "cuda_fp16.h"
  2 +#include "float_to_half.cuh"
  3 +#include "half_to_float.cuh"
1 4 #include "gaussian_blur3.cuh"
2 5 #include "gradient3.cuh"
3 6 #include "mag3.cuh"
... ... @@ -9,7 +12,6 @@
9 12 void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[],
10 13 int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){
11 14  
12   -
13 15 // compute the number of bytes in the input data
14 16 unsigned int bytes = x * y * z * sizeof(float);
15 17  
... ... @@ -68,7 +70,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
68 70  
69 71 cudaFree(gpu_grad);
70 72 cudaFree(gpu_mag);
71   - //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
  73 + cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost);
72 74  
73 75 //allocate space on the gpu for the final detected cells.
74 76 float* gpu_output;
... ... @@ -78,7 +80,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un
78 80 gpu_local_max3<float>(gpu_output, gpu_vote, t, conn, x, y, z);
79 81  
80 82 //copy the final result to the cpu.
81   - cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
  83 + //cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost);
82 84  
83 85  
84 86 cudaFree(gpu_vote);
... ...
cpp/float_to_half.cuh
... ... @@ -6,8 +6,8 @@
6 6 #include <stim/cuda/cudatools.h>
7 7 #include <stim/cuda/sharedmem.cuh>
8 8 #include <stim/cuda/cudatools/error.h>
9   -#include "cuda_fp16.h"
10   -
  9 +#include <cuda_fp16.h>
  10 +#include <stdio.h>
11 11 __global__ void cuda_f2h(half* gpu_half, float* gpu_float, int x, int y, int z){
12 12  
13 13  
... ... @@ -24,7 +24,8 @@
24 24  
25 25  
26 26 gpu_half[i] = __float2half(gpu_float[i]);
27   -
  27 +
  28 +
28 29 }
29 30  
30 31  
... ...
cpp/gaussian_blur3.cuh
... ... @@ -12,7 +12,7 @@
12 12  
13 13  
14 14 template<typename T>
15   - __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
  15 + __global__ void blur_x(T* out, T* in, T sigma, int x, int y, int z){
16 16  
17 17 //calculate x,y,z coordinates for this thread
18 18 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -55,7 +55,7 @@
55 55  
56 56  
57 57 template<typename T>
58   - __global__ void blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
  58 + __global__ void blur_y(T* out, T* in, T sigma, int x, int y, int z){
59 59  
60 60 //calculate x,y,z coordinates for this thread
61 61 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -98,7 +98,7 @@
98 98 }
99 99  
100 100 template<typename T>
101   - __global__ void blur_z(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){
  101 + __global__ void blur_z(T* out, T* in, T sigma, int x, int y, int z){
102 102  
103 103 //calculate x,y,z coordinates for this thread
104 104 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ...
cpp/main.cpp
... ... @@ -104,7 +104,7 @@ int main(int argc, char** argv){
104 104 invert_data(cpuI, x, y, z);
105 105  
106 106 //write a new file from the cpuI.
107   - std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary);
  107 + std::ofstream original("output/original_invert--512.vol", std::ofstream::out | std::ofstream::binary);
108 108 original.write((char*)cpuI, bytes);
109 109 original.close();
110 110  
... ... @@ -115,7 +115,7 @@ int main(int argc, char** argv){
115 115 ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z);
116 116  
117 117 //write the blurred file from the cpuI.
118   - std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary);
  118 + std::ofstream fblur("output/v1--512.vol", std::ofstream::out | std::ofstream::binary);
119 119 fblur.write((char*)cpuI, bytes);
120 120 fblur.close();
121 121  
... ...
cpp/update_dir3.cuh
... ... @@ -5,7 +5,7 @@
5 5 # include <cuda.h>
6 6 #include <stim/cuda/cudatools.h>
7 7 #include <stim/cuda/sharedmem.cuh>
8   -
  8 +#include <cuda_fp16.h>
9 9  
10 10 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
11 11 template<typename T>
... ... @@ -33,9 +33,9 @@
33 33 float max = 0;
34 34 float l_vote = 0;
35 35 // define local variables for the x, y, and z coordinations where the maximum happened
36   - int id_x = g_v_x;
37   - int id_y = g_v_y;
38   - int id_z = g_v_z;
  36 + float id_x = g_v_x;
  37 + float id_y = g_v_y;
  38 + float id_z = g_v_z;
39 39  
40 40 int rx_sq = rx * rx;
41 41 int ry_sq = ry * ry;
... ...