cudafunc.cu
2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include "gaussian_blur3.cuh"
#include "gradient3.cuh"
#include "mag3.cuh"
#include "vote3_atomic_aabb.cuh"
#include "update_dir3_aabb.cuh"
#include "local_max3.cuh"
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
void ivote3(float* img, float sigma[], float phi, float d_phi, unsigned int r[],
int iter, float t, unsigned int conn[], size_t x, size_t y, size_t z){
cudaSetDevice(0);
size_t bytes = x * y * z * sizeof(float); // compute the number of bytes in the input data
float* gpuI0; //assign memory on gpu for the input data
cudaMalloc(&gpuI0, bytes);
cudaMemcpy(gpuI0, img, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
gpu_gaussian_blur3<float>(gpuI0, sigma, x, y, z); //call the blurring function from the gpu.
cudaDeviceSynchronize();
float* gpu_grad; //assign memory on the gpu for the gradient along the X, y, z.
cudaMalloc(&gpu_grad, bytes*3);
gpu_gradient3<float>(gpu_grad, gpuI0, x, y, z); //call the gradient function from the gpu.
cudaFree(gpuI0);
float* gpu_vote;
cudaMalloc(&gpu_vote, bytes);
float cos_phi = cos(phi);
//call the vote function.
for (int i = 0; i < iter; i++){
cudaMemset(gpu_vote, 0, bytes);
gpu_vote3<float>(gpu_vote, gpu_grad, phi, cos_phi, r, x, y, z);
cudaDeviceSynchronize();
cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);
std::string filename = "0-vote";
std::ofstream fvote(filename + std::to_string(i+1) + "_aabb.vol", std::ofstream::out | std::ofstream::binary);
fvote.write((char*)img, bytes);
fvote.close();
gpu_update_dir3<float>(gpu_grad, gpu_vote, phi, cos_phi, r, x, y, z);
cudaDeviceSynchronize();
phi = phi - d_phi;
cos_phi = cos(phi);
}
cudaFree(gpu_grad);
cudaMemcpy(img, gpu_vote, bytes, cudaMemcpyDeviceToHost);
cudaFree(gpu_vote);
//cudaFree(gpu_output);
}
void lmax(float* out, float* in, float t, unsigned int conn[], size_t x, size_t y, size_t z){
size_t bytes = x * y * z * sizeof(float);
cudaSetDevice(0);
float* gpuV; //assign memory on gpu for the input data.
cudaMalloc(&gpuV, bytes);
cudaMemcpy(gpuV, in, bytes, cudaMemcpyHostToDevice); //copy the image data to the GPU.
float* gpuOut;
cudaMalloc(&gpuOut, bytes);
gpu_local_max3<float>(gpuOut, gpuV, t, conn, x, y, z); //call the local max function
cudaMemcpy(out, gpuOut, bytes, cudaMemcpyDeviceToHost); //copy the final result to the cpu.
cudaFree(gpuV);
cudaFree(gpuOut);
}