Commit cd67346534b8aea38810379218f23a2da95e591b
1 parent
01707489
add gradient to the math/filter directory
Showing
1 changed file
with
100 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_CUDA_GRADIENT_H | ||
2 | +#define STIM_CUDA_GRADIENT_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | + | ||
8 | +namespace stim{ | ||
9 | + namespace cuda{ | ||
10 | + | ||
// CUDA kernel: finite-difference gradient of a single-channel 2D image.
//
// Each thread handles one pixel at (xi, yi), derived from the 2D block/thread
// indices; threads that fall outside the image return without touching memory.
//
//   out : 2 * x * y values, interleaved per pixel:
//         out[2*i + 0] = derivative along x, out[2*i + 1] = derivative along y
//   in  : x * y values, row-major (pixel (xi, yi) is stored at in[yi * x + xi])
//   x   : image width in pixels
//   y   : image height in pixels
//
// Differencing scheme, applied independently per axis:
//   - interior samples : central difference, (next - previous) / 2
//   - first sample     : forward difference,  next - current
//   - last sample      : backward difference, current - previous
//   - axis of length 1 : there is no neighbor to difference against, so 0 is written
//
// Launch layout: the grid must cover [0, x) along .x and [0, y) along .y.
template<typename T>
__global__ void gradient_2d(T* out, T* in, int x, int y){

	// calculate the 2D coordinates for this current thread
	int xi = blockIdx.x * blockDim.x + threadIdx.x;
	int yi = blockIdx.y * blockDim.y + threadIdx.y;

	// return if the pixel is outside of the image
	if(xi >= x || yi >= y) return;

	// convert 2D coordinates to 1D; the index is built in size_t so that neither
	// yi * x nor the later i * 2 can overflow a 32-bit int on large images
	const size_t row = (size_t)x;					//distance (in elements) between vertically adjacent pixels
	const size_t i = (size_t)yi * row + (size_t)xi;

	// derivative along x
	T dx;
	if(x == 1)
		dx = T(0);									//single column: both neighbors would be outside the row
	else if(xi == 0)
		dx = in[i + 1] - in[i];						//forward difference at the left edge
	else if(xi == x - 1)
		dx = in[i] - in[i - 1];						//backward difference at the right edge
	else
		dx = (in[i + 1] - in[i - 1]) / 2;			//central difference in the interior

	// derivative along y
	T dy;
	if(y == 1)
		dy = T(0);									//single row: both neighbors would be outside the image
	else if(yi == 0)
		dy = in[i + row] - in[i];					//forward difference at the top edge
	else if(yi == y - 1)
		dy = in[i] - in[i - row];					//backward difference at the bottom edge
	else
		dy = (in[i + row] - in[i - row]) / 2;		//central difference in the interior

	// store the two gradient components interleaved
	out[i * 2 + 0] = dx;
	out[i * 2 + 1] = dy;
}
52 | + | ||
// Launches gradient_2d on buffers that already reside on the GPU.
//
//   gpuGrad : device buffer receiving 2 * x * y values (x- and y-derivative interleaved per pixel)
//   gpuI    : device buffer holding the x * y input image
//   x, y    : image width and height in pixels
//
// The launch is asynchronous: this function checks that the launch itself was
// accepted, but does not wait for the kernel to finish.
// NOTE(review): each image row is mapped to one grid row, so y is bounded by the
// device's maximum grid y-dimension; a larger y is now reported as a launch error
// instead of being silently ignored.
template<typename T>
void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){

	//an empty image has nothing to compute, and a zero-sized grid is an invalid launch configuration
	if(x == 0 || y == 0) return;

	//get the maximum number of threads per block for the CUDA device
	unsigned int max_threads = stim::maxThreadsPerBlock();
	dim3 threads(max_threads, 1);

	//number of blocks needed to cover one row, rounded up (written to avoid overflow in x + threads.x)
	unsigned int blocks_x = x / threads.x + ((x % threads.x != 0) ? 1 : 0);
	dim3 blocks(blocks_x, y);

	//call the GPU kernel to determine the gradient
	gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);

	//kernel launches return no status; configuration errors are only visible through cudaGetLastError()
	HANDLE_ERROR(cudaGetLastError());
}
66 | + | ||
// Computes the 2D gradient of an image stored in host memory, using the GPU.
//
//   out  : host buffer receiving 2 * x * y values (x- and y-derivative interleaved per pixel)
//   in   : host buffer holding the x * y input image
//   x, y : image width and height in pixels
//
// Temporary device buffers are allocated for the input and output, the image is
// uploaded, gpu_gradient_2d is invoked, and the result is copied back to 'out'.
// Every CUDA runtime call is wrapped in HANDLE_ERROR.
template<typename T>
void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){

	//get the number of pixels in the image (size_t: x * y and the byte counts can exceed 32 bits)
	size_t pixels = (size_t)x * (size_t)y;

	//nothing to upload, compute, or download for an empty image
	if(pixels == 0) return;

	size_t bytes = pixels * sizeof(T);

	//allocate space on the GPU for the input image
	T* gpuIn;
	HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));

	//copy the image data to the GPU
	HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));

	//allocate space on the GPU for the output gradient image
	T* gpuOut;
	HANDLE_ERROR(cudaMalloc(&gpuOut, bytes * 2));				//the output image will have two channels (x, y)

	//call the GPU version of this function
	gpu_gradient_2d(gpuOut, gpuIn, x, y);

	//copy the results to the CPU; this blocking copy is also where a failure
	//during kernel execution gets reported
	HANDLE_ERROR(cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost));

	//free allocated memory
	HANDLE_ERROR(cudaFree(gpuOut));
	HANDLE_ERROR(cudaFree(gpuIn));
}
95 | + | ||
96 | + } | ||
97 | +} | ||
98 | + | ||
99 | + | ||
100 | +#endif | ||
0 | \ No newline at end of file | 101 | \ No newline at end of file |