Commit cd67346534b8aea38810379218f23a2da95e591b
1 parent
01707489
add gradient to the math/filter directory
Showing
1 changed file
with
100 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_CUDA_GRADIENT_H | |
2 | +#define STIM_CUDA_GRADIENT_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <cuda.h> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | + | |
8 | +namespace stim{ | |
9 | + namespace cuda{ | |
10 | + | |
11 | + template<typename T> | |
12 | + __global__ void gradient_2d(T* out, T* in, int x, int y){ | |
13 | + | |
14 | + | |
15 | + // calculate the 2D coordinates for this current thread. | |
16 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | + int yi = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | + // convert 2D coordinates to 1D | |
19 | + int i = yi * x + xi; | |
20 | + | |
21 | + //return if the pixel is outside of the image | |
22 | + if(xi >= x || yi >= y) return; | |
23 | + | |
24 | + //calculate indices for the forward difference | |
25 | + int i_xp = yi * x + (xi + 1); | |
26 | + int i_yp = (yi + 1) * x + xi; | |
27 | + | |
28 | + //calculate indices for the backward difference | |
29 | + int i_xn = yi * x + (xi - 1); | |
30 | + int i_yn = (yi - 1) * x + xi; | |
31 | + | |
32 | + //use forward differences if a coordinate is zero | |
33 | + if(xi == 0) | |
34 | + out[i * 2 + 0] = in[i_xp] - in[i]; | |
35 | + if(yi == 0) | |
36 | + out[i * 2 + 1] = in[i_yp] - in[i]; | |
37 | + | |
38 | + //use backward differences if the coordinate is at the maximum edge | |
39 | + if(xi == x-1) | |
40 | + out[i * 2 + 0] = in[i] - in[i_xn]; | |
41 | + if(yi == y-1) | |
42 | + out[i * 2 + 1] = in[i] - in[i_yn]; | |
43 | + | |
44 | + //otherwise use central differences | |
45 | + if(xi > 0 && xi < x-1) | |
46 | + out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2; | |
47 | + | |
48 | + if(yi > 0 && yi < y-1) | |
49 | + out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2; | |
50 | + | |
51 | + } | |
52 | + | |
53 | + template<typename T> | |
54 | + void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){ | |
55 | + | |
56 | + //get the maximum number of threads per block for the CUDA device | |
57 | + unsigned int max_threads = stim::maxThreadsPerBlock(); | |
58 | + dim3 threads(max_threads, 1); | |
59 | + dim3 blocks(x/threads.x + 1 , y); | |
60 | + | |
61 | + | |
62 | + //call the GPU kernel to determine the gradient | |
63 | + gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y); | |
64 | + | |
65 | + } | |
66 | + | |
67 | + template<typename T> | |
68 | + void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){ | |
69 | + | |
70 | + //get the number of pixels in the image | |
71 | + unsigned int pixels = x * y; | |
72 | + unsigned int bytes = pixels * sizeof(T); | |
73 | + | |
74 | + //allocate space on the GPU for the input image | |
75 | + T* gpuIn; | |
76 | + HANDLE_ERROR(cudaMalloc(&gpuIn, bytes)); | |
77 | + | |
78 | + //copy the image data to the GPU | |
79 | + HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice)); | |
80 | + | |
81 | + //allocate space on the GPU for the output gradient image | |
82 | + T* gpuOut; | |
83 | + cudaMalloc(&gpuOut, bytes * 2); //the output image will have two channels (x, y) | |
84 | + | |
85 | + //call the GPU version of this function | |
86 | + gpu_gradient_2d(gpuOut, gpuIn, x, y); | |
87 | + | |
88 | + //copy the results to the CPU | |
89 | + cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost); | |
90 | + | |
91 | + //free allocated memory | |
92 | + cudaFree(gpuOut); | |
93 | + cudaFree(gpuIn); | |
94 | + } | |
95 | + | |
96 | + } | |
97 | +} | |
98 | + | |
99 | + | |
100 | +#endif | |
0 | 101 | \ No newline at end of file | ... | ... |