Commit cd67346534b8aea38810379218f23a2da95e591b

Authored by David Mayerich
1 parent 01707489

add gradient to the math/filter directory

Showing 1 changed file with 100 additions and 0 deletions   Show diff stats
stim/math/filters/gradient.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_GRADIENT_H
  2 +#define STIM_CUDA_GRADIENT_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +
  8 +namespace stim{
  9 + namespace cuda{
  10 +
  11 + template<typename T>
  12 + __global__ void gradient_2d(T* out, T* in, int x, int y){
  13 +
  14 +
  15 + // calculate the 2D coordinates for this current thread.
  16 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  17 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  18 + // convert 2D coordinates to 1D
  19 + int i = yi * x + xi;
  20 +
  21 + //return if the pixel is outside of the image
  22 + if(xi >= x || yi >= y) return;
  23 +
  24 + //calculate indices for the forward difference
  25 + int i_xp = yi * x + (xi + 1);
  26 + int i_yp = (yi + 1) * x + xi;
  27 +
  28 + //calculate indices for the backward difference
  29 + int i_xn = yi * x + (xi - 1);
  30 + int i_yn = (yi - 1) * x + xi;
  31 +
  32 + //use forward differences if a coordinate is zero
  33 + if(xi == 0)
  34 + out[i * 2 + 0] = in[i_xp] - in[i];
  35 + if(yi == 0)
  36 + out[i * 2 + 1] = in[i_yp] - in[i];
  37 +
  38 + //use backward differences if the coordinate is at the maximum edge
  39 + if(xi == x-1)
  40 + out[i * 2 + 0] = in[i] - in[i_xn];
  41 + if(yi == y-1)
  42 + out[i * 2 + 1] = in[i] - in[i_yn];
  43 +
  44 + //otherwise use central differences
  45 + if(xi > 0 && xi < x-1)
  46 + out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
  47 +
  48 + if(yi > 0 && yi < y-1)
  49 + out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
  50 +
  51 + }
  52 +
  53 + template<typename T>
  54 + void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){
  55 +
  56 + //get the maximum number of threads per block for the CUDA device
  57 + unsigned int max_threads = stim::maxThreadsPerBlock();
  58 + dim3 threads(max_threads, 1);
  59 + dim3 blocks(x/threads.x + 1 , y);
  60 +
  61 +
  62 + //call the GPU kernel to determine the gradient
  63 + gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
  64 +
  65 + }
  66 +
  67 + template<typename T>
  68 + void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
  69 +
  70 + //get the number of pixels in the image
  71 + unsigned int pixels = x * y;
  72 + unsigned int bytes = pixels * sizeof(T);
  73 +
  74 + //allocate space on the GPU for the input image
  75 + T* gpuIn;
  76 + HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
  77 +
  78 + //copy the image data to the GPU
  79 + HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
  80 +
  81 + //allocate space on the GPU for the output gradient image
  82 + T* gpuOut;
  83 + cudaMalloc(&gpuOut, bytes * 2); //the output image will have two channels (x, y)
  84 +
  85 + //call the GPU version of this function
  86 + gpu_gradient_2d(gpuOut, gpuIn, x, y);
  87 +
  88 + //copy the results to the CPU
  89 + cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
  90 +
  91 + //free allocated memory
  92 + cudaFree(gpuOut);
  93 + cudaFree(gpuIn);
  94 + }
  95 +
  96 + }
  97 +}
  98 +
  99 +
  100 +#endif
0 101 \ No newline at end of file
... ...