From cd67346534b8aea38810379218f23a2da95e591b Mon Sep 17 00:00:00 2001
From: David <mayerich@uh.edu>
Date: Mon, 13 Feb 2017 12:41:47 -0600
Subject: [PATCH] add gradient to the math/filter directory

---
 stim/math/filters/gradient.cuh | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+), 0 deletions(-)
 create mode 100644 stim/math/filters/gradient.cuh
diff --git a/stim/math/filters/gradient.cuh b/stim/math/filters/gradient.cuh
new file mode 100644
index 0000000..88dd259
--- /dev/null
+++ b/stim/math/filters/gradient.cuh
@@ -0,0 +1,100 @@
+#ifndef STIM_CUDA_GRADIENT_H
+#define STIM_CUDA_GRADIENT_H
+
+#include <iostream>
+#include <cuda.h>
+#include <stim/cuda/cudatools.h>
+
+namespace stim{
+	namespace cuda{
+
+		template<typename T>
+		__global__ void gradient_2d(T* out, T* in, int x, int y){
+
+			
+			// calculate the 2D coordinates for this current thread.
+			int xi = blockIdx.x * blockDim.x + threadIdx.x;
+			int yi = blockIdx.y * blockDim.y + threadIdx.y;
+			// convert 2D coordinates to 1D
+			int i = yi * x + xi;
+
+			//return if the pixel is outside of the image
+			if(xi >= x || yi >= y) return;
+
+			//calculate indices for the forward difference
+			int i_xp = yi * x + (xi + 1);
+			int i_yp = (yi + 1) * x + xi;
+
+			//calculate indices for the backward difference
+			int i_xn = yi * x + (xi - 1);
+			int i_yn = (yi - 1) * x + xi;
+
+			//use forward differences if a coordinate is zero
+			if(xi == 0)
+				out[i * 2 + 0] = in[i_xp] - in[i];
+			if(yi == 0)
+				out[i * 2 + 1] = in[i_yp] - in[i];
+
+			//use backward differences if the coordinate is at the maximum edge
+			if(xi == x-1)
+				out[i * 2 + 0] = in[i] - in[i_xn];
+			if(yi == y-1)
+				out[i * 2 + 1] = in[i] - in[i_yn];
+
+			//otherwise use central differences
+			if(xi > 0 && xi < x-1)
+				out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
+
+			if(yi > 0 && yi < y-1)
+				out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
+
+		}
+
+		template<typename T>
+		void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){
+			
+			//get the maximum number of threads per block for the CUDA device
+			unsigned int max_threads = stim::maxThreadsPerBlock();
+			dim3 threads(max_threads, 1);
+			dim3 blocks(x/threads.x + 1 , y);
+			
+
+			//call the GPU kernel to determine the gradient
+			gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
+
+		}
+
+		template<typename T>
+		void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
+
+			//get the number of pixels in the image
+			unsigned int pixels = x * y;
+			unsigned int bytes = pixels * sizeof(T);
+
+			//allocate space on the GPU for the input image
+			T* gpuIn;
+			HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
+
+			//copy the image data to the GPU
+			HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
+
+			//allocate space on the GPU for the output gradient image
+			T* gpuOut;
+			cudaMalloc(&gpuOut, bytes * 2);		//the output image will have two channels (x, y)
+
+			//call the GPU version of this function
+			gpu_gradient_2d(gpuOut, gpuIn, x, y);	
+
+			//copy the results to the CPU
+			cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
+
+			//free allocated memory
+			cudaFree(gpuOut);
+			cudaFree(gpuIn);
+		}
+
+	}
+}
+
+
+#endif
\ No newline at end of file
--
libgit2 0.21.4