codebase / stimlib

Browse Code »

Commit cd67346534b8aea38810379218f23a2da95e591b

Authored by David Mayerich 2017-02-13 12:41:47 -0600

1 parent 01707489

add gradient to the math/filter directory

Showing 1 changed file with 100 additions and 0 deletions Show diff stats

Inline Side-by-side

stim/math/filters/gradient.cuh 0 → 100644

Wrap text Show/Hide comments View file @cd67346

	1	+#ifndef STIM_CUDA_GRADIENT_H
	2	+#define STIM_CUDA_GRADIENT_H
	3	+
	4	+#include <iostream>
	5	+#include <cuda.h>
	6	+#include <stim/cuda/cudatools.h>
	7	+
	8	+namespace stim{
	9	+ namespace cuda{
	10	+
	11	+ template<typename T>
	12	+ __global__ void gradient_2d(T* out, T* in, int x, int y){
	13	+
	14	+
	15	+ // calculate the 2D coordinates for this current thread.
	16	+ int xi = blockIdx.x * blockDim.x + threadIdx.x;
	17	+ int yi = blockIdx.y * blockDim.y + threadIdx.y;
	18	+ // convert 2D coordinates to 1D
	19	+ int i = yi * x + xi;
	20	+
	21	+ //return if the pixel is outside of the image
	22	+ if(xi >= x \|\| yi >= y) return;
	23	+
	24	+ //calculate indices for the forward difference
	25	+ int i_xp = yi * x + (xi + 1);
	26	+ int i_yp = (yi + 1) * x + xi;
	27	+
	28	+ //calculate indices for the backward difference
	29	+ int i_xn = yi * x + (xi - 1);
	30	+ int i_yn = (yi - 1) * x + xi;
	31	+
	32	+ //use forward differences if a coordinate is zero
	33	+ if(xi == 0)
	34	+ out[i * 2 + 0] = in[i_xp] - in[i];
	35	+ if(yi == 0)
	36	+ out[i * 2 + 1] = in[i_yp] - in[i];
	37	+
	38	+ //use backward differences if the coordinate is at the maximum edge
	39	+ if(xi == x-1)
	40	+ out[i * 2 + 0] = in[i] - in[i_xn];
	41	+ if(yi == y-1)
	42	+ out[i * 2 + 1] = in[i] - in[i_yn];
	43	+
	44	+ //otherwise use central differences
	45	+ if(xi > 0 && xi < x-1)
	46	+ out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
	47	+
	48	+ if(yi > 0 && yi < y-1)
	49	+ out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
	50	+
	51	+ }
	52	+
	53	+ template<typename T>
	54	+ void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){
	55	+
	56	+ //get the maximum number of threads per block for the CUDA device
	57	+ unsigned int max_threads = stim::maxThreadsPerBlock();
	58	+ dim3 threads(max_threads, 1);
	59	+ dim3 blocks(x/threads.x + 1 , y);
	60	+
	61	+
	62	+ //call the GPU kernel to determine the gradient
	63	+ gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
	64	+
	65	+ }
	66	+
	67	+ template<typename T>
	68	+ void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
	69	+
	70	+ //get the number of pixels in the image
	71	+ unsigned int pixels = x * y;
	72	+ unsigned int bytes = pixels * sizeof(T);
	73	+
	74	+ //allocate space on the GPU for the input image
	75	+ T* gpuIn;
	76	+ HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
	77	+
	78	+ //copy the image data to the GPU
	79	+ HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
	80	+
	81	+ //allocate space on the GPU for the output gradient image
	82	+ T* gpuOut;
	83	+ cudaMalloc(&gpuOut, bytes * 2); //the output image will have two channels (x, y)
	84	+
	85	+ //call the GPU version of this function
	86	+ gpu_gradient_2d(gpuOut, gpuIn, x, y);
	87	+
	88	+ //copy the results to the CPU
	89	+ cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
	90	+
	91	+ //free allocated memory
	92	+ cudaFree(gpuOut);
	93	+ cudaFree(gpuIn);
	94	+ }
	95	+
	96	+ }
	97	+}
	98	+
	99	+
	100	+#endif
0	101	\ No newline at end of file
...	...