codebase / stimlib

Browse Code »

Commit cd67346534b8aea38810379218f23a2da95e591b

Authored by David Mayerich 2017-02-13 12:41:47 -0600

1 parent 01707489

add gradient to the math/filter directory

Showing 1 changed file with 100 additions and 0 deletions Show diff stats

Inline Side-by-side

stim/math/filters/gradient.cuh 0 → 100644

Show/Hide comments View file @cd67346

		1	+#ifndef STIM_CUDA_GRADIENT_H
		2	+#define STIM_CUDA_GRADIENT_H
		3	+
		4	+#include <iostream>
		5	+#include <cuda.h>
		6	+#include <stim/cuda/cudatools.h>
		7	+
		8	+namespace stim{
		9	+ namespace cuda{
		10	+
		11	+ template<typename T>
		12	+ __global__ void gradient_2d(T* out, T* in, int x, int y){
		13	+
		14	+
		15	+ // calculate the 2D coordinates for this current thread.
		16	+ int xi = blockIdx.x * blockDim.x + threadIdx.x;
		17	+ int yi = blockIdx.y * blockDim.y + threadIdx.y;
		18	+ // convert 2D coordinates to 1D
		19	+ int i = yi * x + xi;
		20	+
		21	+ //return if the pixel is outside of the image
		22	+ if(xi >= x \|\| yi >= y) return;
		23	+
		24	+ //calculate indices for the forward difference
		25	+ int i_xp = yi * x + (xi + 1);
		26	+ int i_yp = (yi + 1) * x + xi;
		27	+
		28	+ //calculate indices for the backward difference
		29	+ int i_xn = yi * x + (xi - 1);
		30	+ int i_yn = (yi - 1) * x + xi;
		31	+
		32	+ //use forward differences if a coordinate is zero
		33	+ if(xi == 0)
		34	+ out[i * 2 + 0] = in[i_xp] - in[i];
		35	+ if(yi == 0)
		36	+ out[i * 2 + 1] = in[i_yp] - in[i];
		37	+
		38	+ //use backward differences if the coordinate is at the maximum edge
		39	+ if(xi == x-1)
		40	+ out[i * 2 + 0] = in[i] - in[i_xn];
		41	+ if(yi == y-1)
		42	+ out[i * 2 + 1] = in[i] - in[i_yn];
		43	+
		44	+ //otherwise use central differences
		45	+ if(xi > 0 && xi < x-1)
		46	+ out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
		47	+
		48	+ if(yi > 0 && yi < y-1)
		49	+ out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
		50	+
		51	+ }
		52	+
		53	+ template<typename T>
		54	+ void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){
		55	+
		56	+ //get the maximum number of threads per block for the CUDA device
		57	+ unsigned int max_threads = stim::maxThreadsPerBlock();
		58	+ dim3 threads(max_threads, 1);
		59	+ dim3 blocks(x/threads.x + 1 , y);
		60	+
		61	+
		62	+ //call the GPU kernel to determine the gradient
		63	+ gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
		64	+
		65	+ }
		66	+
		67	+ template<typename T>
		68	+ void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
		69	+
		70	+ //get the number of pixels in the image
		71	+ unsigned int pixels = x * y;
		72	+ unsigned int bytes = pixels * sizeof(T);
		73	+
		74	+ //allocate space on the GPU for the input image
		75	+ T* gpuIn;
		76	+ HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
		77	+
		78	+ //copy the image data to the GPU
		79	+ HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
		80	+
		81	+ //allocate space on the GPU for the output gradient image
		82	+ T* gpuOut;
		83	+ cudaMalloc(&gpuOut, bytes * 2); //the output image will have two channels (x, y)
		84	+
		85	+ //call the GPU version of this function
		86	+ gpu_gradient_2d(gpuOut, gpuIn, x, y);
		87	+
		88	+ //copy the results to the CPU
		89	+ cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
		90	+
		91	+ //free allocated memory
		92	+ cudaFree(gpuOut);
		93	+ cudaFree(gpuIn);
		94	+ }
		95	+
		96	+ }
		97	+}
		98	+
		99	+
		100	+#endif
0	\ No newline at end of file	101	\ No newline at end of file