Blame view

stim/cuda/templates/gradient.cuh 2.64 KB
13fe3c84   Laila Saadatifard   update the stimli...
1
2
3
4
5
  #ifndef STIM_CUDA_GRADIENT_H
  #define STIM_CUDA_GRADIENT_H
  
  #include <iostream>
  #include <cuda.h>
96f9b10f   Laila Saadatifard   change the header...
6
  #include <stim/cuda/cudatools.h>
13fe3c84   Laila Saadatifard   update the stimli...
7
8
9
10
11
  
  namespace stim{
  	namespace cuda{
  
  		template<typename T>
84ca9bba   Laila Saadatifard   fix some bugs in ...
12
  		__global__ void gradient_2d(T* out, T* in, int x, int y){
13fe3c84   Laila Saadatifard   update the stimli...
13
  
bf731970   Laila Saadatifard   fix some bugs in ...
14
15
16
17
18
19
  			
  			// calculate the 2D coordinates for this current thread.
  			int xi = blockIdx.x * blockDim.x + threadIdx.x;
  			int yi = blockIdx.y * blockDim.y + threadIdx.y;
  			// convert 2D coordinates to 1D
  			int i = yi * x + xi;
13fe3c84   Laila Saadatifard   update the stimli...
20
21
22
23
24
25
26
27
  
  			//return if the pixel is outside of the image
  			if(xi >= x || yi >= y) return;
  
  			//calculate indices for the forward difference
  			int i_xp = yi * x + (xi + 1);
  			int i_yp = (yi + 1) * x + xi;
  
bf731970   Laila Saadatifard   fix some bugs in ...
28
29
30
31
  			//calculate indices for the backward difference
  			int i_xn = yi * x + (xi - 1);
  			int i_yn = (yi - 1) * x + xi;
  
13fe3c84   Laila Saadatifard   update the stimli...
32
33
34
35
36
37
  			//use forward differences if a coordinate is zero
  			if(xi == 0)
  				out[i * 2 + 0] = in[i_xp] - in[i];
  			if(yi == 0)
  				out[i * 2 + 1] = in[i_yp] - in[i];
  
13fe3c84   Laila Saadatifard   update the stimli...
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
  			//use backward differences if the coordinate is at the maximum edge
  			if(xi == x-1)
  				out[i * 2 + 0] = in[i] - in[i_xn];
  			if(yi == y-1)
  				out[i * 2 + 1] = in[i] - in[i_yn];
  
  			//otherwise use central differences
  			if(xi > 0 && xi < x-1)
  				out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
  
  			if(yi > 0 && yi < y-1)
  				out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
  
  		}
  
  		template<typename T>
13fe3c84   Laila Saadatifard   update the stimli...
54
55
56
57
58
  		void gpu_gradient_2d(T* gpuGrad, T* gpuI, unsigned int x, unsigned int y){
  
  			//get the number of pixels in the image
  			unsigned int pixels = x * y;
  			
13fe3c84   Laila Saadatifard   update the stimli...
59
  			//get the maximum number of threads per block for the CUDA device
bf731970   Laila Saadatifard   fix some bugs in ...
60
61
62
63
  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads(max_threads, 1);
  			dim3 blocks(x/threads.x + 1 , y);
  			
13fe3c84   Laila Saadatifard   update the stimli...
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  
  			//call the GPU kernel to determine the gradient
  			gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
  
  		}
  
  		template<typename T>
  		void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
  
  			//get the number of pixels in the image
  			unsigned int pixels = x * y;
  			unsigned int bytes = pixels * sizeof(T);
  
  			//allocate space on the GPU for the input image
  			T* gpuIn;
  			HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
  
  			//copy the image data to the GPU
  			HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
  
  			//allocate space on the GPU for the output gradient image
  			T* gpuOut;
  			cudaMalloc(&gpuOut, bytes * 2);		//the output image will have two channels (x, y)
  
  			//call the GPU version of this function
  			gpu_gradient_2d(gpuOut, gpuIn, x, y);	
  
  			//copy the results to the CPU
  			cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
  
  			//free allocated memory
  			cudaFree(gpuOut);
bf731970   Laila Saadatifard   fix some bugs in ...
96
  			cudaFree(gpuIn);
13fe3c84   Laila Saadatifard   update the stimli...
97
98
99
100
101
102
103
  		}
  
  	}
  }
  
  
  #endif