Blame view

stim/cuda/templates/gradient.cuh 2.47 KB
13fe3c84   Laila Saadatifard   update the stimli...
1
2
3
4
5
  #ifndef STIM_CUDA_GRADIENT_H
  #define STIM_CUDA_GRADIENT_H
  
  #include <iostream>
  #include <cuda.h>
96f9b10f   Laila Saadatifard   change the header...
6
  #include <stim/cuda/cudatools.h>
13fe3c84   Laila Saadatifard   update the stimli...
7
8
9
10
11
  
  namespace stim{
  	namespace cuda{
  
  		template<typename T>
276f9b23   David Mayerich   forced matching t...
12
  		__global__ void gradient_2d(T* out, T* in, size_t x, size_t y){
13fe3c84   Laila Saadatifard   update the stimli...
13
  
bf731970   Laila Saadatifard   fix some bugs in ...
14
15
  			
  			// calculate the 2D coordinates for this current thread.
276f9b23   David Mayerich   forced matching t...
16
17
  			size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
  			size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
bf731970   Laila Saadatifard   fix some bugs in ...
18
  			// convert 2D coordinates to 1D
276f9b23   David Mayerich   forced matching t...
19
  			size_t i = yi * x + xi;
13fe3c84   Laila Saadatifard   update the stimli...
20
21
22
23
24
  
  			//return if the pixel is outside of the image
  			if(xi >= x || yi >= y) return;
  
  			//calculate indices for the forward difference
276f9b23   David Mayerich   forced matching t...
25
26
  			size_t i_xp = yi * x + (xi + 1);
  			size_t i_yp = (yi + 1) * x + xi;
13fe3c84   Laila Saadatifard   update the stimli...
27
  
bf731970   Laila Saadatifard   fix some bugs in ...
28
  			//calculate indices for the backward difference
276f9b23   David Mayerich   forced matching t...
29
30
  			size_t i_xn = yi * x + (xi - 1);
  			size_t i_yn = (yi - 1) * x + xi;
bf731970   Laila Saadatifard   fix some bugs in ...
31
  
13fe3c84   Laila Saadatifard   update the stimli...
32
33
34
  			//use forward differences if a coordinate is zero
  			if(xi == 0)
  				out[i * 2 + 0] = in[i_xp] - in[i];
800ff264   David Mayerich   simplified condit...
35
  			else if (xi == x - 1)
13fe3c84   Laila Saadatifard   update the stimli...
36
  				out[i * 2 + 0] = in[i] - in[i_xn];
800ff264   David Mayerich   simplified condit...
37
  			else
13fe3c84   Laila Saadatifard   update the stimli...
38
39
  				out[i * 2 + 0] = (in[i_xp] - in[i_xn]) / 2;
  
800ff264   David Mayerich   simplified condit...
40
41
42
43
44
  			if(yi == 0)
  				out[i * 2 + 1] = in[i_yp] - in[i];
  			else if(yi == y-1)
  				out[i * 2 + 1] = in[i] - in[i_yn];
  			else
13fe3c84   Laila Saadatifard   update the stimli...
45
46
47
48
49
  				out[i * 2 + 1] = (in[i_yp] - in[i_yn]) / 2;
  
  		}
  
  		template<typename T>
276f9b23   David Mayerich   forced matching t...
50
  		void gpu_gradient_2d(T* gpuGrad, T* gpuI, size_t x, size_t y){
13fe3c84   Laila Saadatifard   update the stimli...
51
  			
13fe3c84   Laila Saadatifard   update the stimli...
52
  			//get the maximum number of threads per block for the CUDA device
bf731970   Laila Saadatifard   fix some bugs in ...
53
54
  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads(max_threads, 1);
276f9b23   David Mayerich   forced matching t...
55
  			dim3 blocks((unsigned int)(x/threads.x) + 1 , (unsigned int)y);
bf731970   Laila Saadatifard   fix some bugs in ...
56
  			
13fe3c84   Laila Saadatifard   update the stimli...
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  
  			//call the GPU kernel to determine the gradient
  			gradient_2d<T> <<< blocks, threads >>>(gpuGrad, gpuI, x, y);
  
  		}
  
  		template<typename T>
  		void cpu_gradient_2d(T* out, T* in, unsigned int x, unsigned int y){
  
  			//get the number of pixels in the image
  			unsigned int pixels = x * y;
  			unsigned int bytes = pixels * sizeof(T);
  
  			//allocate space on the GPU for the input image
  			T* gpuIn;
  			HANDLE_ERROR(cudaMalloc(&gpuIn, bytes));
  
  			//copy the image data to the GPU
  			HANDLE_ERROR(cudaMemcpy(gpuIn, in, bytes, cudaMemcpyHostToDevice));
  
  			//allocate space on the GPU for the output gradient image
  			T* gpuOut;
  			cudaMalloc(&gpuOut, bytes * 2);		//the output image will have two channels (x, y)
  
  			//call the GPU version of this function
  			gpu_gradient_2d(gpuOut, gpuIn, x, y);	
  
  			//copy the results to the CPU
  			cudaMemcpy(out, gpuOut, bytes * 2, cudaMemcpyDeviceToHost);
  
  			//free allocated memory
  			cudaFree(gpuOut);
bf731970   Laila Saadatifard   fix some bugs in ...
89
  			cudaFree(gpuIn);
13fe3c84   Laila Saadatifard   update the stimli...
90
91
92
93
94
95
96
  		}
  
  	}
  }
  
  
  #endif