Blame view

stim/cuda/crop.cuh 1.8 KB
4252d827   David Mayerich   ivote3 fixes and ...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  
  /// Kernel that copies a rectangular window of a 2D source image into a densely packed destination image.
  /// Each thread handles at most one destination element, addressed by a 2D grid of 2D blocks;
  /// threads that land outside of the dx-by-dy destination do nothing.
  /// Both images are indexed with x as the fastest-varying dimension (index = row * width + column).
  /// The source window is NOT bounds-checked here (sy is never read), so the caller must guarantee
  /// that the window [x, x+dx) by [y, y+dy) lies inside of the source image.
  /// @param dest is a device pointer to the destination image (dx*dy elements)
  /// @param src is a device pointer to the source image (row length sx)
  /// @param sx is the size of the source image along x (used as the source row stride)
  /// @param sy is the size of the source image along y (unused by the kernel)
  /// @param x is the x-coordinate of the first cropped column within the source image
  /// @param y is the y-coordinate of the first cropped row within the source image
  /// @param dx is the size of the destination image along x
  /// @param dy is the size of the destination image along y
  template<typename T>
  __global__ void cuda_crop2d(T* dest, T* src, size_t sx, size_t sy, size_t x, size_t y, size_t dx, size_t dy){

  	const size_t col = blockIdx.x * blockDim.x + threadIdx.x;		//destination column handled by this thread
  	const size_t row = blockIdx.y * blockDim.y + threadIdx.y;		//destination row handled by this thread

  	if(col < dx && row < dy){										//only threads inside of the destination image do any work
  		const size_t src_col = x + col;								//matching column in the source image
  		const size_t src_row = y + row;								//matching row in the source image
  		dest[row * dx + col] = src[src_row * sx + src_col];			//copy one element from the source window to the destination
  	}
  }
  
  /// Crops a 2D image composed of elements of type T
  /// Launches cuda_crop2d with square thread blocks and just enough blocks to cover the dx-by-dy
  /// destination image. The launch is asynchronous and no error checking is performed here.
  /// If the destination image is empty (dx == 0 or dy == 0) the function returns without launching.
  /// The cropped region is not validated against the source size; the caller must make sure that
  /// x + dx <= sx and y + dy <= sy.
  /// @param dest is a device pointer to memory of size dx*dy that will store the cropped image
  /// @param src is a device pointer to memory of size sx*sy that stores the original image
  /// @param sx is the size of the source image along x
  /// @param sy is the size of the source image along y
  /// @param x is the x-coordinate of the start position of the cropped region within the source image
  /// @param y is the y-coordinate of the start position of the cropped region within the source image
  /// @param dx is the size of the destination image along x
  /// @param dy is the size of the destination image along y
  template<typename T>
  void gpu_crop2d(T* dest, T* src, size_t sx, size_t sy, size_t x, size_t y, size_t dx, size_t dy){
  	if(dx == 0 || dy == 0) return;																//nothing to copy, and a grid with a zero dimension is an invalid launch

  	int max_threads = stim::maxThreadsPerBlock();												//get the maximum number of threads per block for the CUDA device
  	unsigned int side = (unsigned int)sqrt((double)max_threads);								//edge length of the largest square block that fits in max_threads
  	if(side == 0) side = 1;																		//guard against a zero-sized block if the query returns a value below 1
  	dim3 threads( side, side );

  	//number of blocks needed to cover the destination image along each axis (ceiling division by the block edge)
  	unsigned int bx = (unsigned int)((dx + threads.x - 1) / threads.x);
  	unsigned int by = (unsigned int)((dy + threads.y - 1) / threads.y);
  	dim3 blocks( bx, by );

  	cuda_crop2d<T> <<< blocks, threads >>>(dest, src, sx, sy, x, y, dx, dy);
  }