Blame view

stim/cuda/sharedmem.cuh 1.07 KB
13fe3c84   Laila Saadatifard   update the stimli...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  
  #ifndef STIM_CUDA_SHAREDMEM_H
  #define STIM_CUDA_SHAREDMEM_H
  
  namespace stim{
  	namespace cuda{
  
  		// Copies an X-by-Y window of a 2D texture into the array dest, splitting
  		// the work across the threads of a block.
  		//
  		// The window's first texel is (x, y). Window element (sx, sy) is fetched
  		// from texture coordinate (x + sx, y + sy) and stored row-major at
  		// dest[sy * X + sx], so dest must have room for at least X * Y elements.
  		//
  		//	dest		destination array (intended to be shared memory, per the
  		//				function name - the code itself only requires a writable T*)
  		//	src			texture object to read from
  		//	x, y		texture coordinates of the window's first element
  		//	X, Y		width and height of the window, in elements
  		//	threadIdx	position of the calling thread; only .x and .y are used
  		//	blockDim	step between the elements handled by one thread; only .x
  		//				and .y are used, and both must be non-zero
  		//
  		// The threadIdx and blockDim parameters shadow the CUDA built-in variables
  		// of the same names inside this function, so the values used are the ones
  		// supplied by the caller.
  		//
  		// Each thread writes only the elements at its own stride positions, so the
  		// window is completely filled only when every (threadIdx.x, threadIdx.y)
  		// combination in [0, blockDim.x) x [0, blockDim.y) makes the call.
  		//
  		// This function contains no barrier: the caller must call __syncthreads()
  		// before any thread reads an element of dest written by another thread.
  		template<typename T>
  		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  										 dim3 threadIdx, dim3 blockDim){
  
  			//block-stride loops: each thread starts at its own offset and advances by
  			//the block size, so the loop bounds double as the range check and no
  			//thread runs a pass in which it has nothing to copy
  			for(unsigned int sx = threadIdx.x; sx < X; sx += blockDim.x){
  				for(unsigned int sy = threadIdx.y; sy < Y; sy += blockDim.y){
  
  					//(sx, sy) indexes the destination window, the texture is read at
  					//the window origin plus that offset
  					//NOTE(review): the integer coordinates are converted to float with
  					//no half-texel offset - presumably src uses unnormalized coordinates
  					//with point filtering; confirm against the texture setup
  					dest[sy * X + sx] = tex2D<T>(src, x + sx, y + sy);
  				}
  			}
  		}
84eff8b1   Pavel Govyadinov   Merged only the n...
37
  
13fe3c84   Laila Saadatifard   update the stimli...
38
39
40
41
42
  		
  	}
  }
  
  
84eff8b1   Pavel Govyadinov   Merged only the n...
43
  #endif