Blame view

stim/cuda/sharedmem.cuh 1.61 KB
13fe3c84   Laila Saadatifard   update the stimli...
1
2
3
4
5
6
7
  
  #ifndef STIM_CUDA_SHAREDMEM_H
  #define STIM_CUDA_SHAREDMEM_H
  
  namespace stim{
  	namespace cuda{
  
8e4f8364   David Mayerich   started a new opt...
8
  		// Copies a 2D window of values from texture memory to shared memory, dividing the work among the threads of a block
13fe3c84   Laila Saadatifard   update the stimli...
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  		// Cooperatively copies an X-by-Y window of a 2D texture into a linear array.
  		//
  		//	dest		destination array of at least X*Y elements, filled row-major (row stride X)
  		//	src			texture object that is sampled
  		//	x, y		texture coordinates of the first texel of the window
  		//	X, Y		width and height of the window, in texels
  		//	threadIdx	position of the calling thread (presumably the built-in threadIdx - confirm at call sites)
  		//	blockDim	size of the thread group doing the copy (presumably the built-in blockDim - confirm at call sites)
  		//
  		// Only the x and y components of threadIdx and blockDim are used; z is ignored.
  		// Each thread writes the elements (sx, sy) with sx = threadIdx.x + i*blockDim.x and
  		//	sy = threadIdx.y + j*blockDim.y that fall inside the window.
  		// No barrier is issued here: the caller has to synchronize (e.g. __syncthreads())
  		//	before a thread reads an element that a different thread wrote.
  		// NOTE(review): the coordinates are handed to tex2D as integer texel indices; whether a
  		//	half-texel offset is required depends on how the texture object was created - confirm.
  		template<typename T>
  		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  										 dim3 threadIdx, dim3 blockDim){

  			//a thread group with a zero extent could never advance through the window
  			if(blockDim.x == 0 || blockDim.y == 0) return;

  			//stride through the window one block-sized tile at a time; the loop bounds double as
  			//	the range check, so no iteration count (and no integer division) is needed
  			for(unsigned int sx = threadIdx.x; sx < X; sx += blockDim.x){
  				for(unsigned int sy = threadIdx.y; sy < Y; sy += blockDim.y){

  					//(sx, sy) indexes the destination window, (x + sx, y + sy) indexes the texture
  					dest[sy * X + sx] = tex2D<T>(src, x + sx, y + sy);
  				}
  			}
  		}
84eff8b1   Pavel Govyadinov   Merged only the n...
37
  
8e4f8364   David Mayerich   started a new opt...
38
39
40
41
42
43
44
45
46
47
48
49
50
  		// Cooperatively copies N elements from src to dest, dividing the work among nt threads.
  		// Intended for staging global memory into shared memory, although the code itself only
  		//	needs two pointers that the calling thread can dereference.
  		//
  		//	dest	destination array of at least N elements
  		//	src		source array of at least N elements (only read)
  		//	N		number of elements to copy
  		//	tid		index of the calling thread within the copying group (presumably in [0, nt) - confirm at call sites)
  		//	nt		number of threads taking part in the copy
  		//
  		// Thread tid copies elements tid, tid + nt, tid + 2*nt, ... below N, so in every pass
  		//	consecutive thread indices touch consecutive elements.
  		// No barrier is issued here: the caller has to synchronize (e.g. __syncthreads())
  		//	before a thread reads an element that a different thread wrote.
  		template<typename T>
  		__device__ void sharedMemcpy(T* dest, const T* src, size_t N, size_t tid, size_t nt){

  			if(nt == 0) return;							//no participating threads: nothing can be copied

  			for(size_t xi = tid; xi < N; xi += nt)		//start at the thread ID and advance by the thread count
  				dest[xi] = src[xi];						//the loop bound keeps the index inside the copy region
  		}
  
13fe3c84   Laila Saadatifard   update the stimli...
51
52
53
54
55
  		
  	}
  }
  
  
84eff8b1   Pavel Govyadinov   Merged only the n...
56
  #endif