Blame view

stim/cuda/sharedmem.cuh 1.98 KB
13fe3c84   Laila Saadatifard   update the stimli...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  
  #ifndef STIM_CUDA_SHAREDMEM_H
  #define STIM_CUDA_SHAREDMEM_H
  
  namespace stim{
  	namespace cuda{
  
  		// Copies an X-by-Y window of a 2D texture into the linear buffer dest.
  		//
  		//	dest		destination buffer, written row-major with a row length of X:
  		//				window element (sx, sy) is stored at dest[sy * X + sx]
  		//				(presumably shared memory, going by the function name -- confirm at call site)
  		//	src			texture object that is sampled with tex2D<T>
  		//	x, y		texture coordinates of the first (top-left) window element
  		//	X, Y		window size along x and y
  		//	threadIdx	first window element handled by the calling thread
  		//	blockDim	step between successive elements handled by the same thread
  		//
  		// A thread copies the window elements whose coordinates are threadIdx plus a whole
  		// multiple of blockDim along each axis, so threads that pass distinct threadIdx values
  		// (each smaller than blockDim) cover the window without overlapping.
  		//
  		// NOTE(review): no barrier is issued here; if dest is shared between threads the caller
  		// presumably needs a __syncthreads() before reading elements written by others -- confirm.
  		// NOTE(review): the threadIdx/blockDim parameters hide the CUDA built-ins of the same
  		// name inside this function; only the values passed in by the caller are used.
  		template<typename T>
  		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  										 dim3 threadIdx, dim3 blockDim){

  			//walk the window columns owned by this thread; the loop bound replaces the former
  			//	"X/blockDim.x + 1" trip count, which ran an extra, fully masked-out pass
  			//	whenever X was an exact multiple of blockDim.x
  			for(unsigned int sx = threadIdx.x; sx < X; sx += blockDim.x){

  				//the texture column only depends on sx, so compute it once per column
  				unsigned int tx = x + sx;

  				//walk the window rows owned by this thread
  				for(unsigned int sy = threadIdx.y; sy < Y; sy += blockDim.y){

  					//calculate the row index into the texture
  					unsigned int ty = y + sy;

  					//perform the copy (sx < X and sy < Y are guaranteed by the loop conditions)
  					dest[sy * X + sx] = tex2D<T>(src, tx, ty);
  				}
  			}
  		}
84eff8b1   Pavel Govyadinov   Merged only the n...
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
  
  		// Copies an X-by-Y window of a 2D texture into the linear buffer dest, storing
  		// abs(255 - texel) instead of the raw texel value.
  		//
  		//	T			element type of dest
  		//	D			type requested from the texture fetch (tex2D<D>); it cannot be deduced
  		//				from the arguments, so callers have to name it explicitly
  		//	dest		destination buffer, written row-major with a row length of X:
  		//				window element (sx, sy) is stored at dest[sy * X + sx]
  		//				(presumably shared memory, going by the function name -- confirm at call site)
  		//	src			texture object that is sampled with tex2D<D>
  		//	x, y		texture coordinates of the first (top-left) window element
  		//	X, Y		window size along x and y
  		//	threadIdx	first window element handled by the calling thread
  		//	blockDim	step between successive elements handled by the same thread
  		//
  		// A thread copies the window elements whose coordinates are threadIdx plus a whole
  		// multiple of blockDim along each axis, so threads that pass distinct threadIdx values
  		// (each smaller than blockDim) cover the window without overlapping.
  		//
  		// NOTE(review): the constant 255 looks like an 8-bit intensity inversion -- confirm that
  		// the texture really holds values in [0, 255].
  		// NOTE(review): no barrier is issued here; if dest is shared between threads the caller
  		// presumably needs a __syncthreads() before reading elements written by others -- confirm.
  		// NOTE(review): the threadIdx/blockDim parameters hide the CUDA built-ins of the same
  		// name inside this function; only the values passed in by the caller are used.
  		template<typename T, typename D>
  		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  										 dim3 threadIdx, dim3 blockDim){

  			//walk the window columns owned by this thread; the loop bound replaces the former
  			//	"X/blockDim.x + 1" trip count, which ran an extra, fully masked-out pass
  			//	whenever X was an exact multiple of blockDim.x
  			for(unsigned int sx = threadIdx.x; sx < X; sx += blockDim.x){

  				//the texture column only depends on sx, so compute it once per column
  				unsigned int tx = x + sx;

  				//walk the window rows owned by this thread
  				for(unsigned int sy = threadIdx.y; sy < Y; sy += blockDim.y){

  					//calculate the row index into the texture
  					unsigned int ty = y + sy;

  					//fetch as D, invert against 255 and store; the result is converted to T
  					//	by the assignment (sx < X and sy < Y are guaranteed by the loop conditions)
  					dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty));
  				}
  			}
  		}
13fe3c84   Laila Saadatifard   update the stimli...
66
67
68
69
70
  		
  	}
  }
  
  
84eff8b1   Pavel Govyadinov   Merged only the n...
71
  #endif