sharedmem.cuh 1.07 KB

Edit Raw Blame History


#ifndef STIM_CUDA_SHAREDMEM_H
#define STIM_CUDA_SHAREDMEM_H

namespace stim{
	namespace cuda{

		// Copies values from global memory to shared memory, optimizing threads
		template<typename T>
		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
										 dim3 threadIdx, dim3 blockDim){

			//calculate the number of iterations required for the copy
			unsigned int xI, yI;
			xI = X/blockDim.x + 1;				//number of iterations along X
			yI = Y/blockDim.y + 1;				//number of iterations along Y

			//for each iteration
			for(unsigned int xi = 0; xi < xI; xi++){
				for(unsigned int yi = 0; yi < yI; yi++){

					//calculate the index into shared memory
					unsigned int sx = xi * blockDim.x + threadIdx.x;
					unsigned int sy = yi * blockDim.y + threadIdx.y;

					//calculate the index into the texture
					unsigned int tx = x + sx;
					unsigned int ty = y + sy;

					//perform the copy
					if(sx < X && sy < Y)
						dest[sy * X + sx] = tex2D<T>(src, tx, ty);
				}
			}
		}


	}
}


#endif