// re_sample.cuh
#ifndef STIM_CUDA_RE_SAMPLE_H
#define STIM_CUDA_RE_SAMPLE_H

#include <iostream>
#include <cuda.h>
#include <stim/cuda/cudatools.h>
#include <stim/cuda/templates/gaussian_blur.cuh>

namespace stim{
	namespace cuda{

		/// Kernel: scatters a down-sampled image back onto a full-resolution grid.
		///
		/// The integer sampling step is sigma_ds = trunc(1/resize). Every output pixel whose
		/// x AND y coordinates are both multiples of sigma_ds receives the matching pixel of
		/// the down-sampled image; every other output pixel is set to zero.
		///
		/// Launch layout: blockIdx.x/threadIdx.x cover the image columns, blockIdx.y selects
		/// the image row (one grid row per image row). No shared memory is used.
		///
		/// @param gpuI    output image, x * y elements, row-major
		/// @param gpuI0   down-sampled input image, ceil(x/sigma_ds) elements per row
		/// @param resize  scale factor; the kernel does nothing when trunc(1/resize) is 0
		/// @param x       width of the output image in pixels
		/// @param y       height of the output image in pixels
		template<typename T>
		__global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){

			// integer sampling step (truncated reciprocal of the scale factor)
			unsigned int sigma_ds = (unsigned int)(1/resize);

			// a step of zero (resize > 1) would cause division/modulo by zero below
			if(sigma_ds == 0) return;

			// width of the down-sampled image: ceil(x / sigma_ds)
			unsigned int x_ds = x/sigma_ds + (x % sigma_ds == 0 ? 0 : 1);

			// calculate the 2D coordinates for this current thread
			unsigned int xi = blockIdx.x * blockDim.x + threadIdx.x;
			unsigned int yi = blockIdx.y;

			// threads past the image border have nothing to write
			if(xi >= x || yi >= y) return;

			// convert 2D coordinates to 1D (size_t avoids overflow on large images)
			size_t i = (size_t)yi * x + xi;

			if(xi % sigma_ds == 0 && yi % sigma_ds == 0){
				// this pixel lies on the sampling lattice: fetch it from the small image
				size_t i_ds = (size_t)(yi/sigma_ds) * x_ds + xi/sigma_ds;
				gpuI[i] = gpuI0[i_ds];
			}
			else{
				// every off-lattice pixel is zeroed, including those in a lattice column
				// but a non-lattice row, so no output pixel is left uninitialized
				gpuI[i] = 0;
			}
		}


		/// Re-samples a 2D image stored on the GPU by launching cuda_re_sample.
		///
		/// The grid is laid out with one row of blocks per image row (grid.y == y) and enough
		/// blocks along x to cover the image width. The launch is asynchronous and is issued on
		/// the default stream; launch errors are not checked here and can be retrieved by the
		/// caller with cudaGetLastError().
		///
		/// @param gpuI    device pointer to the output image (x * y elements)
		/// @param gpuI0   device pointer to the down-sampled input image
		/// @param resize  scale factor forwarded to the kernel
		/// @param x       width of the output image in pixels
		/// @param y       height of the output image in pixels
		template<typename T>
		void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){

			// an empty image would yield a zero-sized grid, which is an invalid launch configuration
			if(x == 0 || y == 0) return;

			// presumably the device limit on threads per block -- confirm in cudatools.h
			unsigned int max_threads = stim::maxThreadsPerBlock();
			dim3 threads(max_threads, 1);

			// ceil(x / threads.x) blocks across each row, one row of blocks per image row
			dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);

			//resample the image (instantiate the kernel for T, not a hard-coded float)
			cuda_re_sample<T> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);

		}

		/// Re-samples a 2D image stored on the CPU, using the GPU for the computation.
		///
		/// The down-sampled image is uploaded, gpu_re_sample is run to produce the x * y result
		/// on the device, and that result is copied back into out. Any CUDA failure is reported
		/// on std::cerr; device buffers are released on every path.
		///
		/// @param out     host buffer receiving the result, x * y elements
		/// @param in      host buffer holding the down-sampled image,
		///                ceil(x/sigma_ds) * ceil(y/sigma_ds) elements with sigma_ds = trunc(1/resize)
		/// @param resize  scale factor; must be positive and give trunc(1/resize) >= 1
		/// @param x       width of the output image in pixels
		/// @param y       height of the output image in pixels
		template<typename T>
		void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){

			// nothing to do for an empty image
			if(x == 0 || y == 0) return;

			// a non-positive factor has no meaningful reciprocal
			if(!(resize > 0)){
				std::cerr<<"cpu_re_sample: resize must be positive"<<std::endl;
				return;
			}

			// integer sampling step; zero (resize > 1) would divide by zero below
			unsigned int sigma_ds = (unsigned int)(1/resize);
			if(sigma_ds == 0){
				std::cerr<<"cpu_re_sample: resize must not exceed 1"<<std::endl;
				return;
			}

			// dimensions of the down-sampled image, rounded up
			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));

			// byte counts are computed in size_t so large images do not overflow
			size_t bytes = sizeof(T) * (size_t)x * (size_t)y;
			size_t bytes_ds = sizeof(T) * (size_t)x_ds * (size_t)y_ds;

			// pointers start out null so the cudaFree calls below are safe on every path
			T* gpuI0 = 0;
			T* gpuI = 0;

			//allocate space on the GPU for the down-sampled input image
			cudaError_t err = cudaMalloc(&gpuI0, bytes_ds);

			//allocate space on the GPU for the full-size result
			if(err == cudaSuccess)
				err = cudaMalloc(&gpuI, bytes);

			//copy the input image data to the GPU
			if(err == cudaSuccess)
				err = cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);

			//run the GPU-based version of the algorithm
			if(err == cudaSuccess){
				gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
				// kernel launches do not return a status; query it explicitly
				err = cudaGetLastError();
			}

			//copy the full x * y result back to the CPU (blocking, so it also waits for the kernel)
			if(err == cudaSuccess)
				err = cudaMemcpy(out, gpuI, bytes, cudaMemcpyDeviceToHost);

			// cudaFree on a null pointer is a no-op
			cudaFree(gpuI0);
			cudaFree(gpuI);

			if(err != cudaSuccess)
				std::cerr<<"cpu_re_sample: CUDA error: "<<cudaGetErrorString(err)<<std::endl;
		}
	
	}
}

#endif