Blame view

stim/cuda/ivote/re_sample.cuh 2.79 KB
3f0de7dd   Laila Saadatifard   upload the vote a...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  #ifndef STIM_CUDA_RE_SAMPLE_H
  #define STIM_CUDA_RE_SAMPLE_H
  
  #include <iostream>
  #include <cuda.h>
  #include <stim/cuda/cudatools.h>
  #include <stim/cuda/templates/gaussian_blur.cuh>
  
  namespace stim{
  	namespace cuda{
  
  		/// Kernel that spreads a reduced image onto a full-size x-by-y grid by zero insertion.
  		/// Output pixels whose x and y coordinates are both multiples of the integer
  		/// factor trunc(1/resize) receive the corresponding reduced-image sample; every
  		/// other in-range output pixel is set to zero.
  		///
  		/// Launch layout: threads are indexed along x only (blockIdx.x * blockDim.x + threadIdx.x)
  		/// and blockIdx.y selects the image row, so the grid needs y rows of blocks.
  		/// No shared memory is used.
  		///
  		/// @param gpuI		output image, x*y elements, row-major
  		/// @param gpuI0	reduced input image, row-major with a row length of ceil(x / factor)
  		/// @param resize	scale of the reduced image relative to the output; the factor is trunc(1/resize)
  		/// @param x		output width in pixels
  		/// @param y		output height in pixels
  		template<typename T>
  		__global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){

  			// integer spacing between consecutive reduced-image samples in the output grid
  			unsigned int sigma_ds = static_cast<unsigned int>(1/resize);

  			// a zero spacing (resize > 1) would make every division and modulo below undefined
  			if(sigma_ds == 0) return;

  			// row length of the reduced image: ceil(x / sigma_ds)
  			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));

  			// calculate the 2D coordinates for this current thread.
  			unsigned int xi = blockIdx.x * blockDim.x + threadIdx.x;
  			unsigned int yi = blockIdx.y;

  			// threads past the image border have nothing to write
  			if(xi >= x || yi >= y) return;

  			// convert 2D coordinates to 1D (size_t so large images cannot overflow the index)
  			size_t i = (size_t)yi * x + xi;

  			// every in-range pixel is written exactly once, so the output never keeps
  			// stale device memory: sample points copy from the reduced image, the rest are zero
  			if(xi%sigma_ds == 0 && yi%sigma_ds == 0)
  				gpuI[i] = gpuI0[(size_t)(yi/sigma_ds)*x_ds + xi/sigma_ds];
  			else
  				gpuI[i] = 0;

  		}
  
  
  		/// Re-samples a reduced image already stored on the GPU onto an x-by-y grid
  		/// by launching cuda_re_sample with one thread per output pixel.
  		///
  		/// Launch configuration: 1D blocks of stim::maxThreadsPerBlock() threads along x,
  		/// ceil(x / threads) blocks per row, and one grid row per image row (gridDim.y == y),
  		/// so the launch fails if y exceeds the device limit on gridDim.y.
  		/// The kernel is queued on the default stream and this function does not synchronize.
  		///
  		/// @param gpuI		device pointer to the output image (x*y elements)
  		/// @param gpuI0	device pointer to the reduced input image
  		/// @param resize	scale of the reduced image relative to the output
  		/// @param x		output width in pixels
  		/// @param y		output height in pixels
  		template<typename T>
  		void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){

  			// an empty image would give a zero-sized grid, which is an invalid launch configuration
  			if(x == 0 || y == 0) return;

  			unsigned int max_threads = stim::maxThreadsPerBlock();
  			dim3 threads(max_threads, 1);
  			dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);

  			//resample the image (instantiate the kernel for T, not a fixed float)
  			cuda_re_sample<T> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);

  			// launches do not return a status; peek so the error stays set for the caller to inspect
  			cudaError_t launch_status = cudaPeekAtLastError();
  			if(launch_status != cudaSuccess)
  				std::cerr<<"stim::cuda::gpu_re_sample: kernel launch failed: "<<cudaGetErrorString(launch_status)<<std::endl;

  		}
  
  		/// Re-samples a reduced image stored on the CPU onto an x-by-y grid using the GPU.
  		/// The reduced image is uploaded, gpu_re_sample fills a full-size device buffer,
  		/// and that buffer is copied back into out.
  		///
  		/// Any CUDA failure is reported on std::cerr; both device buffers are released
  		/// on every path. On failure the contents of out are unspecified.
  		///
  		/// @param out		host buffer receiving the result, x*y elements
  		/// @param in		host buffer holding the reduced image, ceil(x/f)*ceil(y/f) elements with f = trunc(1/resize)
  		/// @param resize	scale of the reduced image relative to the output
  		/// @param x		output width in pixels
  		/// @param y		output height in pixels
  		template<typename T>
  		void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){

  			// nothing to do for an empty image
  			if(x == 0 || y == 0) return;

  			// integer spacing between reduced-image samples in the output grid
  			unsigned int sigma_ds = static_cast<unsigned int>(1/resize);
  			if(sigma_ds == 0){
  				// resize > 1 truncates to zero and would divide by zero below
  				std::cerr<<"stim::cuda::cpu_re_sample: resize must give an integer factor of at least 1"<<std::endl;
  				return;
  			}

  			// dimensions of the reduced image, rounded up
  			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));

  			// byte counts are computed in size_t so large images cannot overflow
  			size_t bytes = sizeof(T) * (size_t)x * (size_t)y;
  			size_t bytes_ds = sizeof(T) * (size_t)x_ds * (size_t)y_ds;

  			T* gpuI0 = 0;		//reduced input image on the GPU
  			T* gpuI = 0;		//full-size output image on the GPU

  			//allocate space on the GPU for the reduced image and upload it
  			cudaError_t status = cudaMalloc(&gpuI0, bytes_ds);
  			if(status == cudaSuccess)
  				status = cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);

  			//allocate space on the GPU for the re-sampled image
  			if(status == cudaSuccess)
  				status = cudaMalloc(&gpuI, bytes);

  			//run the GPU-based version of the algorithm
  			if(status == cudaSuccess){
  				gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
  				status = cudaGetLastError();
  			}

  			//copy the full-size result back to the CPU (blocking, so the kernel has finished)
  			if(status == cudaSuccess)
  				status = cudaMemcpy(out, gpuI, bytes, cudaMemcpyDeviceToHost);

  			if(status != cudaSuccess)
  				std::cerr<<"stim::cuda::cpu_re_sample: "<<cudaGetErrorString(status)<<std::endl;

  			// cudaFree accepts a null pointer, so this is safe after a partial failure
  			cudaFree(gpuI0);
  			cudaFree(gpuI);
  		}
  	
  	}
  }
  
  #endif