Commit 96f9b10f641f32443600d9789e6d244a285b564f

Authored by Laila Saadatifard
1 parent 5cc0976c

change the header files to be compatible with the new organized stim/cuda direct…

…ory, and add the stim/cuda/ivote subdirectory that includes the ivote-related functions
stim/cuda/arraymath/array_add.cuh
... ... @@ -3,8 +3,7 @@
3 3  
4 4 #include <iostream>
5 5 #include <cuda.h>
6   -#include <stim/cuda/devices.h>
7   -#include <stim/cuda/error.h>
  6 +#include <stim/cuda/cudatools.h>
8 7  
9 8 namespace stim{
10 9 namespace cuda{
... ...
stim/cuda/arraymath/array_multiply.cuh
... ... @@ -3,8 +3,7 @@
3 3  
4 4 #include <iostream>
5 5 #include <cuda.h>
6   -#include <stim/cuda/devices.h>
7   -#include <stim/cuda/error.h>
  6 +#include <stim/cuda/cudatools.h>
8 7  
9 8 namespace stim{
10 9 namespace cuda{
... ...
stim/cuda/gaussian_blur.cuh deleted
1   -#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
2   -#define STIM_CUDA_GAUSSIAN_BLUR_H
3   -
4   -#include <iostream>
5   -#include <cuda.h>
6   -#include <stim/cuda/cudatools.h>
7   -#include <stim/cuda/sharedmem.cuh>
8   -#include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm
9   -
10   -#define pi 3.14159
11   -
12   -namespace stim{
13   - namespace cuda{
14   -
15   - template<typename T>
16   - void gen_gaussian(T* out, T sigma, unsigned int width){
17   -
18   - //fill the kernel with a gaussian
19   - for(unsigned int xi = 0; xi < width; xi++){
20   -
21   - float x = (float)xi - (float)(width/2); //calculate the x position of the gaussian
22   - float g = 1.0 / (sigma * sqrt(2 * 3.14159)) * exp( - (x*x) / (2*sigma*sigma) );
23   - out[xi] = g;
24   - }
25   -
26   - }
27   -
28   - template<typename T>
29   - void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){
30   -
31   - //allocate space for the kernel
32   - unsigned int kwidth = sigma * 8 + 1;
33   - float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
34   -
35   - //fill the kernel with a gaussian
36   - gen_gaussian(kernel0, sigma, kwidth);
37   -
38   - //copy the kernel to the GPU
39   - T* gpuKernel0;
40   - HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
41   -
42   - //perform the gaussian blur as a separable convolution
43   - stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
44   -
45   - HANDLE_ERROR(cudaFree(gpuKernel0));
46   -
47   - }
48   -
49   - template<typename T>
50   - void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
51   -
52   - //allocate space for the kernel
53   - unsigned int kwidth = sigma * 8 + 1;
54   - float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
55   -
56   - //fill the kernel with a gaussian
57   - gen_gaussian(kernel0, sigma, kwidth);
58   -
59   - //copy the kernel to the GPU
60   - T* gpuKernel0;
61   - HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
62   -
63   - //perform the gaussian blur as a separable convolution
64   - stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);
65   -
66   - HANDLE_ERROR(cudaFree(gpuKernel0));
67   -
68   - }
69   -
70   - /// Applies a Gaussian blur to a 2D image stored on the CPU
71   - template<typename T>
72   - void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
73   -
74   - //allocate space for the kernel
75   - unsigned int kwidth = sigma * 8 + 1;
76   - float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
77   -
78   - //fill the kernel with a gaussian
79   - gen_gaussian(kernel0, sigma, kwidth);
80   -
81   - //perform the gaussian blur as a separable convolution
82   - stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth);
83   -
84   - }
85   -
86   - };
87   -};
88   -
89   -#endif
90 0 \ No newline at end of file
stim/cuda/ivote.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_IVOTE_H
  2 +#define STIM_CUDA_IVOTE_H
  3 +
  4 +#include <stim/cuda/ivote/down_sample.cuh>
  5 +#include <stim/cuda/ivote/local_max.cuh>
  6 +#include <stim/cuda/ivote/update_dir.cuh>
  7 +#include <stim/cuda/ivote/vote.cuh>
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + }
  13 +}
  14 +
  15 +
  16 +
  17 +#endif
0 18 \ No newline at end of file
... ...
stim/cuda/down_sample.cuh renamed to stim/cuda/ivote/down_sample.cuh
... ... @@ -3,9 +3,8 @@
3 3  
4 4 #include <iostream>
5 5 #include <cuda.h>
6   -#include <stim/cuda/devices.h>
7   -#include <stim/cuda/timer.h>
8   -#include <stim/cuda/gaussian_blur.cuh>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/templates/gaussian_blur.cuh>
9 8  
10 9 namespace stim{
11 10 namespace cuda{
... ... @@ -51,7 +50,7 @@ namespace stim{
51 50 dim3 threads(max_threads, 1);
52 51 dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);
53 52  
54   - stim::cuda::gpu_gaussian_blur_2d<float>(gpuI0, sigma_ds,x ,y);
  53 + stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
55 54  
56 55 //resample the image
57 56 down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
... ...
stim/cuda/local_max.cuh renamed to stim/cuda/ivote/local_max.cuh
... ... @@ -4,8 +4,7 @@
4 4  
5 5 # include <iostream>
6 6 # include <cuda.h>
7   -# include <stim/cuda/devices.h>
8   -# include <stim/cuda/error.h>
  7 +#include <stim/cuda/cudatools.h>
9 8  
10 9 namespace stim{
11 10 namespace cuda{
... ...
stim/cuda/update_dir.cuh renamed to stim/cuda/ivote/update_dir.cuh
... ... @@ -4,8 +4,7 @@
4 4  
5 5 # include <iostream>
6 6 # include <cuda.h>
7   -# include <stim/cuda/devices.h>
8   -# include <stim/cuda/error.h>
  7 +#include <stim/cuda/cudatools.h>
9 8 #include <stim/cuda/sharedmem.cuh>
10 9  
11 10 namespace stim{
... ...
stim/cuda/vote.cuh renamed to stim/cuda/ivote/vote.cuh
... ... @@ -4,8 +4,7 @@
4 4  
5 5 # include <iostream>
6 6 # include <cuda.h>
7   -# include <stim/cuda/devices.h>
8   -# include <stim/cuda/error.h>
  7 +#include <stim/cuda/cudatools.h>
9 8 #include <stim/cuda/sharedmem.cuh>
10 9  
11 10  
... ...
stim/cuda/templates/conv2.cuh
... ... @@ -3,8 +3,7 @@
3 3  
4 4 #include <iostream>
5 5 #include <cuda.h>
6   -#include <stim/cuda/devices.h>
7   -#include <stim/cuda/error.h>
  6 +#include <stim/cuda/cudatools.h>
8 7 #include <cmath>
9 8 #include <algorithm>
10 9  
... ...
stim/cuda/templates/gaussian_blur.cuh
... ... @@ -58,6 +58,7 @@ namespace stim{
58 58  
59 59 //copy the kernel to the GPU
60 60 T* gpuKernel0;
  61 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
61 62 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
62 63  
63 64 //perform the gaussian blur as a separable convolution
... ...
stim/cuda/templates/gradient.cuh
... ... @@ -3,8 +3,7 @@
3 3  
4 4 #include <iostream>
5 5 #include <cuda.h>
6   -#include <stim/cuda/devices.h>
7   -#include <stim/cuda/error.h>
  6 +#include <stim/cuda/cudatools.h>
8 7  
9 8 namespace stim{
10 9 namespace cuda{
... ...
stim/image/image.h
... ... @@ -174,7 +174,7 @@ public:
174 174  
175 175  
176 176 /// Returns the maximum pixel value in the image
177   - T max(){
  177 + T maxv(){
178 178 float max = 0;
179 179 unsigned long N = width() * height(); //get the number of pixels
180 180  
... ... @@ -190,7 +190,7 @@ public:
190 190 }
191 191  
192 192 /// Returns the minimum pixel value in the image
193   - T min(){
  193 + T minv(){
194 194 float min = 0;
195 195 unsigned long N = width() * height(); //get the number of pixels
196 196  
... ...
stim/math/vector.h
... ... @@ -5,7 +5,7 @@
5 5 #include <cmath>
6 6 #include <sstream>
7 7 #include <vector>
8   -#include "../cuda/callable.h"
  8 +#include "../cuda/cudatools/callable.h"
9 9  
10 10 namespace stim
11 11 {
... ...