Commit 817b3aba8b7f6ea8cb97ff62d43c41cba740f74d

Authored by David Mayerich
1 parent b7057451

Changed filter files to support CUDA only

Since the CUDA compiler works on systems that don't support CUDA, there is no reason not to require the CUDA libraries at compile time.
stim/math/filters/conv2.h renamed to stim/math/filters/conv2.cuh
... ... @@ -2,13 +2,10 @@
2 2 #define STIM_CUDA_CONV2_H
3 3 //#define __CUDACC__
4 4  
5   -#ifdef __CUDACC__
6 5 #include <stim/cuda/cudatools.h>
7 6 #include <stim/cuda/sharedmem.cuh>
8   -#endif
9 7  
10 8 namespace stim {
11   -#ifdef __CUDACC__
12 9 //Kernel function that performs the 2D convolution.
13 10 template<typename T, typename K>
14 11 __global__ void kernel_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
... ... @@ -68,7 +65,7 @@ namespace stim {
68 65 }
69 66 kernel_conv2 <<<nb, nt, sm>>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel
70 67 }
71   -#endif
  68 +//#endif
72 69 //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned.
73 70 // As a result, the output image will be smaller than the input image by (kx-1, ky-1)
74 71 //@param out is a pointer to the output image
... ... @@ -82,7 +79,6 @@ namespace stim {
82 79 size_t X = sx - kx + 1; //x size of the output image
83 80 size_t Y = sy - ky + 1; //y size of the output image
84 81  
85   -#ifdef __CUDACC__
86 82 //allocate memory and copy everything to the GPU
87 83 T* gpu_in;
88 84 HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T)));
... ... @@ -97,7 +93,7 @@ namespace stim {
97 93 HANDLE_ERROR(cudaFree(gpu_in));
98 94 HANDLE_ERROR(cudaFree(gpu_kernel));
99 95 HANDLE_ERROR(cudaFree(gpu_out));
100   -#else
  96 +/* CPU CODE
101 97 K v; //register stores the integral of the current pixel value
102 98 size_t yi, xi, kyi, kxi, yi_kyi_sx;
103 99 for (yi = 0; yi < Y; yi++) { //for each pixel in the output image
... ... @@ -113,9 +109,9 @@ namespace stim {
113 109 }
114 110 }
115 111  
116   -#endif
  112 + */
117 113 }
118   -
  114 +
119 115  
120 116 }
121 117  
... ...
stim/math/filters/gauss2.h renamed to stim/math/filters/gauss2.cuh
... ... @@ -2,7 +2,7 @@
2 2 #define STIM_CUDA_GAUSS2_H
3 3  
4 4 #include <stim/image/image.h>
5   -#include <stim/math/filters/sepconv2.h>
  5 +#include <stim/math/filters/sepconv2.cuh>
6 6 #include <stim/math/constants.h>
7 7  
8 8 namespace stim {
... ...
stim/math/filters/gauss3.h renamed to stim/math/filters/gauss3.cuh
1 1 #ifndef STIM_CUDA_GAUSS3_H
2 2 #define STIM_CUDA_GAUSS3_H
3   -#include <stim/math/filters/sepconv3.h>
4   -#include <stim/math/filters/gauss2.h>
  3 +#include <stim/math/filters/sepconv3.cuh>
  4 +#include <stim/math/filters/gauss2.cuh>
5 5 #include <stim/math/constants.h>
6 6  
7 7 namespace stim
... ...
stim/math/filters/resample2.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_RESAMPLE2_H
  2 +#define STIM_CUDA_RESAMPLE2_H
  3 +
  4 +#include <stim/cuda/cudatools.h>
  5 +#include <stim/cuda/sharedmem.cuh>
  6 +
  7 +
  8 +///Downsamples a 2D image by a factor f using a box filter. Any pixels outside of the valid region
  9 +/// (for example if X%f != 0) are chopped.
  10 +template<typename T, typename K>
  11 +void cpu_resample2(T* out, T* in, size_t f, size_t sx, size_t sy) {
  12 +
  13 +}
  14 +
  15 +
  16 +#endif
0 17 \ No newline at end of file
... ...
stim/math/filters/sepconv2.h renamed to stim/math/filters/sepconv2.cuh
1 1 #ifndef STIM_CUDA_SEPCONV2_H
2 2 #define STIM_CUDA_SEPCONV2_H
3   -#include <stim/math/filters/conv2.h>
  3 +#include <stim/math/filters/conv2.cuh>
4 4 #ifdef __CUDACC__
5 5 #include <stim/cuda/cudatools.h>
6 6 #include <stim/cuda/sharedmem.cuh>
... ...
stim/math/filters/sepconv3.h renamed to stim/math/filters/sepconv3.cuh
1 1 #ifndef STIM_CUDA_SEPCONV3_H
2 2 #define STIM_CUDA_SEPCONV3_H
3 3  
4   -#include <stim/math/filters/conv2.h>
5   -#include <stim/math/filters/sepconv2.h>
  4 +#include <stim/math/filters/conv2.cuh>
  5 +#include <stim/math/filters/sepconv2.cuh>
6 6 #ifdef __CUDACC__
7 7 #include <stim/cuda/cudatools.h>
8 8 #include <stim/cuda/sharedmem.cuh>
... ...