Commit 817b3aba8b7f6ea8cb97ff62d43c41cba740f74d
1 parent
b7057451
Changed filter files to support CUDA only
Since the CUDA compiler works on systems that don't support CUDA, there is no reason not to require the CUDA libraries at compile time.
Showing
6 changed files
with
26 additions
and
14 deletions
Show diff stats
stim/math/filters/conv2.h renamed to stim/math/filters/conv2.cuh
@@ -2,13 +2,10 @@ | @@ -2,13 +2,10 @@ | ||
2 | #define STIM_CUDA_CONV2_H | 2 | #define STIM_CUDA_CONV2_H |
3 | //#define __CUDACC__ | 3 | //#define __CUDACC__ |
4 | 4 | ||
5 | -#ifdef __CUDACC__ | ||
6 | #include <stim/cuda/cudatools.h> | 5 | #include <stim/cuda/cudatools.h> |
7 | #include <stim/cuda/sharedmem.cuh> | 6 | #include <stim/cuda/sharedmem.cuh> |
8 | -#endif | ||
9 | 7 | ||
10 | namespace stim { | 8 | namespace stim { |
11 | -#ifdef __CUDACC__ | ||
12 | //Kernel function that performs the 2D convolution. | 9 | //Kernel function that performs the 2D convolution. |
13 | template<typename T, typename K> | 10 | template<typename T, typename K> |
14 | __global__ void kernel_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | 11 | __global__ void kernel_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { |
@@ -68,7 +65,7 @@ namespace stim { | @@ -68,7 +65,7 @@ namespace stim { | ||
68 | } | 65 | } |
69 | kernel_conv2 <<<nb, nt, sm>>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel | 66 | kernel_conv2 <<<nb, nt, sm>>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel |
70 | } | 67 | } |
71 | -#endif | 68 | +//#endif |
72 | //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned. | 69 | //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned. |
73 | // As a result, the output image will be smaller than the input image by (kx-1, ky-1) | 70 | // As a result, the output image will be smaller than the input image by (kx-1, ky-1) |
74 | //@param out is a pointer to the output image | 71 | //@param out is a pointer to the output image |
@@ -82,7 +79,6 @@ namespace stim { | @@ -82,7 +79,6 @@ namespace stim { | ||
82 | size_t X = sx - kx + 1; //x size of the output image | 79 | size_t X = sx - kx + 1; //x size of the output image |
83 | size_t Y = sy - ky + 1; //y size of the output image | 80 | size_t Y = sy - ky + 1; //y size of the output image |
84 | 81 | ||
85 | -#ifdef __CUDACC__ | ||
86 | //allocate memory and copy everything to the GPU | 82 | //allocate memory and copy everything to the GPU |
87 | T* gpu_in; | 83 | T* gpu_in; |
88 | HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T))); | 84 | HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T))); |
@@ -97,7 +93,7 @@ namespace stim { | @@ -97,7 +93,7 @@ namespace stim { | ||
97 | HANDLE_ERROR(cudaFree(gpu_in)); | 93 | HANDLE_ERROR(cudaFree(gpu_in)); |
98 | HANDLE_ERROR(cudaFree(gpu_kernel)); | 94 | HANDLE_ERROR(cudaFree(gpu_kernel)); |
99 | HANDLE_ERROR(cudaFree(gpu_out)); | 95 | HANDLE_ERROR(cudaFree(gpu_out)); |
100 | -#else | 96 | +/* CPU CODE |
101 | K v; //register stores the integral of the current pixel value | 97 | K v; //register stores the integral of the current pixel value |
102 | size_t yi, xi, kyi, kxi, yi_kyi_sx; | 98 | size_t yi, xi, kyi, kxi, yi_kyi_sx; |
103 | for (yi = 0; yi < Y; yi++) { //for each pixel in the output image | 99 | for (yi = 0; yi < Y; yi++) { //for each pixel in the output image |
@@ -113,9 +109,9 @@ namespace stim { | @@ -113,9 +109,9 @@ namespace stim { | ||
113 | } | 109 | } |
114 | } | 110 | } |
115 | 111 | ||
116 | -#endif | 112 | + */ |
117 | } | 113 | } |
118 | - | 114 | + |
119 | 115 | ||
120 | } | 116 | } |
121 | 117 |
stim/math/filters/gauss2.h renamed to stim/math/filters/gauss2.cuh
@@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
2 | #define STIM_CUDA_GAUSS2_H | 2 | #define STIM_CUDA_GAUSS2_H |
3 | 3 | ||
4 | #include <stim/image/image.h> | 4 | #include <stim/image/image.h> |
5 | -#include <stim/math/filters/sepconv2.h> | 5 | +#include <stim/math/filters/sepconv2.cuh> |
6 | #include <stim/math/constants.h> | 6 | #include <stim/math/constants.h> |
7 | 7 | ||
8 | namespace stim { | 8 | namespace stim { |
stim/math/filters/gauss3.h renamed to stim/math/filters/gauss3.cuh
1 | #ifndef STIM_CUDA_GAUSS3_H | 1 | #ifndef STIM_CUDA_GAUSS3_H |
2 | #define STIM_CUDA_GAUSS3_H | 2 | #define STIM_CUDA_GAUSS3_H |
3 | -#include <stim/math/filters/sepconv3.h> | ||
4 | -#include <stim/math/filters/gauss2.h> | 3 | +#include <stim/math/filters/sepconv3.cuh> |
4 | +#include <stim/math/filters/gauss2.cuh> | ||
5 | #include <stim/math/constants.h> | 5 | #include <stim/math/constants.h> |
6 | 6 | ||
7 | namespace stim | 7 | namespace stim |
1 | +#ifndef STIM_CUDA_RESAMPLE2_H | ||
2 | +#define STIM_CUDA_RESAMPLE2_H | ||
3 | + | ||
4 | +#include <stim/cuda/cudatools.h> | ||
5 | +#include <stim/cuda/sharedmem.cuh> | ||
6 | + | ||
7 | + | ||
8 | +///Downsamples a 2D image by a factor f using a box filter. Any pixels outside of the valid region | ||
9 | +/// (for example if X%f != 0) are chopped. | ||
10 | +template<typename T, typename K> | ||
11 | +void cpu_resample2(T* out, T* in, size_t f, size_t sx, size_t sy) { | ||
12 | + | ||
13 | +} | ||
14 | + | ||
15 | + | ||
16 | +#endif | ||
0 | \ No newline at end of file | 17 | \ No newline at end of file |
stim/math/filters/sepconv2.h renamed to stim/math/filters/sepconv2.cuh
1 | #ifndef STIM_CUDA_SEPCONV2_H | 1 | #ifndef STIM_CUDA_SEPCONV2_H |
2 | #define STIM_CUDA_SEPCONV2_H | 2 | #define STIM_CUDA_SEPCONV2_H |
3 | -#include <stim/math/filters/conv2.h> | 3 | +#include <stim/math/filters/conv2.cuh> |
4 | #ifdef __CUDACC__ | 4 | #ifdef __CUDACC__ |
5 | #include <stim/cuda/cudatools.h> | 5 | #include <stim/cuda/cudatools.h> |
6 | #include <stim/cuda/sharedmem.cuh> | 6 | #include <stim/cuda/sharedmem.cuh> |
stim/math/filters/sepconv3.h renamed to stim/math/filters/sepconv3.cuh
1 | #ifndef STIM_CUDA_SEPCONV3_H | 1 | #ifndef STIM_CUDA_SEPCONV3_H |
2 | #define STIM_CUDA_SEPCONV3_H | 2 | #define STIM_CUDA_SEPCONV3_H |
3 | 3 | ||
4 | -#include <stim/math/filters/conv2.h> | ||
5 | -#include <stim/math/filters/sepconv2.h> | 4 | +#include <stim/math/filters/conv2.cuh> |
5 | +#include <stim/math/filters/sepconv2.cuh> | ||
6 | #ifdef __CUDACC__ | 6 | #ifdef __CUDACC__ |
7 | #include <stim/cuda/cudatools.h> | 7 | #include <stim/cuda/cudatools.h> |
8 | #include <stim/cuda/sharedmem.cuh> | 8 | #include <stim/cuda/sharedmem.cuh> |