Commit 96f9b10f641f32443600d9789e6d244a285b564f
1 parent
5cc0976c
Change the header files to be compatible with the newly organized stim/cuda directory,
and add the stim/cuda/ivote subdirectory that includes the ivote-related functions
Showing
13 changed files
with
31 additions
and
110 deletions
Show diff stats
stim/cuda/arraymath/array_add.cuh
stim/cuda/arraymath/array_multiply.cuh
stim/cuda/gaussian_blur.cuh deleted
1 | -#ifndef STIM_CUDA_GAUSSIAN_BLUR_H | ||
2 | -#define STIM_CUDA_GAUSSIAN_BLUR_H | ||
3 | - | ||
4 | -#include <iostream> | ||
5 | -#include <cuda.h> | ||
6 | -#include <stim/cuda/cudatools.h> | ||
7 | -#include <stim/cuda/sharedmem.cuh> | ||
8 | -#include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm | ||
9 | - | ||
10 | -#define pi 3.14159 | ||
11 | - | ||
12 | -namespace stim{ | ||
13 | - namespace cuda{ | ||
14 | - | ||
15 | - template<typename T> | ||
16 | - void gen_gaussian(T* out, T sigma, unsigned int width){ | ||
17 | - | ||
18 | - //fill the kernel with a gaussian | ||
19 | - for(unsigned int xi = 0; xi < width; xi++){ | ||
20 | - | ||
21 | - float x = (float)xi - (float)(width/2); //calculate the x position of the gaussian | ||
22 | - float g = 1.0 / (sigma * sqrt(2 * 3.14159)) * exp( - (x*x) / (2*sigma*sigma) ); | ||
23 | - out[xi] = g; | ||
24 | - } | ||
25 | - | ||
26 | - } | ||
27 | - | ||
28 | - template<typename T> | ||
29 | - void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){ | ||
30 | - | ||
31 | - //allocate space for the kernel | ||
32 | - unsigned int kwidth = sigma * 8 + 1; | ||
33 | - float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | ||
34 | - | ||
35 | - //fill the kernel with a gaussian | ||
36 | - gen_gaussian(kernel0, sigma, kwidth); | ||
37 | - | ||
38 | - //copy the kernel to the GPU | ||
39 | - T* gpuKernel0; | ||
40 | - HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); | ||
41 | - | ||
42 | - //perform the gaussian blur as a separable convolution | ||
43 | - stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth); | ||
44 | - | ||
45 | - HANDLE_ERROR(cudaFree(gpuKernel0)); | ||
46 | - | ||
47 | - } | ||
48 | - | ||
49 | - template<typename T> | ||
50 | - void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){ | ||
51 | - | ||
52 | - //allocate space for the kernel | ||
53 | - unsigned int kwidth = sigma * 8 + 1; | ||
54 | - float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | ||
55 | - | ||
56 | - //fill the kernel with a gaussian | ||
57 | - gen_gaussian(kernel0, sigma, kwidth); | ||
58 | - | ||
59 | - //copy the kernel to the GPU | ||
60 | - T* gpuKernel0; | ||
61 | - HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); | ||
62 | - | ||
63 | - //perform the gaussian blur as a separable convolution | ||
64 | - stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth); | ||
65 | - | ||
66 | - HANDLE_ERROR(cudaFree(gpuKernel0)); | ||
67 | - | ||
68 | - } | ||
69 | - | ||
70 | - /// Applies a Gaussian blur to a 2D image stored on the CPU | ||
71 | - template<typename T> | ||
72 | - void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){ | ||
73 | - | ||
74 | - //allocate space for the kernel | ||
75 | - unsigned int kwidth = sigma * 8 + 1; | ||
76 | - float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | ||
77 | - | ||
78 | - //fill the kernel with a gaussian | ||
79 | - gen_gaussian(kernel0, sigma, kwidth); | ||
80 | - | ||
81 | - //perform the gaussian blur as a separable convolution | ||
82 | - stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth); | ||
83 | - | ||
84 | - } | ||
85 | - | ||
86 | - }; | ||
87 | -}; | ||
88 | - | ||
89 | -#endif | ||
90 | \ No newline at end of file | 0 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_IVOTE_H | ||
2 | +#define STIM_CUDA_IVOTE_H | ||
3 | + | ||
4 | +#include <stim/cuda/ivote/down_sample.cuh> | ||
5 | +#include <stim/cuda/ivote/local_max.cuh> | ||
6 | +#include <stim/cuda/ivote/update_dir.cuh> | ||
7 | +#include <stim/cuda/ivote/vote.cuh> | ||
8 | + | ||
9 | +namespace stim{ | ||
10 | + namespace cuda{ | ||
11 | + | ||
12 | + } | ||
13 | +} | ||
14 | + | ||
15 | + | ||
16 | + | ||
17 | +#endif | ||
0 | \ No newline at end of file | 18 | \ No newline at end of file |
stim/cuda/down_sample.cuh renamed to stim/cuda/ivote/down_sample.cuh
@@ -3,9 +3,8 @@ | @@ -3,9 +3,8 @@ | ||
3 | 3 | ||
4 | #include <iostream> | 4 | #include <iostream> |
5 | #include <cuda.h> | 5 | #include <cuda.h> |
6 | -#include <stim/cuda/devices.h> | ||
7 | -#include <stim/cuda/timer.h> | ||
8 | -#include <stim/cuda/gaussian_blur.cuh> | 6 | +#include <stim/cuda/cudatools.h> |
7 | +#include <stim/cuda/templates/gaussian_blur.cuh> | ||
9 | 8 | ||
10 | namespace stim{ | 9 | namespace stim{ |
11 | namespace cuda{ | 10 | namespace cuda{ |
@@ -51,7 +50,7 @@ namespace stim{ | @@ -51,7 +50,7 @@ namespace stim{ | ||
51 | dim3 threads(max_threads, 1); | 50 | dim3 threads(max_threads, 1); |
52 | dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds); | 51 | dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds); |
53 | 52 | ||
54 | - stim::cuda::gpu_gaussian_blur_2d<float>(gpuI0, sigma_ds,x ,y); | 53 | + stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); |
55 | 54 | ||
56 | //resample the image | 55 | //resample the image |
57 | down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); | 56 | down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); |
stim/cuda/local_max.cuh renamed to stim/cuda/ivote/local_max.cuh
@@ -4,8 +4,7 @@ | @@ -4,8 +4,7 @@ | ||
4 | 4 | ||
5 | # include <iostream> | 5 | # include <iostream> |
6 | # include <cuda.h> | 6 | # include <cuda.h> |
7 | -# include <stim/cuda/devices.h> | ||
8 | -# include <stim/cuda/error.h> | 7 | +#include <stim/cuda/cudatools.h> |
9 | 8 | ||
10 | namespace stim{ | 9 | namespace stim{ |
11 | namespace cuda{ | 10 | namespace cuda{ |
stim/cuda/update_dir.cuh renamed to stim/cuda/ivote/update_dir.cuh
@@ -4,8 +4,7 @@ | @@ -4,8 +4,7 @@ | ||
4 | 4 | ||
5 | # include <iostream> | 5 | # include <iostream> |
6 | # include <cuda.h> | 6 | # include <cuda.h> |
7 | -# include <stim/cuda/devices.h> | ||
8 | -# include <stim/cuda/error.h> | 7 | +#include <stim/cuda/cudatools.h> |
9 | #include <stim/cuda/sharedmem.cuh> | 8 | #include <stim/cuda/sharedmem.cuh> |
10 | 9 | ||
11 | namespace stim{ | 10 | namespace stim{ |
stim/cuda/vote.cuh renamed to stim/cuda/ivote/vote.cuh
@@ -4,8 +4,7 @@ | @@ -4,8 +4,7 @@ | ||
4 | 4 | ||
5 | # include <iostream> | 5 | # include <iostream> |
6 | # include <cuda.h> | 6 | # include <cuda.h> |
7 | -# include <stim/cuda/devices.h> | ||
8 | -# include <stim/cuda/error.h> | 7 | +#include <stim/cuda/cudatools.h> |
9 | #include <stim/cuda/sharedmem.cuh> | 8 | #include <stim/cuda/sharedmem.cuh> |
10 | 9 | ||
11 | 10 |
stim/cuda/templates/conv2.cuh
@@ -3,8 +3,7 @@ | @@ -3,8 +3,7 @@ | ||
3 | 3 | ||
4 | #include <iostream> | 4 | #include <iostream> |
5 | #include <cuda.h> | 5 | #include <cuda.h> |
6 | -#include <stim/cuda/devices.h> | ||
7 | -#include <stim/cuda/error.h> | 6 | +#include <stim/cuda/cudatools.h> |
8 | #include <cmath> | 7 | #include <cmath> |
9 | #include <algorithm> | 8 | #include <algorithm> |
10 | 9 |
stim/cuda/templates/gaussian_blur.cuh
@@ -58,6 +58,7 @@ namespace stim{ | @@ -58,6 +58,7 @@ namespace stim{ | ||
58 | 58 | ||
59 | //copy the kernel to the GPU | 59 | //copy the kernel to the GPU |
60 | T* gpuKernel0; | 60 | T* gpuKernel0; |
61 | + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T))); | ||
61 | HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); | 62 | HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); |
62 | 63 | ||
63 | //perform the gaussian blur as a separable convolution | 64 | //perform the gaussian blur as a separable convolution |
stim/cuda/templates/gradient.cuh
stim/image/image.h
@@ -174,7 +174,7 @@ public: | @@ -174,7 +174,7 @@ public: | ||
174 | 174 | ||
175 | 175 | ||
176 | /// Returns the maximum pixel value in the image | 176 | /// Returns the maximum pixel value in the image |
177 | - T max(){ | 177 | + T maxv(){ |
178 | float max = 0; | 178 | float max = 0; |
179 | unsigned long N = width() * height(); //get the number of pixels | 179 | unsigned long N = width() * height(); //get the number of pixels |
180 | 180 | ||
@@ -190,7 +190,7 @@ public: | @@ -190,7 +190,7 @@ public: | ||
190 | } | 190 | } |
191 | 191 | ||
192 | /// Returns the minimum pixel value in the image | 192 | /// Returns the minimum pixel value in the image |
193 | - T min(){ | 193 | + T minv(){ |
194 | float min = 0; | 194 | float min = 0; |
195 | unsigned long N = width() * height(); //get the number of pixels | 195 | unsigned long N = width() * height(); //get the number of pixels |
196 | 196 |
stim/math/vector.h