Commit 3173e54dd0c592a5d06da09f3e0884973e53edd2
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib into bsds500
Showing
30 changed files
with
265 additions
and
145 deletions
Show diff stats
stim/cuda/arraymath.cuh
1 | 1 | #ifndef STIM_CUDA_ARRAYMATH_H |
2 | 2 | #define STIM_CUDA_ARRAYMATH_H |
3 | 3 | |
4 | -#include <stim/cuda/array_add.cuh> | |
5 | -#include <stim/cuda/array_multiply.cuh> | |
6 | -#include <stim/cuda/array_abs.cuh> | |
7 | -#include <stim/cuda/array_cart2polar.cuh> | |
8 | -#include <stim/cuda/gaussian_blur.cuh> | |
9 | -#include <stim/cuda/conv2.cuh> | |
4 | +#include <stim/cuda/arraymath/array_add.cuh> | |
5 | +#include <stim/cuda/arraymath/array_multiply.cuh> | |
6 | +#include <stim/cuda/arraymath/array_abs.cuh> | |
7 | +#include <stim/cuda/arraymath/array_cart2polar.cuh> | |
10 | 8 | |
11 | 9 | namespace stim{ |
12 | 10 | namespace cuda{ | ... | ... |
stim/cuda/array_abs.cuh renamed to stim/cuda/arraymath/array_abs.cuh
stim/cuda/array_add.cuh renamed to stim/cuda/arraymath/array_add.cuh
stim/cuda/array_cart2polar.cuh renamed to stim/cuda/arraymath/array_cart2polar.cuh
stim/cuda/array_multiply.cuh renamed to stim/cuda/arraymath/array_multiply.cuh
stim/cuda/cost.h
... | ... | @@ -3,11 +3,11 @@ |
3 | 3 | #include <cuda_runtime.h> |
4 | 4 | #include <cublas_v2.h> |
5 | 5 | #include <stdio.h> |
6 | -#include "../visualization/colormap.h" | |
6 | +#include <stim/visualization/colormap.h> | |
7 | 7 | #include <sstream> |
8 | -#include "../math/vector.h" | |
9 | -#include "../cuda/devices.h" | |
10 | -#include "../cuda/threads.h" | |
8 | +#include <stim/math/vector.h> | |
9 | +#include <stim/cuda/cudatools/devices.h> | |
10 | +#include <stim/cuda/cudatools/threads.h> | |
11 | 11 | |
12 | 12 | ///Cost function that works with the gl-spider class to find index of the item with min-cost. |
13 | 13 | typedef unsigned char uchar; | ... | ... |
stim/cuda/callable.h renamed to stim/cuda/cudatools/callable.h
stim/cuda/devices.h renamed to stim/cuda/cudatools/devices.h
stim/cuda/error.h renamed to stim/cuda/cudatools/error.h
stim/cuda/glbind.h renamed to stim/cuda/cudatools/glbind.h
stim/cuda/threads.h renamed to stim/cuda/cudatools/threads.h
stim/cuda/timer.h renamed to stim/cuda/cudatools/timer.h
1 | -#ifndef STIM_CUDA_TIMER | |
2 | -#define STIM_CUDA_TIMER | |
3 | - | |
4 | -static cudaEvent_t tStartEvent; | |
5 | -static cudaEvent_t tStopEvent; | |
6 | - | |
7 | -namespace stim{ | |
8 | - | |
9 | -/// These functions calculate the time between GPU functions in milliseconds | |
10 | -static void gpuStartTimer() | |
11 | -{ | |
12 | - //set up timing events | |
13 | - cudaEventCreate(&tStartEvent); | |
14 | - cudaEventCreate(&tStopEvent); | |
15 | - cudaEventRecord(tStartEvent, 0); | |
16 | -} | |
17 | - | |
18 | -static float gpuStopTimer() | |
19 | -{ | |
20 | - cudaEventRecord(tStopEvent, 0); | |
21 | - cudaEventSynchronize(tStopEvent); | |
22 | - float elapsedTime; | |
23 | - cudaEventElapsedTime(&elapsedTime, tStartEvent, tStopEvent); | |
24 | - cudaEventDestroy(tStartEvent); | |
25 | - cudaEventDestroy(tStopEvent); | |
26 | - return elapsedTime; | |
27 | -} | |
28 | - | |
29 | -} //end namespace stim | |
30 | - | |
1 | +#ifndef STIM_CUDA_TIMER | |
2 | +#define STIM_CUDA_TIMER | |
3 | + | |
4 | +static cudaEvent_t tStartEvent; | |
5 | +static cudaEvent_t tStopEvent; | |
6 | + | |
7 | +namespace stim{ | |
8 | + | |
9 | +/// These functions calculate the time between GPU functions in milliseconds | |
10 | +static void gpuStartTimer() | |
11 | +{ | |
12 | + //set up timing events | |
13 | + cudaEventCreate(&tStartEvent); | |
14 | + cudaEventCreate(&tStopEvent); | |
15 | + cudaEventRecord(tStartEvent, 0); | |
16 | +} | |
17 | + | |
18 | +static float gpuStopTimer() | |
19 | +{ | |
20 | + cudaEventRecord(tStopEvent, 0); | |
21 | + cudaEventSynchronize(tStopEvent); | |
22 | + float elapsedTime; | |
23 | + cudaEventElapsedTime(&elapsedTime, tStartEvent, tStopEvent); | |
24 | + cudaEventDestroy(tStartEvent); | |
25 | + cudaEventDestroy(tStopEvent); | |
26 | + return elapsedTime; | |
27 | +} | |
28 | + | |
29 | +} //end namespace stim | |
30 | + | |
31 | 31 | #endif |
32 | 32 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_CUDA_IVOTE_H | |
2 | +#define STIM_CUDA_IVOTE_H | |
3 | + | |
4 | +#include <stim/cuda/ivote/down_sample.cuh> | |
5 | +#include <stim/cuda/ivote/local_max.cuh> | |
6 | +#include <stim/cuda/ivote/update_dir.cuh> | |
7 | +#include <stim/cuda/ivote/vote.cuh> | |
8 | + | |
9 | +namespace stim{ | |
10 | + namespace cuda{ | |
11 | + | |
12 | + } | |
13 | +} | |
14 | + | |
15 | + | |
16 | + | |
17 | +#endif | |
0 | 18 | \ No newline at end of file | ... | ... |
stim/cuda/down_sample.cuh renamed to stim/cuda/ivote/down_sample.cuh
... | ... | @@ -3,9 +3,8 @@ |
3 | 3 | |
4 | 4 | #include <iostream> |
5 | 5 | #include <cuda.h> |
6 | -#include <stim/cuda/devices.h> | |
7 | -#include <stim/cuda/timer.h> | |
8 | -#include <stim/cuda/gaussian_blur.cuh> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#include <stim/cuda/templates/gaussian_blur.cuh> | |
9 | 8 | |
10 | 9 | namespace stim{ |
11 | 10 | namespace cuda{ |
... | ... | @@ -51,7 +50,7 @@ namespace stim{ |
51 | 50 | dim3 threads(max_threads, 1); |
52 | 51 | dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds); |
53 | 52 | |
54 | - stim::cuda::gpu_gaussian_blur_2d<float>(gpuI0, sigma_ds,x ,y); | |
53 | + stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y); | |
55 | 54 | |
56 | 55 | //resample the image |
57 | 56 | down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y); |
... | ... | @@ -89,7 +88,7 @@ namespace stim{ |
89 | 88 | gpu_down_sample<T>(gpuI, gpuI0, resize, x, y); |
90 | 89 | |
91 | 90 | //copy the image data to the GPU |
92 | - cudaMemcpy(re_image, gpuI, bytes_ds, cudaMemcpyHostToDevice); | |
91 | + cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice); | |
93 | 92 | |
94 | 93 | cudaFree(gpuI0); |
95 | 94 | cudeFree(gpuI); | ... | ... |
stim/cuda/local_max.cuh renamed to stim/cuda/ivote/local_max.cuh
stim/cuda/update_dir.cuh renamed to stim/cuda/ivote/update_dir.cuh
stim/cuda/vote.cuh renamed to stim/cuda/ivote/vote.cuh
stim/cuda/conv2.cuh renamed to stim/cuda/templates/conv2.cuh
stim/cuda/gaussian_blur.cuh renamed to stim/cuda/templates/conv2sep.cuh
1 | -#ifndef STIM_CUDA_GAUSSIAN_BLUR_H | |
2 | -#define STIM_CUDA_GAUSSIAN_BLUR_H | |
1 | +#ifndef STIM_CUDA_CONV2SEP_H | |
2 | +#define STIM_CUDA_CONV2SEP_H | |
3 | 3 | |
4 | 4 | #include <iostream> |
5 | 5 | #include <cuda.h> |
6 | -#include <stim/cuda/devices.h> | |
7 | -#include <stim/cuda/timer.h> | |
6 | +#include <stim/cuda/cudatools/devices.h> | |
7 | +#include <stim/cuda/cudatools/timer.h> | |
8 | 8 | #include <stim/cuda/sharedmem.cuh> |
9 | +#include <stim/cuda/cudatools/error.h> | |
9 | 10 | |
10 | 11 | #define pi 3.14159 |
11 | 12 | |
... | ... | @@ -13,12 +14,13 @@ namespace stim{ |
13 | 14 | namespace cuda{ |
14 | 15 | |
15 | 16 | template<typename T> |
16 | - __global__ void gaussian_blur_x(T* out, cudaTextureObject_t in, T sigma, unsigned int x, unsigned int y){ | |
17 | + __global__ void conv2sep_0(T* out, cudaTextureObject_t in, unsigned int x, unsigned int y, | |
18 | + T* kernel0, unsigned int k0){ | |
17 | 19 | |
18 | 20 | //generate a pointer to shared memory (size will be specified as a kernel parameter) |
19 | 21 | extern __shared__ T s[]; |
20 | 22 | |
21 | - int kr = sigma * 4; //calculate the kernel radius | |
23 | + int kr = k0/2; //calculate the kernel radius | |
22 | 24 | |
23 | 25 | //get a pointer to the gaussian in memory |
24 | 26 | T* g = (T*)&s[blockDim.x + 2 * kr]; |
... | ... | @@ -30,30 +32,25 @@ namespace stim{ |
30 | 32 | //copy the portion of the image necessary for this block to shared memory |
31 | 33 | stim::cuda::sharedMemcpy_tex2D(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim); |
32 | 34 | |
33 | - //calculate the thread index and block index | |
35 | + //calculate the thread index | |
34 | 36 | int ti = threadIdx.x; |
35 | 37 | |
36 | 38 | //calculate the spatial coordinate for this thread |
37 | 39 | int xi = bxi + ti; |
40 | + int yi = byi; | |
38 | 41 | |
39 | - //pre-compute the gaussian values for each kernel point | |
40 | - T a = 1.0 / (sigma * sqrt(2 * pi)); | |
41 | - T c = - 1.0 / (2*sigma*sigma); | |
42 | - int ki; | |
43 | - | |
44 | - //use the first 2kr+1 threads to evaluate a gaussian and store the result | |
45 | - if(ti <= 2* kr+1){ | |
46 | - ki = ti - kr; | |
47 | - g[ti] = a * exp((ki*ki) * c); | |
42 | + | |
43 | + //use the first 2kr+1 threads to transfer the kernel to shared memory | |
44 | + if(ti < k0){ | |
45 | + g[ti] = kernel0[ti]; | |
48 | 46 | } |
49 | 47 | |
50 | 48 | //make sure that all writing to shared memory is done before continuing |
51 | 49 | __syncthreads(); |
52 | 50 | |
53 | 51 | //if the current pixel is outside of the image |
54 | - if(bxi + ti > x || byi > y) | |
52 | + if(xi > x || yi > y) | |
55 | 53 | return; |
56 | - | |
57 | 54 | |
58 | 55 | |
59 | 56 | //calculate the coordinates of the current thread in shared memory |
... | ... | @@ -66,7 +63,7 @@ namespace stim{ |
66 | 63 | for(int ki = -kr; ki <= kr; ki++){ |
67 | 64 | sum += g[ki + kr] * s[si + ki]; |
68 | 65 | } |
69 | - | |
66 | + | |
70 | 67 | //calculate the 1D image index for this thread |
71 | 68 | unsigned int i = byi * x + xi; |
72 | 69 | |
... | ... | @@ -75,12 +72,13 @@ namespace stim{ |
75 | 72 | } |
76 | 73 | |
77 | 74 | template<typename T> |
78 | - __global__ void gaussian_blur_y(T* out, cudaTextureObject_t in, T sigma, unsigned int x, unsigned int y){ | |
75 | + __global__ void conv2sep_1(T* out, cudaTextureObject_t in, unsigned int x, unsigned int y, | |
76 | + T* kernel0, unsigned int k0){ | |
79 | 77 | |
80 | 78 | //generate a pointer to shared memory (size will be specified as a kernel parameter) |
81 | 79 | extern __shared__ T s[]; |
82 | 80 | |
83 | - int kr = sigma * 4; //calculate the kernel radius | |
81 | + int kr = k0/2; //calculate the kernel radius | |
84 | 82 | |
85 | 83 | //get a pointer to the gaussian in memory |
86 | 84 | T* g = (T*)&s[blockDim.y + 2 * kr]; |
... | ... | @@ -92,30 +90,25 @@ namespace stim{ |
92 | 90 | //copy the portion of the image necessary for this block to shared memory |
93 | 91 | stim::cuda::sharedMemcpy_tex2D(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim); |
94 | 92 | |
95 | - //calculate the thread index and block index | |
93 | + //calculate the thread index | |
96 | 94 | int ti = threadIdx.y; |
97 | 95 | |
98 | 96 | //calculate the spatial coordinate for this thread |
97 | + int xi = bxi; | |
99 | 98 | int yi = byi + ti; |
100 | 99 | |
101 | - //pre-compute the gaussian values for each kernel point | |
102 | - T a = 1.0 / (sigma * sqrt(2 * pi)); | |
103 | - T c = - 1.0 / (2*sigma*sigma); | |
104 | - int ki; | |
105 | - | |
106 | - //use the first 2kr+1 threads to evaluate a gaussian and store the result | |
107 | - if(ti <= 2* kr+1){ | |
108 | - ki = ti - kr; | |
109 | - g[ti] = a * exp((ki*ki) * c); | |
100 | + | |
101 | + //use the first 2kr+1 threads to transfer the kernel to shared memory | |
102 | + if(ti < k0){ | |
103 | + g[ti] = kernel0[ti]; | |
110 | 104 | } |
111 | 105 | |
112 | 106 | //make sure that all writing to shared memory is done before continuing |
113 | 107 | __syncthreads(); |
114 | 108 | |
115 | 109 | //if the current pixel is outside of the image |
116 | - if(bxi >= x || yi >= y) | |
110 | + if(xi > x || yi > y) | |
117 | 111 | return; |
118 | - | |
119 | 112 | |
120 | 113 | |
121 | 114 | //calculate the coordinates of the current thread in shared memory |
... | ... | @@ -128,17 +121,55 @@ namespace stim{ |
128 | 121 | for(int ki = -kr; ki <= kr; ki++){ |
129 | 122 | sum += g[ki + kr] * s[si + ki]; |
130 | 123 | } |
131 | - | |
124 | + | |
132 | 125 | //calculate the 1D image index for this thread |
133 | - unsigned int i = yi * x + bxi; | |
126 | + unsigned int i = yi * x + xi; | |
134 | 127 | |
135 | 128 | //output the result to global memory |
136 | 129 | out[i] = sum; |
137 | 130 | } |
138 | 131 | |
139 | - /// Applies a Gaussian blur to a 2D image stored on the GPU | |
140 | 132 | template<typename T> |
141 | - void gpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | |
133 | + void tex_conv2sep(T* out, unsigned int x, unsigned int y, | |
134 | + cudaTextureObject_t texObj, cudaArray* cuArray, | |
135 | + T* kernel0, unsigned int k0, | |
136 | + T* kernel1, unsigned int k1){ | |
137 | + | |
138 | + //get the maximum number of threads per block for the CUDA device | |
139 | + int max_threads = stim::maxThreadsPerBlock(); | |
140 | + dim3 threads(max_threads, 1); | |
141 | + | |
142 | + //calculate the number of blocks | |
143 | + dim3 blocks(x / threads.x + 1, y); | |
144 | + | |
145 | + //calculate the shared memory used in the kernel | |
146 | + unsigned int pixel_bytes = max_threads * sizeof(T); //bytes devoted to pixel data being processed | |
147 | + unsigned int apron_bytes = k0/2 * sizeof(T); //bytes devoted to the apron on each side of the window | |
148 | + unsigned int gaussian_bytes = k0 * sizeof(T); //bytes devoted to memory used to store the pre-computed Gaussian window | |
149 | + unsigned int shared_bytes = pixel_bytes + 2 * apron_bytes + gaussian_bytes; //total number of bytes shared memory used | |
150 | + | |
151 | + //blur the image along the x-axis | |
152 | + conv2sep_0<T> <<< blocks, threads, shared_bytes >>>(out, texObj, x, y, kernel0, k0); | |
153 | + | |
154 | + // Copy the x-blurred data from global memory to the texture | |
155 | + cudaMemcpyToArray(cuArray, 0, 0, out, x * y * sizeof(T), | |
156 | + cudaMemcpyDeviceToDevice); | |
157 | + | |
158 | + //transpose the block and thread dimensions | |
159 | + threads.x = 1; | |
160 | + threads.y = max_threads; | |
161 | + blocks.x = x; | |
162 | + blocks.y = y / threads.y + 1; | |
163 | + | |
164 | + //blur the image along the y-axis | |
165 | + conv2sep_1<T> <<< blocks, threads, shared_bytes >>>(out, texObj, x, y, kernel1, k1); | |
166 | + | |
167 | + } | |
168 | + | |
169 | + template<typename T> | |
170 | + void gpu_conv2sep(T* image, unsigned int x, unsigned int y, | |
171 | + T* kernel0, unsigned int k0, | |
172 | + T* kernel1, unsigned int k1){ | |
142 | 173 | |
143 | 174 | //get the number of pixels in the image |
144 | 175 | unsigned int pixels = x * y; |
... | ... | @@ -176,36 +207,9 @@ namespace stim{ |
176 | 207 | cudaTextureObject_t texObj = 0; |
177 | 208 | cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL); |
178 | 209 | |
210 | + //call the texture version of the separable convolution function | |
211 | + tex_conv2sep(image, x, y, texObj, cuArray, kernel0, k0, kernel1, k1); | |
179 | 212 | |
180 | - //get the maximum number of threads per block for the CUDA device | |
181 | - int max_threads = stim::maxThreadsPerBlock(); | |
182 | - dim3 threads(max_threads, 1); | |
183 | - | |
184 | - //calculate the number of blocks | |
185 | - dim3 blocks(x / threads.x + 1, y); | |
186 | - | |
187 | - //calculate the shared memory used in the kernel | |
188 | - unsigned int pixel_bytes = max_threads * 4; //bytes devoted to pixel data being processed | |
189 | - unsigned int apron_bytes = sigma * 8 * 4; //bytes devoted to pixels outside the window | |
190 | - unsigned int gaussian_bytes = (sigma * 8 + 1) * 4; //bytes devoted to memory used to store the pre-computed Gaussian window | |
191 | - unsigned int shared_bytes = pixel_bytes + apron_bytes + gaussian_bytes; //total number of bytes shared memory used | |
192 | - | |
193 | - //blur the image along the x-axis | |
194 | - gaussian_blur_x <<< blocks, threads, shared_bytes >>>(image, texObj, sigma, x, y); | |
195 | - | |
196 | - // Copy the x-blurred data from global memory to the texture | |
197 | - cudaMemcpyToArray(cuArray, 0, 0, image, bytes, | |
198 | - cudaMemcpyDeviceToDevice); | |
199 | - | |
200 | - //transpose the block and thread dimensions | |
201 | - threads.x = 1; | |
202 | - threads.y = max_threads; | |
203 | - blocks.x = x; | |
204 | - blocks.y = y / threads.y + 1; | |
205 | - | |
206 | - //blur the image along the y-axis | |
207 | - gaussian_blur_y <<< blocks, threads, shared_bytes >>>(image, texObj, sigma, x, y); | |
208 | - | |
209 | 213 | //free allocated memory |
210 | 214 | cudaFree(cuArray); |
211 | 215 | |
... | ... | @@ -213,22 +217,35 @@ namespace stim{ |
213 | 217 | |
214 | 218 | /// Applies a Gaussian blur to a 2D image stored on the CPU |
215 | 219 | template<typename T> |
216 | - void cpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | |
220 | + void cpu_conv2sep(T* image, unsigned int x, unsigned int y, | |
221 | + T* kernel0, unsigned int k0, | |
222 | + T* kernel1, unsigned int k1){ | |
217 | 223 | |
218 | 224 | //get the number of pixels in the image |
219 | 225 | unsigned int pixels = x * y; |
220 | 226 | unsigned int bytes = sizeof(T) * pixels; |
221 | 227 | |
222 | - //allocate space on the GPU | |
228 | + //---------Allocate Image--------- | |
229 | + //allocate space on the GPU for the image | |
223 | 230 | T* gpuI0; |
224 | - cudaMalloc(&gpuI0, bytes); | |
225 | - | |
231 | + HANDLE_ERROR(cudaMalloc(&gpuI0, bytes)); | |
226 | 232 | |
227 | 233 | //copy the image data to the GPU |
228 | - cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); | |
234 | + HANDLE_ERROR(cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice)); | |
235 | + | |
236 | + //---------Allocate Kernel-------- | |
237 | + //allocate and copy the 0 (x) kernel | |
238 | + T* gpuK0; | |
239 | + HANDLE_ERROR(cudaMalloc(&gpuK0, k0 * sizeof(T))); | |
240 | + HANDLE_ERROR(cudaMemcpy(gpuK0, kernel0, k0 * sizeof(T), cudaMemcpyHostToDevice)); | |
241 | + | |
242 | + //allocate and copy the 1 (y) kernel | |
243 | + T* gpuK1; | |
244 | + HANDLE_ERROR(cudaMalloc(&gpuK1, k1 * sizeof(T))); | |
245 | + HANDLE_ERROR(cudaMemcpy(gpuK1, kernel1, k1 * sizeof(T), cudaMemcpyHostToDevice)); | |
229 | 246 | |
230 | 247 | //run the GPU-based version of the algorithm |
231 | - gpu_gaussian_blur_2d<T>(gpuI0, sigma, x, y); | |
248 | + gpu_conv2sep<T>(gpuI0, x, y, gpuK0, k0, gpuK1, k1); | |
232 | 249 | |
233 | 250 | //copy the image data from the device |
234 | 251 | cudaMemcpy(image, gpuI0, bytes, cudaMemcpyDeviceToHost); | ... | ... |
1 | +#ifndef STIM_CUDA_GAUSSIAN_BLUR_H | |
2 | +#define STIM_CUDA_GAUSSIAN_BLUR_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <cuda.h> | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#include <stim/cuda/sharedmem.cuh> | |
8 | +#include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm | |
9 | + | |
10 | +#define pi 3.14159 | |
11 | + | |
12 | +namespace stim{ | |
13 | + namespace cuda{ | |
14 | + | |
15 | + template<typename T> | |
16 | + void gen_gaussian(T* out, T sigma, unsigned int width){ | |
17 | + | |
18 | + //fill the kernel with a gaussian | |
19 | + for(unsigned int xi = 0; xi < width; xi++){ | |
20 | + | |
21 | + float x = (float)xi - (float)(width/2); //calculate the x position of the gaussian | |
22 | + float g = 1.0 / (sigma * sqrt(2 * 3.14159)) * exp( - (x*x) / (2*sigma*sigma) ); | |
23 | + out[xi] = g; | |
24 | + } | |
25 | + | |
26 | + } | |
27 | + | |
28 | + template<typename T> | |
29 | + void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){ | |
30 | + | |
31 | + //allocate space for the kernel | |
32 | + unsigned int kwidth = sigma * 8 + 1; | |
33 | + float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | |
34 | + | |
35 | + //fill the kernel with a gaussian | |
36 | + gen_gaussian(kernel0, sigma, kwidth); | |
37 | + | |
38 | + //copy the kernel to the GPU | |
39 | + T* gpuKernel0; | |
40 | + HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); | |
41 | + | |
42 | + //perform the gaussian blur as a separable convolution | |
43 | + stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth); | |
44 | + | |
45 | + HANDLE_ERROR(cudaFree(gpuKernel0)); | |
46 | + | |
47 | + } | |
48 | + | |
49 | + template<typename T> | |
50 | + void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){ | |
51 | + | |
52 | + //allocate space for the kernel | |
53 | + unsigned int kwidth = sigma * 8 + 1; | |
54 | + float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | |
55 | + | |
56 | + //fill the kernel with a gaussian | |
57 | + gen_gaussian(kernel0, sigma, kwidth); | |
58 | + | |
59 | + //copy the kernel to the GPU | |
60 | + T* gpuKernel0; | |
61 | + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T))); | |
62 | + HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); | |
63 | + | |
64 | + //perform the gaussian blur as a separable convolution | |
65 | + stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth); | |
66 | + | |
67 | + HANDLE_ERROR(cudaFree(gpuKernel0)); | |
68 | + | |
69 | + } | |
70 | + | |
71 | + /// Applies a Gaussian blur to a 2D image stored on the CPU | |
72 | + template<typename T> | |
73 | + void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){ | |
74 | + | |
75 | + //allocate space for the kernel | |
76 | + unsigned int kwidth = sigma * 8 + 1; | |
77 | + float* kernel0 = (float*) malloc( kwidth * sizeof(float) ); | |
78 | + | |
79 | + //fill the kernel with a gaussian | |
80 | + gen_gaussian(kernel0, sigma, kwidth); | |
81 | + | |
82 | + //perform the gaussian blur as a separable convolution | |
83 | + stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth); | |
84 | + | |
85 | + } | |
86 | + | |
87 | + }; | |
88 | +}; | |
89 | + | |
90 | +#endif | |
0 | 91 | \ No newline at end of file | ... | ... |
stim/cuda/gradient.cuh renamed to stim/cuda/templates/gradient.cuh
stim/gl/gl_spider.h
stim/image/image.h
... | ... | @@ -174,7 +174,7 @@ public: |
174 | 174 | |
175 | 175 | |
176 | 176 | /// Returns the maximum pixel value in the image |
177 | - T max(){ | |
177 | + T maxv(){ | |
178 | 178 | float max = 0; |
179 | 179 | unsigned long N = width() * height(); //get the number of pixels |
180 | 180 | |
... | ... | @@ -190,7 +190,7 @@ public: |
190 | 190 | } |
191 | 191 | |
192 | 192 | /// Returns the minimum pixel value in the image |
193 | - T min(){ | |
193 | + T minv(){ | |
194 | 194 | float min = 0; |
195 | 195 | unsigned long N = width() * height(); //get the number of pixels |
196 | 196 | ... | ... |
stim/math/matrix.h
stim/math/quaternion.h
stim/math/rect.h
stim/math/triangle.h
stim/math/vector.h
stim/visualization/colormap.h
... | ... | @@ -4,12 +4,12 @@ |
4 | 4 | #include <string> |
5 | 5 | #include <stdlib.h> |
6 | 6 | #ifdef __CUDACC__ |
7 | -#include "../cuda/error.h" | |
7 | +#include <stim/cuda/cudatools/error.h> | |
8 | 8 | #endif |
9 | 9 | |
10 | 10 | //saving an image to a file uses the CImg library |
11 | 11 | //this currently throws a lot of "unreachable" warnings (as of GCC 4.8.2, nvcc 6.5.12) |
12 | -#include "../image/image.h" | |
12 | +#include <stim/image/image.h> | |
13 | 13 | |
14 | 14 | |
15 | 15 | #define BREWER_CTRL_PTS 11 | ... | ... |