Commit 2e5e3a2645ea3b53494717fc90ac414c0d9ef448
1 parent
cfcf8619
added 2D convolution algorithm
Showing
2 changed files
with
181 additions
and
51 deletions
Show diff stats
stim/image/image.h
1 | 1 | #ifndef STIM_IMAGE_H |
2 | 2 | #define STIM_IMAGE_H |
3 | 3 | |
4 | -#include <opencv2/core/core.hpp> | |
5 | -#include <opencv2/highgui/highgui.hpp> | |
4 | +#ifdef USING_OPENCV | |
5 | + #include <opencv2/core/core.hpp> | |
6 | + #include <opencv2/highgui/highgui.hpp> | |
7 | +#endif | |
6 | 8 | #include <vector> |
7 | 9 | #include <iostream> |
8 | 10 | #include <limits> |
... | ... | @@ -18,7 +20,6 @@ namespace stim{ |
18 | 20 | template <class T> |
19 | 21 | class image{ |
20 | 22 | |
21 | - //cimg_library::CImg<T> img; | |
22 | 23 | T* img; //pointer to the image data (interleaved RGB for color) |
23 | 24 | size_t R[3]; |
24 | 25 | |
... | ... | @@ -57,18 +58,8 @@ class image{ |
57 | 58 | return y * C() * X() + x * C() + c; |
58 | 59 | } |
59 | 60 | |
60 | - | |
61 | +#ifdef USING_OPENCV | |
61 | 62 | int cv_type(){ |
62 | - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution | |
63 | - | |
64 | - //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C()); | |
65 | - //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C()); | |
66 | - //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C()); | |
67 | - //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C()); | |
68 | - //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C()); | |
69 | - //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C()); | |
70 | - //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C()); | |
71 | - | |
72 | 63 | if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C()); |
73 | 64 | if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C()); |
74 | 65 | if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C()); |
... | ... | @@ -80,18 +71,9 @@ class image{ |
80 | 71 | std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl; |
81 | 72 | exit(1); |
82 | 73 | } |
83 | - | |
74 | +#endif | |
84 | 75 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) |
85 | 76 | T white(){ |
86 | - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution | |
87 | - | |
88 | - //if(std::is_same<T, unsigned char>::value) return UCHAR_MAX; | |
89 | - //if(std::is_same<T, unsigned short>::value) return SHRT_MAX; | |
90 | - //if(std::is_same<T, unsigned>::value) return UINT_MAX; | |
91 | - //if(std::is_same<T, unsigned long>::value) return ULONG_MAX; | |
92 | - //if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX; | |
93 | - //if(std::is_same<T, float>::value) return 1.0f; | |
94 | - //if(std::is_same<T, double>::value) return 1.0; | |
95 | 77 | |
96 | 78 | if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; |
97 | 79 | if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; |
... | ... | @@ -164,23 +146,31 @@ public: |
164 | 146 | exit(1); |
165 | 147 | } |
166 | 148 | |
167 | - size_t c; //allocate space for the number of channels | |
168 | - unsigned char format[2]; //allocate space to hold the image format tag | |
149 | + size_t nc; //allocate space for the number of channels | |
150 | + char format[2]; //allocate space to hold the image format tag | |
169 | 151 | infile.read(format, 2); //read the image format tag |
170 | - infile.seekg(1); //skip the newline character | |
152 | + infile.seekg(1, std::ios::cur); //skip the newline character | |
171 | 153 | |
172 | 154 | if (format[0] != 'P') { |
173 | 155 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; |
174 | 156 | exit(1); |
175 | 157 | } |
176 | - if (format[1] == '5') c = 1; //get the number of channels from the format flag | |
177 | - else if (format[1] == '6') c = 3; | |
158 | + if (format[1] == '5') nc = 1; //get the number of channels from the format flag | |
159 | + else if (format[1] == '6') nc = 3; | |
178 | 160 | else { |
179 | 161 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; |
180 | 162 | exit(1); |
181 | 163 | } |
164 | + | |
165 | + unsigned char c; //stores a character | |
166 | + while (infile.peek() == '#') { //if the next character indicates the start of a comment | |
167 | + while (true) { | |
168 | + c = infile.get(); | |
169 | + if (c == 0x0A) break; | |
170 | + } | |
171 | + } | |
172 | + | |
182 | 173 | std::string sw; //create a string to store the width of the image |
183 | - unsigned char c; | |
184 | 174 | while(true){ |
185 | 175 | c = infile.get(); //get a single character |
186 | 176 | if (c == ' ') break; //exit if we've encountered a space |
... | ... | @@ -194,18 +184,38 @@ public: |
194 | 184 | if (c == 0x0A) break; |
195 | 185 | sh.push_back(c); |
196 | 186 | } |
197 | - size_t h = atoi(ah.c_str()); //convert the string into an integer | |
198 | 187 | |
199 | - allocate(w, h, c); //allocate space for the image | |
200 | - infile.read(img, size()); //copy the binary data from the file to the image | |
188 | + while (true) { //skip the maximum value | |
189 | + c = infile.get(); | |
190 | + if (c == 0x0A) break; | |
191 | + } | |
192 | + size_t h = atoi(sh.c_str()); //convert the string into an integer | |
193 | + | |
194 | + allocate(w, h, nc); //allocate space for the image | |
195 | + infile.read((char*)img, size()); //copy the binary data from the file to the image | |
201 | 196 | infile.close(); |
202 | 197 | } |
203 | 198 | |
204 | 199 | |
205 | - | |
200 | +#ifdef USING_OPENCV | |
201 | + void from_opencv(unsigned char* buffer, size_t width, size_t height) { | |
202 | + allocate(width, height, 3); | |
203 | + T value; | |
204 | + size_t i; | |
205 | + for (size_t c = 0; c < C(); c++) { //copy directly | |
206 | + for (size_t y = 0; y < Y(); y++) { | |
207 | + for (size_t x = 0; x < X(); x++) { | |
208 | + i = y * X() * C() + x * C() + (2 - c); | |
209 | + value = buffer[i]; | |
210 | + img[idx(x, y, c)] = value; | |
211 | + } | |
212 | + } | |
213 | + } | |
214 | + } | |
215 | +#endif | |
206 | 216 | /// Load an image from a file |
207 | 217 | void load(std::string filename){ |
208 | - | |
218 | +#ifdef USING_OPENCV | |
209 | 219 | cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file |
210 | 220 | if(!cvImage.data){ |
211 | 221 | std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl; |
... | ... | @@ -220,22 +230,12 @@ public: |
220 | 230 | memcpy(img, cv_ptr, bytes()); |
221 | 231 | if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving |
222 | 232 | from_opencv(cv_ptr, X(), Y()); |
233 | +#else | |
234 | + load_netpbm(filename); | |
235 | +#endif | |
223 | 236 | } |
224 | 237 | |
225 | - void from_opencv(unsigned char* buffer, size_t width, size_t height){ | |
226 | - allocate(width, height, 3); | |
227 | - T value; | |
228 | - size_t i; | |
229 | - for(size_t c = 0; c < C(); c++){ //copy directly | |
230 | - for(size_t y = 0; y < Y(); y++){ | |
231 | - for(size_t x = 0; x < X(); x++){ | |
232 | - i = y * X() * C() + x * C() + (2-c); | |
233 | - value = buffer[i]; | |
234 | - img[idx(x, y, c)] = value; | |
235 | - } | |
236 | - } | |
237 | - } | |
238 | - } | |
238 | + | |
239 | 239 | |
240 | 240 | //save a Netpbm file |
241 | 241 | void save_netpbm(std::string filename) { |
... | ... | @@ -255,7 +255,9 @@ public: |
255 | 255 | std::cout << "Error in image::save_netpbm() - data must be grayscale or RGB." << std::endl; |
256 | 256 | exit(1); |
257 | 257 | } |
258 | - outfile << width() << " " << height() << (char)0x0A; //save the width and height | |
258 | + size_t w = width(); | |
259 | + size_t h = height(); | |
260 | + outfile << w << " " << h << (char)0x0A; //save the width and height | |
259 | 261 | outfile << "255" << (char)0x0A; //output the maximum value |
260 | 262 | outfile.write((const char*)img, size()); //write the binary data |
261 | 263 | outfile.close(); |
... | ... | @@ -263,6 +265,7 @@ public: |
263 | 265 | |
264 | 266 | //save a file |
265 | 267 | void save(std::string filename){ |
268 | +#ifdef USING_OPENCV | |
266 | 269 | //OpenCV uses an interleaved format, so convert first and then output |
267 | 270 | T* buffer = (T*) malloc(bytes()); |
268 | 271 | |
... | ... | @@ -273,6 +276,9 @@ public: |
273 | 276 | cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer); |
274 | 277 | cv::imwrite(filename, cvImage); |
275 | 278 | free(buffer); |
279 | +#else | |
280 | + save_netpbm(filename); | |
281 | +#endif | |
276 | 282 | } |
277 | 283 | |
278 | 284 | void set_interleaved(T* buffer, size_t width, size_t height, size_t channels){ |
... | ... | @@ -376,7 +382,7 @@ public: |
376 | 382 | size_t x, y; |
377 | 383 | for(y = 0; y < Y(); y++){ |
378 | 384 | for(x = 0; x < X(); x++){ |
379 | - img[idx(x, y, c)] = buffer[c]; | |
385 | + img[idx(x, y, c)] = buffer[y * X() + x]; | |
380 | 386 | } |
381 | 387 | } |
382 | 388 | } |
... | ... | @@ -499,6 +505,24 @@ public: |
499 | 505 | exit(1); |
500 | 506 | } |
501 | 507 | |
508 | + template<typename V> | |
509 | + operator image<V>() { | |
510 | + //create a new image | |
511 | + //image<V> r(X(), Y(), C()); | |
512 | + V* dst = (V*)malloc(size() * sizeof(V)); | |
513 | + | |
514 | + for (size_t x = 0; x < X(); x++) { | |
515 | + for (size_t y = 0; y < Y(); y++) { | |
516 | + for (size_t c = 0; c < C(); c++) { | |
517 | + dst[idx(x, y, c)] = (V)img[idx(x, y, c)]; | |
518 | + } | |
519 | + } | |
520 | + } | |
521 | + | |
522 | + image<V> r(dst, X(), Y(), C()); | |
523 | + return r; | |
524 | + } | |
525 | + | |
502 | 526 | }; |
503 | 527 | |
504 | 528 | }; //end namespace stim | ... | ... |
1 | +#ifndef STIM_CUDA_CONV2_H | |
2 | +#define STIM_CUDA_CONV2_H | |
3 | +//#define __CUDACC__ | |
4 | + | |
5 | +#ifdef __CUDACC__ | |
6 | +#include <stim/cuda/cudatools.h> | |
7 | +#endif | |
8 | + | |
9 | +namespace stim { | |
10 | + | |
11 | + //Kernel function that performs the 2D convolution. | |
12 | + template<typename T> | |
13 | + __global__ void kernel_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | |
14 | + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image | |
15 | + size_t yi = blockIdx.y * blockDim.y + threadIdx.y; | |
16 | + | |
17 | + size_t X = sx - kx + 1; //calculate the size of the output image | |
18 | + size_t Y = sy - ky + 1; | |
19 | + | |
20 | + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image | |
21 | + | |
22 | + //loop through the kernel | |
23 | + size_t kxi, kyi; | |
24 | + T v = 0; | |
25 | + for (kyi = 0; kyi < ky; kyi++) { | |
26 | + for (kxi = 0; kxi < kx; kxi++) { | |
27 | + v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi]; | |
28 | + } | |
29 | + } | |
30 | + out[yi * X + xi] = v; //write the result to global memory | |
31 | + | |
32 | + } | |
33 | + | |
34 | + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device. | |
35 | + //@param out is a pointer to the output image | |
36 | + //@param in is a pointer to the input image | |
37 | + //@param sx is the size of the input image along X | |
38 | + //@param sy is the size of the input image along Y | |
39 | + //@param kx is the size of the kernel along X | |
40 | + //@param ky is the size of the kernel along Y | |
41 | + template<typename T> | |
42 | + void gpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | |
43 | + cudaDeviceProp p; | |
44 | + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0)); | |
45 | + size_t tmax = p.maxThreadsPerBlock; | |
46 | + dim3 tn(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions | |
47 | + size_t X = sx - kx + 1; //calculate the size of the output image | |
48 | + size_t Y = sy - ky + 1; | |
49 | + dim3 bn(X / tn.x + 1, Y / tn.y + 1); //calculate the grid dimensions | |
50 | + kernel_conv2 <<<bn, tn >>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel | |
51 | + } | |
52 | + | |
53 | + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned. | |
54 | + // As a result, the output image will be smaller than the input image by (kx-1, ky-1) | |
55 | + //@param out is a pointer to the output image | |
56 | + //@param in is a pointer to the input image | |
57 | + //@param sx is the size of the input image along X | |
58 | + //@param sy is the size of the input image along Y | |
59 | + //@param kx is the size of the kernel along X | |
60 | + //@param ky is the size of the kernel along Y | |
61 | + template<typename T> | |
62 | + void cpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | |
63 | + size_t X = sx - kx + 1; //x size of the output image | |
64 | + size_t Y = sy - ky + 1; //y size of the output image | |
65 | + | |
66 | +#ifdef __CUDACC__ | |
67 | + //allocate memory and copy everything to the GPU | |
68 | + T* gpu_in; | |
69 | + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T))); | |
70 | + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice)); | |
71 | + T* gpu_kernel; | |
72 | + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(T))); | |
73 | + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(T), cudaMemcpyHostToDevice)); | |
74 | + T* gpu_out; | |
75 | + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T))); | |
76 | + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel | |
77 | + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host | |
78 | + HANDLE_ERROR(cudaFree(gpu_in)); | |
79 | + HANDLE_ERROR(cudaFree(gpu_kernel)); | |
80 | + HANDLE_ERROR(cudaFree(gpu_out)); | |
81 | +#else | |
82 | + | |
83 | + | |
84 | + T v; //register stores the integral of the current pixel value | |
85 | + size_t yi, xi, kyi, kxi, yi_kyi_sx; | |
86 | + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image | |
87 | + for (xi = 0; xi < X; xi++) { | |
88 | + v = 0; | |
89 | + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel | |
90 | + yi_kyi_sx = (yi + kyi) * sx; | |
91 | + for (kxi = 0; kxi < kx; kxi++) { | |
92 | + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi]; | |
93 | + } | |
94 | + } | |
95 | + out[yi * X + xi] = v; //save the result to the output array | |
96 | + } | |
97 | + } | |
98 | + | |
99 | +#endif | |
100 | + } | |
101 | + | |
102 | + | |
103 | +} | |
104 | + | |
105 | + | |
106 | +#endif | |
0 | 107 | \ No newline at end of file | ... | ... |