Commit 2e5e3a2645ea3b53494717fc90ac414c0d9ef448
1 parent
cfcf8619
added 2D convolution algorithm
Showing
2 changed files
with
181 additions
and
51 deletions
Show diff stats
stim/image/image.h
1 | #ifndef STIM_IMAGE_H | 1 | #ifndef STIM_IMAGE_H |
2 | #define STIM_IMAGE_H | 2 | #define STIM_IMAGE_H |
3 | 3 | ||
4 | -#include <opencv2/core/core.hpp> | ||
5 | -#include <opencv2/highgui/highgui.hpp> | 4 | +#ifdef USING_OPENCV |
5 | + #include <opencv2/core/core.hpp> | ||
6 | + #include <opencv2/highgui/highgui.hpp> | ||
7 | +#endif | ||
6 | #include <vector> | 8 | #include <vector> |
7 | #include <iostream> | 9 | #include <iostream> |
8 | #include <limits> | 10 | #include <limits> |
@@ -18,7 +20,6 @@ namespace stim{ | @@ -18,7 +20,6 @@ namespace stim{ | ||
18 | template <class T> | 20 | template <class T> |
19 | class image{ | 21 | class image{ |
20 | 22 | ||
21 | - //cimg_library::CImg<T> img; | ||
22 | T* img; //pointer to the image data (interleaved RGB for color) | 23 | T* img; //pointer to the image data (interleaved RGB for color) |
23 | size_t R[3]; | 24 | size_t R[3]; |
24 | 25 | ||
@@ -57,18 +58,8 @@ class image{ | @@ -57,18 +58,8 @@ class image{ | ||
57 | return y * C() * X() + x * C() + c; | 58 | return y * C() * X() + x * C() + c; |
58 | } | 59 | } |
59 | 60 | ||
60 | - | 61 | +#ifdef USING_OPENCV |
61 | int cv_type(){ | 62 | int cv_type(){ |
62 | - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution | ||
63 | - | ||
64 | - //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C()); | ||
65 | - //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C()); | ||
66 | - //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C()); | ||
67 | - //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C()); | ||
68 | - //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C()); | ||
69 | - //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C()); | ||
70 | - //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C()); | ||
71 | - | ||
72 | if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C()); | 63 | if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C()); |
73 | if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C()); | 64 | if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C()); |
74 | if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C()); | 65 | if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C()); |
@@ -80,18 +71,9 @@ class image{ | @@ -80,18 +71,9 @@ class image{ | ||
80 | std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl; | 71 | std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl; |
81 | exit(1); | 72 | exit(1); |
82 | } | 73 | } |
83 | - | 74 | +#endif |
84 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) | 75 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) |
85 | T white(){ | 76 | T white(){ |
86 | - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution | ||
87 | - | ||
88 | - //if(std::is_same<T, unsigned char>::value) return UCHAR_MAX; | ||
89 | - //if(std::is_same<T, unsigned short>::value) return SHRT_MAX; | ||
90 | - //if(std::is_same<T, unsigned>::value) return UINT_MAX; | ||
91 | - //if(std::is_same<T, unsigned long>::value) return ULONG_MAX; | ||
92 | - //if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX; | ||
93 | - //if(std::is_same<T, float>::value) return 1.0f; | ||
94 | - //if(std::is_same<T, double>::value) return 1.0; | ||
95 | 77 | ||
96 | if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; | 78 | if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; |
97 | if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; | 79 | if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; |
@@ -164,23 +146,31 @@ public: | @@ -164,23 +146,31 @@ public: | ||
164 | exit(1); | 146 | exit(1); |
165 | } | 147 | } |
166 | 148 | ||
167 | - size_t c; //allocate space for the number of channels | ||
168 | - unsigned char format[2]; //allocate space to hold the image format tag | 149 | + size_t nc; //allocate space for the number of channels |
150 | + char format[2]; //allocate space to hold the image format tag | ||
169 | infile.read(format, 2); //read the image format tag | 151 | infile.read(format, 2); //read the image format tag |
170 | - infile.seekg(1); //skip the newline character | 152 | + infile.seekg(1, std::ios::cur); //skip the newline character |
171 | 153 | ||
172 | if (format[0] != 'P') { | 154 | if (format[0] != 'P') { |
173 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; | 155 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; |
174 | exit(1); | 156 | exit(1); |
175 | } | 157 | } |
176 | - if (format[1] == '5') c = 1; //get the number of channels from the format flag | ||
177 | - else if (format[1] == '6') c = 3; | 158 | + if (format[1] == '5') nc = 1; //get the number of channels from the format flag |
159 | + else if (format[1] == '6') nc = 3; | ||
178 | else { | 160 | else { |
179 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; | 161 | std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; |
180 | exit(1); | 162 | exit(1); |
181 | } | 163 | } |
164 | + | ||
165 | + unsigned char c; //stores a character | ||
166 | + while (infile.peek() == '#') { //if the next character indicates the start of a comment | ||
167 | + while (true) { | ||
168 | + c = infile.get(); | ||
169 | + if (c == 0x0A) break; | ||
170 | + } | ||
171 | + } | ||
172 | + | ||
182 | std::string sw; //create a string to store the width of the image | 173 | std::string sw; //create a string to store the width of the image |
183 | - unsigned char c; | ||
184 | while(true){ | 174 | while(true){ |
185 | c = infile.get(); //get a single character | 175 | c = infile.get(); //get a single character |
186 | if (c == ' ') break; //exit if we've encountered a space | 176 | if (c == ' ') break; //exit if we've encountered a space |
@@ -194,18 +184,38 @@ public: | @@ -194,18 +184,38 @@ public: | ||
194 | if (c == 0x0A) break; | 184 | if (c == 0x0A) break; |
195 | sh.push_back(c); | 185 | sh.push_back(c); |
196 | } | 186 | } |
197 | - size_t h = atoi(ah.c_str()); //convert the string into an integer | ||
198 | 187 | ||
199 | - allocate(w, h, c); //allocate space for the image | ||
200 | - infile.read(img, size()); //copy the binary data from the file to the image | 188 | + while (true) { //skip the maximum value |
189 | + c = infile.get(); | ||
190 | + if (c == 0x0A) break; | ||
191 | + } | ||
192 | + size_t h = atoi(sh.c_str()); //convert the string into an integer | ||
193 | + | ||
194 | + allocate(w, h, nc); //allocate space for the image | ||
195 | + infile.read((char*)img, size()); //copy the binary data from the file to the image | ||
201 | infile.close(); | 196 | infile.close(); |
202 | } | 197 | } |
203 | 198 | ||
204 | 199 | ||
205 | - | 200 | +#ifdef USING_OPENCV |
201 | + void from_opencv(unsigned char* buffer, size_t width, size_t height) { | ||
202 | + allocate(width, height, 3); | ||
203 | + T value; | ||
204 | + size_t i; | ||
205 | + for (size_t c = 0; c < C(); c++) { //copy directly | ||
206 | + for (size_t y = 0; y < Y(); y++) { | ||
207 | + for (size_t x = 0; x < X(); x++) { | ||
208 | + i = y * X() * C() + x * C() + (2 - c); | ||
209 | + value = buffer[i]; | ||
210 | + img[idx(x, y, c)] = value; | ||
211 | + } | ||
212 | + } | ||
213 | + } | ||
214 | + } | ||
215 | +#endif | ||
206 | /// Load an image from a file | 216 | /// Load an image from a file |
207 | void load(std::string filename){ | 217 | void load(std::string filename){ |
208 | - | 218 | +#ifdef USING_OPENCV |
209 | cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file | 219 | cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file |
210 | if(!cvImage.data){ | 220 | if(!cvImage.data){ |
211 | std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl; | 221 | std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl; |
@@ -220,22 +230,12 @@ public: | @@ -220,22 +230,12 @@ public: | ||
220 | memcpy(img, cv_ptr, bytes()); | 230 | memcpy(img, cv_ptr, bytes()); |
221 | if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving | 231 | if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving |
222 | from_opencv(cv_ptr, X(), Y()); | 232 | from_opencv(cv_ptr, X(), Y()); |
233 | +#else | ||
234 | + load_netpbm(filename); | ||
235 | +#endif | ||
223 | } | 236 | } |
224 | 237 | ||
225 | - void from_opencv(unsigned char* buffer, size_t width, size_t height){ | ||
226 | - allocate(width, height, 3); | ||
227 | - T value; | ||
228 | - size_t i; | ||
229 | - for(size_t c = 0; c < C(); c++){ //copy directly | ||
230 | - for(size_t y = 0; y < Y(); y++){ | ||
231 | - for(size_t x = 0; x < X(); x++){ | ||
232 | - i = y * X() * C() + x * C() + (2-c); | ||
233 | - value = buffer[i]; | ||
234 | - img[idx(x, y, c)] = value; | ||
235 | - } | ||
236 | - } | ||
237 | - } | ||
238 | - } | 238 | + |
239 | 239 | ||
240 | //save a Netpbm file | 240 | //save a Netpbm file |
241 | void save_netpbm(std::string filename) { | 241 | void save_netpbm(std::string filename) { |
@@ -255,7 +255,9 @@ public: | @@ -255,7 +255,9 @@ public: | ||
255 | std::cout << "Error in image::save_netpbm() - data must be grayscale or RGB." << std::endl; | 255 | std::cout << "Error in image::save_netpbm() - data must be grayscale or RGB." << std::endl; |
256 | exit(1); | 256 | exit(1); |
257 | } | 257 | } |
258 | - outfile << width() << " " << height() << (char)0x0A; //save the width and height | 258 | + size_t w = width(); |
259 | + size_t h = height(); | ||
260 | + outfile << w << " " << h << (char)0x0A; //save the width and height | ||
259 | outfile << "255" << (char)0x0A; //output the maximum value | 261 | outfile << "255" << (char)0x0A; //output the maximum value |
260 | outfile.write((const char*)img, size()); //write the binary data | 262 | outfile.write((const char*)img, size()); //write the binary data |
261 | outfile.close(); | 263 | outfile.close(); |
@@ -263,6 +265,7 @@ public: | @@ -263,6 +265,7 @@ public: | ||
263 | 265 | ||
264 | //save a file | 266 | //save a file |
265 | void save(std::string filename){ | 267 | void save(std::string filename){ |
268 | +#ifdef USING_OPENCV | ||
266 | //OpenCV uses an interleaved format, so convert first and then output | 269 | //OpenCV uses an interleaved format, so convert first and then output |
267 | T* buffer = (T*) malloc(bytes()); | 270 | T* buffer = (T*) malloc(bytes()); |
268 | 271 | ||
@@ -273,6 +276,9 @@ public: | @@ -273,6 +276,9 @@ public: | ||
273 | cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer); | 276 | cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer); |
274 | cv::imwrite(filename, cvImage); | 277 | cv::imwrite(filename, cvImage); |
275 | free(buffer); | 278 | free(buffer); |
279 | +#else | ||
280 | + save_netpbm(filename); | ||
281 | +#endif | ||
276 | } | 282 | } |
277 | 283 | ||
278 | void set_interleaved(T* buffer, size_t width, size_t height, size_t channels){ | 284 | void set_interleaved(T* buffer, size_t width, size_t height, size_t channels){ |
@@ -376,7 +382,7 @@ public: | @@ -376,7 +382,7 @@ public: | ||
376 | size_t x, y; | 382 | size_t x, y; |
377 | for(y = 0; y < Y(); y++){ | 383 | for(y = 0; y < Y(); y++){ |
378 | for(x = 0; x < X(); x++){ | 384 | for(x = 0; x < X(); x++){ |
379 | - img[idx(x, y, c)] = buffer[c]; | 385 | + img[idx(x, y, c)] = buffer[y * X() + x]; |
380 | } | 386 | } |
381 | } | 387 | } |
382 | } | 388 | } |
@@ -499,6 +505,24 @@ public: | @@ -499,6 +505,24 @@ public: | ||
499 | exit(1); | 505 | exit(1); |
500 | } | 506 | } |
501 | 507 | ||
508 | + template<typename V> | ||
509 | + operator image<V>() { | ||
510 | + //create a new image | ||
511 | + //image<V> r(X(), Y(), C()); | ||
512 | + V* dst = (V*)malloc(size() * sizeof(V)); | ||
513 | + | ||
514 | + for (size_t x = 0; x < X(); x++) { | ||
515 | + for (size_t y = 0; y < Y(); y++) { | ||
516 | + for (size_t c = 0; c < C(); c++) { | ||
517 | + dst[idx(x, y, c)] = (V)img[idx(x, y, c)]; | ||
518 | + } | ||
519 | + } | ||
520 | + } | ||
521 | + | ||
522 | + image<V> r(dst, X(), Y(), C()); | ||
523 | + return r; | ||
524 | + } | ||
525 | + | ||
502 | }; | 526 | }; |
503 | 527 | ||
504 | }; //end namespace stim | 528 | }; //end namespace stim |
1 | +#ifndef STIM_CUDA_CONV2_H | ||
2 | +#define STIM_CUDA_CONV2_H | ||
3 | +//#define __CUDACC__ | ||
4 | + | ||
5 | +#ifdef __CUDACC__ | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | +#endif | ||
8 | + | ||
9 | +namespace stim { | ||
10 | + | ||
11 | + //Kernel function that performs the 2D convolution. | ||
12 | + template<typename T> | ||
13 | + __global__ void kernel_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | ||
14 | + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image | ||
15 | + size_t yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
16 | + | ||
17 | + size_t X = sx - kx + 1; //calculate the size of the output image | ||
18 | + size_t Y = sy - ky + 1; | ||
19 | + | ||
20 | + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image | ||
21 | + | ||
22 | + //loop through the kernel | ||
23 | + size_t kxi, kyi; | ||
24 | + T v = 0; | ||
25 | + for (kyi = 0; kyi < ky; kyi++) { | ||
26 | + for (kxi = 0; kxi < kx; kxi++) { | ||
27 | + v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi]; | ||
28 | + } | ||
29 | + } | ||
30 | + out[yi * X + xi] = v; //write the result to global memory | ||
31 | + | ||
32 | + } | ||
33 | + | ||
34 | + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device. | ||
35 | + //@param out is a pointer to the output image | ||
36 | + //@param in is a pointer to the input image | ||
37 | + //@param sx is the size of the input image along X | ||
38 | + //@param sy is the size of the input image along Y | ||
39 | + //@param kx is the size of the kernel along X | ||
40 | + //@param ky is the size of the kernel along Y | ||
41 | + template<typename T> | ||
42 | + void gpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | ||
43 | + cudaDeviceProp p; | ||
44 | + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0)); | ||
45 | + size_t tmax = p.maxThreadsPerBlock; | ||
46 | + dim3 tn(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions | ||
47 | + size_t X = sx - kx + 1; //calculate the size of the output image | ||
48 | + size_t Y = sy - ky + 1; | ||
49 | + dim3 bn(X / tn.x + 1, Y / tn.y + 1); //calculate the grid dimensions | ||
50 | + kernel_conv2 <<<bn, tn >>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel | ||
51 | + } | ||
52 | + | ||
53 | + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned. | ||
54 | + // As a result, the output image will be smaller than the input image by (kx-1, ky-1) | ||
55 | + //@param out is a pointer to the output image | ||
56 | + //@param in is a pointer to the input image | ||
57 | + //@param sx is the size of the input image along X | ||
58 | + //@param sy is the size of the input image along Y | ||
59 | + //@param kx is the size of the kernel along X | ||
60 | + //@param ky is the size of the kernel along Y | ||
61 | + template<typename T> | ||
62 | + void cpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { | ||
63 | + size_t X = sx - kx + 1; //x size of the output image | ||
64 | + size_t Y = sy - ky + 1; //y size of the output image | ||
65 | + | ||
66 | +#ifdef __CUDACC__ | ||
67 | + //allocate memory and copy everything to the GPU | ||
68 | + T* gpu_in; | ||
69 | + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T))); | ||
70 | + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice)); | ||
71 | + T* gpu_kernel; | ||
72 | + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(T))); | ||
73 | + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(T), cudaMemcpyHostToDevice)); | ||
74 | + T* gpu_out; | ||
75 | + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T))); | ||
76 | + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel | ||
77 | + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host | ||
78 | + HANDLE_ERROR(cudaFree(gpu_in)); | ||
79 | + HANDLE_ERROR(cudaFree(gpu_kernel)); | ||
80 | + HANDLE_ERROR(cudaFree(gpu_out)); | ||
81 | +#else | ||
82 | + | ||
83 | + | ||
84 | + T v; //register stores the integral of the current pixel value | ||
85 | + size_t yi, xi, kyi, kxi, yi_kyi_sx; | ||
86 | + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image | ||
87 | + for (xi = 0; xi < X; xi++) { | ||
88 | + v = 0; | ||
89 | + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel | ||
90 | + yi_kyi_sx = (yi + kyi) * sx; | ||
91 | + for (kxi = 0; kxi < kx; kxi++) { | ||
92 | + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi]; | ||
93 | + } | ||
94 | + } | ||
95 | + out[yi * X + xi] = v; //save the result to the output array | ||
96 | + } | ||
97 | + } | ||
98 | + | ||
99 | +#endif | ||
100 | + } | ||
101 | + | ||
102 | + | ||
103 | +} | ||
104 | + | ||
105 | + | ||
106 | +#endif | ||
0 | \ No newline at end of file | 107 | \ No newline at end of file |