Commit 2e5e3a2645ea3b53494717fc90ac414c0d9ef448

Authored by David Mayerich
1 parent cfcf8619

added 2D convolution algorithm

Showing 2 changed files with 181 additions and 51 deletions   Show diff stats
stim/image/image.h
1 1 #ifndef STIM_IMAGE_H
2 2 #define STIM_IMAGE_H
3 3  
4   -#include <opencv2/core/core.hpp>
5   -#include <opencv2/highgui/highgui.hpp>
  4 +#ifdef USING_OPENCV
  5 + #include <opencv2/core/core.hpp>
  6 + #include <opencv2/highgui/highgui.hpp>
  7 +#endif
6 8 #include <vector>
7 9 #include <iostream>
8 10 #include <limits>
... ... @@ -18,7 +20,6 @@ namespace stim{
18 20 template <class T>
19 21 class image{
20 22  
21   - //cimg_library::CImg<T> img;
22 23 T* img; //pointer to the image data (interleaved RGB for color)
23 24 size_t R[3];
24 25  
... ... @@ -57,18 +58,8 @@ class image{
57 58 return y * C() * X() + x * C() + c;
58 59 }
59 60  
60   -
  61 +#ifdef USING_OPENCV
61 62 int cv_type(){
62   - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
63   -
64   - //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
65   - //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
66   - //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
67   - //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
68   - //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
69   - //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
70   - //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
71   -
72 63 if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C());
73 64 if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C());
74 65 if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C());
... ... @@ -80,18 +71,9 @@ class image{
80 71 std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl;
81 72 exit(1);
82 73 }
83   -
  74 +#endif
84 75 /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images)
85 76 T white(){
86   - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
87   -
88   - //if(std::is_same<T, unsigned char>::value) return UCHAR_MAX;
89   - //if(std::is_same<T, unsigned short>::value) return SHRT_MAX;
90   - //if(std::is_same<T, unsigned>::value) return UINT_MAX;
91   - //if(std::is_same<T, unsigned long>::value) return ULONG_MAX;
92   - //if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX;
93   - //if(std::is_same<T, float>::value) return 1.0f;
94   - //if(std::is_same<T, double>::value) return 1.0;
95 77  
96 78 if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX;
97 79 if(typeid(T) == typeid(unsigned short)) return SHRT_MAX;
... ... @@ -164,23 +146,31 @@ public:
164 146 exit(1);
165 147 }
166 148  
167   - size_t c; //allocate space for the number of channels
168   - unsigned char format[2]; //allocate space to hold the image format tag
  149 + size_t nc; //allocate space for the number of channels
  150 + char format[2]; //allocate space to hold the image format tag
169 151 infile.read(format, 2); //read the image format tag
170   - infile.seekg(1); //skip the newline character
  152 + infile.seekg(1, std::ios::cur); //skip the newline character
171 153  
172 154 if (format[0] != 'P') {
173 155 std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl;
174 156 exit(1);
175 157 }
176   - if (format[1] == '5') c = 1; //get the number of channels from the format flag
177   - else if (format[1] == '6') c = 3;
  158 + if (format[1] == '5') nc = 1; //get the number of channels from the format flag
  159 + else if (format[1] == '6') nc = 3;
178 160 else {
179 161 std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl;
180 162 exit(1);
181 163 }
  164 +
  165 + unsigned char c; //stores a character
  166 + while (infile.peek() == '#') { //if the next character indicates the start of a comment
  167 + while (true) {
  168 + c = infile.get();
  169 + if (c == 0x0A) break;
  170 + }
  171 + }
  172 +
182 173 std::string sw; //create a string to store the width of the image
183   - unsigned char c;
184 174 while(true){
185 175 c = infile.get(); //get a single character
186 176 if (c == ' ') break; //exit if we've encountered a space
... ... @@ -194,18 +184,38 @@ public:
194 184 if (c == 0x0A) break;
195 185 sh.push_back(c);
196 186 }
197   - size_t h = atoi(ah.c_str()); //convert the string into an integer
198 187  
199   - allocate(w, h, c); //allocate space for the image
200   - infile.read(img, size()); //copy the binary data from the file to the image
  188 + while (true) { //skip the maximum value
  189 + c = infile.get();
  190 + if (c == 0x0A) break;
  191 + }
  192 + size_t h = atoi(sh.c_str()); //convert the string into an integer
  193 +
  194 + allocate(w, h, nc); //allocate space for the image
  195 + infile.read((char*)img, size()); //copy the binary data from the file to the image
201 196 infile.close();
202 197 }
203 198  
204 199  
205   -
  200 +#ifdef USING_OPENCV
  201 + void from_opencv(unsigned char* buffer, size_t width, size_t height) {
  202 + allocate(width, height, 3);
  203 + T value;
  204 + size_t i;
  205 + for (size_t c = 0; c < C(); c++) { //copy directly
  206 + for (size_t y = 0; y < Y(); y++) {
  207 + for (size_t x = 0; x < X(); x++) {
  208 + i = y * X() * C() + x * C() + (2 - c);
  209 + value = buffer[i];
  210 + img[idx(x, y, c)] = value;
  211 + }
  212 + }
  213 + }
  214 + }
  215 +#endif
206 216 /// Load an image from a file
207 217 void load(std::string filename){
208   -
  218 +#ifdef USING_OPENCV
209 219 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file
210 220 if(!cvImage.data){
211 221 std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
... ... @@ -220,22 +230,12 @@ public:
220 230 memcpy(img, cv_ptr, bytes());
221 231 if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving
222 232 from_opencv(cv_ptr, X(), Y());
  233 +#else
  234 + load_netpbm(filename);
  235 +#endif
223 236 }
224 237  
225   - void from_opencv(unsigned char* buffer, size_t width, size_t height){
226   - allocate(width, height, 3);
227   - T value;
228   - size_t i;
229   - for(size_t c = 0; c < C(); c++){ //copy directly
230   - for(size_t y = 0; y < Y(); y++){
231   - for(size_t x = 0; x < X(); x++){
232   - i = y * X() * C() + x * C() + (2-c);
233   - value = buffer[i];
234   - img[idx(x, y, c)] = value;
235   - }
236   - }
237   - }
238   - }
  238 +
239 239  
240 240 //save a Netpbm file
241 241 void save_netpbm(std::string filename) {
... ... @@ -255,7 +255,9 @@ public:
255 255 std::cout << "Error in image::save_netpbm() - data must be grayscale or RGB." << std::endl;
256 256 exit(1);
257 257 }
258   - outfile << width() << " " << height() << (char)0x0A; //save the width and height
  258 + size_t w = width();
  259 + size_t h = height();
  260 + outfile << w << " " << h << (char)0x0A; //save the width and height
259 261 outfile << "255" << (char)0x0A; //output the maximum value
260 262 outfile.write((const char*)img, size()); //write the binary data
261 263 outfile.close();
... ... @@ -263,6 +265,7 @@ public:
263 265  
264 266 //save a file
265 267 void save(std::string filename){
  268 +#ifdef USING_OPENCV
266 269 //OpenCV uses an interleaved format, so convert first and then output
267 270 T* buffer = (T*) malloc(bytes());
268 271  
... ... @@ -273,6 +276,9 @@ public:
273 276 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer);
274 277 cv::imwrite(filename, cvImage);
275 278 free(buffer);
  279 +#else
  280 + save_netpbm(filename);
  281 +#endif
276 282 }
277 283  
278 284 void set_interleaved(T* buffer, size_t width, size_t height, size_t channels){
... ... @@ -376,7 +382,7 @@ public:
376 382 size_t x, y;
377 383 for(y = 0; y < Y(); y++){
378 384 for(x = 0; x < X(); x++){
379   - img[idx(x, y, c)] = buffer[c];
  385 + img[idx(x, y, c)] = buffer[y * X() + x];
380 386 }
381 387 }
382 388 }
... ... @@ -499,6 +505,24 @@ public:
499 505 exit(1);
500 506 }
501 507  
  508 + template<typename V>
  509 + operator image<V>() {
  510 + //create a new image
  511 + //image<V> r(X(), Y(), C());
  512 + V* dst = (V*)malloc(size() * sizeof(V));
  513 +
  514 + for (size_t x = 0; x < X(); x++) {
  515 + for (size_t y = 0; y < Y(); y++) {
  516 + for (size_t c = 0; c < C(); c++) {
  517 + dst[idx(x, y, c)] = (V)img[idx(x, y, c)];
  518 + }
  519 + }
  520 + }
  521 +
  522 + image<V> r(dst, X(), Y(), C());
  523 + return r;
  524 + }
  525 +
502 526 };
503 527  
504 528 }; //end namespace stim
... ...
stim/math/filters/conv2.h 0 → 100644
  1 +#ifndef STIM_CUDA_CONV2_H
  2 +#define STIM_CUDA_CONV2_H
  3 +//#define __CUDACC__
  4 +
  5 +#ifdef __CUDACC__
  6 +#include <stim/cuda/cudatools.h>
  7 +#endif
  8 +
  9 +namespace stim {
  10 +
  11 + //Kernel function that performs the 2D convolution.
  12 + template<typename T>
  13 + __global__ void kernel_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  14 + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image
  15 + size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  16 +
  17 + size_t X = sx - kx + 1; //calculate the size of the output image
  18 + size_t Y = sy - ky + 1;
  19 +
  20 + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image
  21 +
  22 + //loop through the kernel
  23 + size_t kxi, kyi;
  24 + T v = 0;
  25 + for (kyi = 0; kyi < ky; kyi++) {
  26 + for (kxi = 0; kxi < kx; kxi++) {
  27 + v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi];
  28 + }
  29 + }
  30 + out[yi * X + xi] = v; //write the result to global memory
  31 +
  32 + }
  33 +
  34 + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device.
  35 + //@param out is a pointer to the output image
  36 + //@param in is a pointer to the input image
  37 + //@param sx is the size of the input image along X
  38 + //@param sy is the size of the input image along Y
  39 + //@param kx is the size of the kernel along X
  40 + //@param ky is the size of the kernel along Y
  41 + template<typename T>
  42 + void gpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  43 + cudaDeviceProp p;
  44 + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0));
  45 + size_t tmax = p.maxThreadsPerBlock;
  46 + dim3 tn(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions
  47 + size_t X = sx - kx + 1; //calculate the size of the output image
  48 + size_t Y = sy - ky + 1;
  49 + dim3 bn(X / tn.x + 1, Y / tn.y + 1); //calculate the grid dimensions
  50 + kernel_conv2 <<<bn, tn >>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel
  51 + }
  52 +
  53 + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned.
  54 + // As a result, the output image will be smaller than the input image by (kx-1, ky-1)
  55 + //@param out is a pointer to the output image
  56 + //@param in is a pointer to the input image
  57 + //@param sx is the size of the input image along X
  58 + //@param sy is the size of the input image along Y
  59 + //@param kx is the size of the kernel along X
  60 + //@param ky is the size of the kernel along Y
  61 + template<typename T>
  62 + void cpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  63 + size_t X = sx - kx + 1; //x size of the output image
  64 + size_t Y = sy - ky + 1; //y size of the output image
  65 +
  66 +#ifdef __CUDACC__
  67 + //allocate memory and copy everything to the GPU
  68 + T* gpu_in;
  69 + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T)));
  70 + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice));
  71 + T* gpu_kernel;
  72 + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(T)));
  73 + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(T), cudaMemcpyHostToDevice));
  74 + T* gpu_out;
  75 + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T)));
  76 + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel
  77 + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host
  78 + HANDLE_ERROR(cudaFree(gpu_in));
  79 + HANDLE_ERROR(cudaFree(gpu_kernel));
  80 + HANDLE_ERROR(cudaFree(gpu_out));
  81 +#else
  82 +
  83 +
  84 + T v; //register stores the integral of the current pixel value
  85 + size_t yi, xi, kyi, kxi, yi_kyi_sx;
  86 + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image
  87 + for (xi = 0; xi < X; xi++) {
  88 + v = 0;
  89 + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel
  90 + yi_kyi_sx = (yi + kyi) * sx;
  91 + for (kxi = 0; kxi < kx; kxi++) {
  92 + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi];
  93 + }
  94 + }
  95 + out[yi * X + xi] = v; //save the result to the output array
  96 + }
  97 + }
  98 +
  99 +#endif
  100 + }
  101 +
  102 +
  103 +}
  104 +
  105 +
  106 +#endif
0 107 \ No newline at end of file
... ...