From 40d11588d501812254ef602402ec5143c4080da3 Mon Sep 17 00:00:00 2001
From: Tianshu Cheng <tianshucheng@foxmail.com>
Date: Fri, 4 Sep 2015 16:05:53 -0500
Subject: [PATCH] 2D non-separable convolution

---
 cudafunc.cu              | 30 +++++++++++++++++++++++++++---
 fun_mPb_theta.cpp        | 41 ++++++++++++++++++++++++++++++++++++++---
 func_mPb.cpp             |  8 ++++----
 gauss_derivative_odd.cpp | 31 +++++++++++++++++++++++--------
 test_main.cpp            |  5 ++++-
 5 files changed, 96 insertions(+), 19 deletions(-)
diff --git a/cudafunc.cu b/cudafunc.cu
index d2e5890..afb7cdb 100644
--- a/cudafunc.cu
+++ b/cudafunc.cu
@@ -1,6 +1,30 @@
-#include <stim/cuda/gaussian_blur.cuh>
+#include <stim/cuda/arraymath.cuh>
 
-void blur(float* image, float sigma, unsigned int x, unsigned int y){
+/*void blur(float* image, float sigma, unsigned int x, unsigned int y){
 
 	stim::cuda::cpu_gaussian_blur_2d<float>(image, sigma, x, y);
-}
\ No newline at end of file
+}*/
+
+void array_multiply(float* lhs, float rhs, unsigned int N){
+	// Plain host wrapper so .cpp files can call this via a forward declaration; forwards to stim::cuda::cpu_multiply (presumably scales the N floats at lhs by rhs in place -- TODO confirm).
+	stim::cuda::cpu_multiply(lhs, rhs, N);
+}
+
+void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N){
+	// Plain host wrapper: forwards to stim::cuda::cpu_add (presumably sum[i] = ptr1[i] + ptr2[i] for N floats -- TODO confirm). Callers in fun_mPb_theta.cpp pass the same buffer as ptr1 and sum.
+	stim::cuda::cpu_add(ptr1, ptr2, sum, N);
+
+}
+
+void conv2(float* img, float* mask, float* cpu_copy, unsigned int w, unsigned int h, unsigned int M){
+	// Plain host wrapper: forwards to stim::cuda::cpu_conv2. The caller in gauss_derivative_odd.cpp passes a w x h image, a mask with side M = winsize + 1, and a w x h buffer as cpu_copy (presumably the output -- TODO confirm).
+	stim::cuda::cpu_conv2(img, mask, cpu_copy, w, h, M);
+
+}
+
+void array_abs(float* img, unsigned int N){
+	// Plain host wrapper: forwards to stim::cuda::cpu_abs (presumably replaces each of the N floats at img with its absolute value -- TODO confirm). Its only call in gauss_derivative_odd.cpp is currently commented out.
+	stim::cuda::cpu_abs(img, N);
+
+}
+
diff --git a/fun_mPb_theta.cpp b/fun_mPb_theta.cpp
index 5e3495f..8b013b2 100644
--- a/fun_mPb_theta.cpp
+++ b/fun_mPb_theta.cpp
@@ -4,6 +4,9 @@
 #include <stim/visualization/colormap.h>
 #include <stim/image/image_contour_detection.h>
 
+void array_multiply(float* lhs, float rhs, unsigned int N);	// defined in cudafunc.cu; used below to apply the alpha weights to each filter response
+void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N);	// defined in cudafunc.cu; used below to accumulate the weighted responses into mPb_theta
+
 stim::image<float> func_mPb_theta(stim::image<float> lab, float theta, unsigned int w, unsigned int h){
 
 	stim::image<float> mPb_theta(w, h, 1);
@@ -26,16 +29,25 @@ stim::image<float> func_mPb_theta(stim::image<float> lab, float theta, unsigned 
 	stim::image<float> l1,l2,l3,a1,a2,a3,b1,b2,b3;
 
 	l1 = gaussian_derivative_filter_odd(pic_light, sigma, sigma_n, r1 * 2, theta, w, h);
+	stim::cpu2image(l1.data(), "data_output/l1_tex.bmp", w, h, stim::cmBrewer);
 	l2 = gaussian_derivative_filter_odd(pic_light, sigma, sigma_n, r2 * 2, theta, w, h);
+	stim::cpu2image(l2.data(), "data_output/l2_tex.bmp", w, h, stim::cmBrewer);
 	l3 = gaussian_derivative_filter_odd(pic_light, sigma, sigma_n, r3 * 2, theta, w, h);
+	stim::cpu2image(l3.data(), "data_output/l3_tex.bmp", w, h, stim::cmBrewer);
 	a1 = gaussian_derivative_filter_odd(pic_colora, sigma, sigma_n, r2 * 2, theta, w, h);
+	stim::cpu2image(a1.data(), "data_output/a1_tex.bmp", w, h, stim::cmBrewer);
 	a2 = gaussian_derivative_filter_odd(pic_colora, sigma, sigma_n, r3 * 2, theta, w, h);
+	stim::cpu2image(a2.data(), "data_output/a2_tex.bmp", w, h, stim::cmBrewer);
 	a3 = gaussian_derivative_filter_odd(pic_colora, sigma, sigma_n, r4 * 2, theta, w, h);
+	stim::cpu2image(a3.data(), "data_output/a3_tex.bmp", w, h, stim::cmBrewer);
 	b1 = gaussian_derivative_filter_odd(pic_colorb, sigma, sigma_n, r2 * 2, theta, w, h);
+	stim::cpu2image(b1.data(), "data_output/b1_tex.bmp", w, h, stim::cmBrewer);
 	b2 = gaussian_derivative_filter_odd(pic_colorb, sigma, sigma_n, r3 * 2, theta, w, h);
+	stim::cpu2image(b2.data(), "data_output/b2_tex.bmp", w, h, stim::cmBrewer);
 	b3 = gaussian_derivative_filter_odd(pic_colorb, sigma, sigma_n, r4 * 2, theta, w, h);
+	stim::cpu2image(b3.data(), "data_output/b3_tex.bmp", w, h, stim::cmBrewer);
 
-	for (unsigned i = 0; i<N; i++){
+	/*for (unsigned i = 0; i<N; i++){
 
 		mPb_theta.data()[i] = l1.data()[i] * alpha[0] +
 							  l2.data()[i] * alpha[1] +
@@ -47,9 +59,32 @@ stim::image<float> func_mPb_theta(stim::image<float> lab, float theta, unsigned 
 							  b2.data()[i] * alpha[7] +
 							  b3.data()[i] * alpha[8] ;
 	
-	}
+	}*/
+
+
+	array_multiply(l1.data(), alpha[0], N);
+	//stim::cpu2image(l1.data(), "data_output/array_add_l1.bmp", w, h, stim::cmBrewer);
+	array_multiply(l2.data(), alpha[1], N);
+	//stim::cpu2image(l2.data(), "data_output/array_add_l2.bmp", w, h, stim::cmBrewer);
+	array_multiply(l3.data(), alpha[2], N);
+	array_multiply(a1.data(), alpha[3], N);
+	array_multiply(a2.data(), alpha[4], N);
+	array_multiply(a3.data(), alpha[5], N);
+	array_multiply(b1.data(), alpha[6], N);
+	array_multiply(b2.data(), alpha[7], N);
+	array_multiply(b3.data(), alpha[8], N);
+
+	array_add(l1.data(), l2.data(), mPb_theta.data(), N);
+	//stim::cpu2image(sum, "data_output/array_add_sum.bmp", w, h, stim::cmBrewer);
+	array_add(mPb_theta.data(), l3.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), a1.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), a2.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), a3.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), b1.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), b2.data(), mPb_theta.data(), N);
+	array_add(mPb_theta.data(), b3.data(), mPb_theta.data(), N);
 
-	//stim::cpu2image(mPb_theta.data(), "data_output/cmap_mPb_theta0.bmp", w, h, stim::cmBrewer);
+	//stim::cpu2image(mPb_theta.data(), "data_output/mPb_theta0_1.bmp", w, h, stim::cmBrewer);
 
 
 	//getch();
diff --git a/func_mPb.cpp b/func_mPb.cpp
index 18db59b..43860fd 100644
--- a/func_mPb.cpp
+++ b/func_mPb.cpp
@@ -20,9 +20,9 @@ stim::image<float> func_mPb(stim::image<float> lab, unsigned int theta_n, unsign
 	float* ptr;
 	ptr = (float*) malloc(size * sizeof(float) * theta_n);
 
-	for (unsigned int n = 0; n < 1; n++){
+	for (unsigned int n = 0; n < theta_n; n++){
 	
-		ss << "data_output/mPb_theta"<< n << ".bmp";
+		ss << "data_output/mPb_theta"<< n << "_conv2.bmp";
 		float theta = 180 * ((float)n/theta_n); 
 
 		mPb_theta = func_mPb_theta(lab, theta, w, h);
@@ -37,7 +37,7 @@ stim::image<float> func_mPb(stim::image<float> lab, unsigned int theta_n, unsign
 		unsigned long idx = n * w * h * 1;  //index for the nth slice
 
 		std::string sss = ss.str();
-		stim::cpu2image(&ptr[idx], sss, w, h, stim::cmBrewer);
+		//stim::cpu2image(&ptr[idx], sss, w, h, stim::cmBrewer);
 	 
 
 		for(unsigned long i = 0; i < N; i++){
@@ -57,7 +57,7 @@ stim::image<float> func_mPb(stim::image<float> lab, unsigned int theta_n, unsign
 		ss.str("");
 	}              
 
-	stim::cpu2image(mPb.data(), "data_output/mPb.bmp", w, h, stim::cmBrewer);
+	//stim::cpu2image(mPb.data(), "data_output/mPb_conv2.bmp", w, h, stim::cmBrewer);
 
 	double duration2 = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
 	std::cout<<"total time:"<< duration2 <<"s"<<'\n';
diff --git a/gauss_derivative_odd.cpp b/gauss_derivative_odd.cpp
index 77389b2..cbce934 100644
--- a/gauss_derivative_odd.cpp
+++ b/gauss_derivative_odd.cpp
@@ -1,10 +1,15 @@
 #include <stim/image/image.h>
 #include <cmath>
 #include <stim/visualization/colormap.h>
-#include <iostream>
+//#include <iostream>
 
 #define PI 3.1415926
 
+void conv2(float* img, float* mask, float* cpu_copy, unsigned int w, unsigned int h, unsigned int M);	// defined in cudafunc.cu; replaces image.convolve2() below
+void array_abs(float* img, unsigned int N);	// defined in cudafunc.cu; only referenced from commented-out code below
+void array_multiply(float* lhs, float rhs, unsigned int N);	// defined in cudafunc.cu; only referenced from commented-out code below
+
+// winsize is passed by callers as 2 * r; each mask is square with (winsize + 1) pixels per side
 stim::image<float> gaussian_derivative_filter_odd(stim::image<float> image, float sigma, unsigned int sigma_n, unsigned int winsize, float theta, unsigned int w, unsigned int h){
 
 	stim::image<float> mask_x(winsize+1, winsize+1), mask_y(winsize+1, winsize+1), mask_theta(winsize+1, winsize+1), derivative_x, derivative_y, derivative_theta(w, h);
@@ -38,27 +43,37 @@ stim::image<float> gaussian_derivative_filter_odd(stim::image<float> image, floa
 
 	//stim::cpu2image(mask_x.data(), "data_output/cmapgray_mask_x.bmp", winsize+1, winsize+1, stim::cmBrewer);
 	
-	//stim::cpu2image(mask_y.data(), "data_output/cmapgray_mask_y.bmp", winsize+1, winsize+1, stim::cmBrewer);
+	stim::cpu2image(image.data(), "data_output/image.bmp", w, h, stim::cmBrewer);
 
 
-	//stim::cpu2image(mask_theta.data(), "data_output/cmapgray_mask_theta.bmp", winsize+1, winsize+1, stim::cmBrewer);
+	stim::cpu2image(mask_theta.data(), "data_output/mask.bmp", winsize+1, winsize+1, stim::cmBrewer);
 
 	// 2D convolution
-	derivative_theta = image.convolve2(mask_theta);
+	//derivative_theta = image.convolve2(mask_theta);
+	//stim::cpu2image(derivative_theta.data(), "data_output/derivative_theta1.bmp", w, h, stim::cmBrewer);
+	conv2(image.data(), mask_theta.data(), derivative_theta.data(), w, h, winsize+1);
+	//stim::cpu2image(derivative_theta.data(), "data_output/derivative_theta_tex1.bmp", w, h, stim::cmBrewer);
 
-	for (unsigned k = 0; k < w * h; k++){
+	//array_abs(derivative_theta.data(), N);
+	
+	/*for (unsigned k = 0; k < w * h; k++){
 		
 		derivative_theta.data()[k] = abs(derivative_theta.data()[k]);
 
-	}
+	}*/
 
-	float max = derivative_theta.max();
+	//stim::cpu2image(derivative_theta.data(), "data_output/derivative_theta2_abs.bmp", w, h, stim::cmBrewer);
 
+	/*float max = derivative_theta.max();
+
+	array_multiply(derivative_theta.data(), 1/max, N);*/
+
+	/*(
 	for (unsigned k = 0; k < w * h; k++){
 		
 		derivative_theta.data()[k] = derivative_theta.data()[k]/max;
 
-	}
+	})*/
 
 	//float max2 = derivative_theta.max();
 
diff --git a/test_main.cpp b/test_main.cpp
index a0fa536..d40e81c 100644
--- a/test_main.cpp
+++ b/test_main.cpp
@@ -9,11 +9,14 @@ void main()
 {
 	stim::image<float> rgb,gaussgradient;				//generate an image object
 
+	//unsigned int a = 5%5;
+	//unsigned int b = 5/5;
+
 	rgb.load("101087.bmp");					//load the input image
 	unsigned int w = rgb.width();		//get the image size
 	unsigned int h = rgb.height();
 	unsigned int s = rgb.size();
-	unsigned a = sizeof(float);
+	//unsigned a = sizeof(float);
 	
 	stim::image<float> lab;						//create an image object for a single-channel (grayscale) image
 	lab = rgb.srgb2lab();						//create the single-channel image
--
libgit2 0.21.4