Commit 5de3a9c2be3a6d1931e2d2040de0245261ce4dc2
CHECKPOINT: before the swap of globj for glnetwork in the use of segmentation.
Showing
24 changed files
with
1371 additions
and
53 deletions
Show diff stats
stim/cuda/arraymath.cuh
@@ -3,6 +3,11 @@ | @@ -3,6 +3,11 @@ | ||
3 | 3 | ||
4 | #include <stim/cuda/arraymath/array_add.cuh> | 4 | #include <stim/cuda/arraymath/array_add.cuh> |
5 | #include <stim/cuda/arraymath/array_multiply.cuh> | 5 | #include <stim/cuda/arraymath/array_multiply.cuh> |
6 | +#include <stim/cuda/arraymath/array_multiply2.cuh> | ||
7 | +#include <stim/cuda/arraymath/array_divide.cuh> | ||
8 | +#include <stim/cuda/arraymath/array_cos.cuh> | ||
9 | +#include <stim/cuda/arraymath/array_sin.cuh> | ||
10 | +#include <stim/cuda/arraymath/array_atan.cuh> | ||
6 | #include <stim/cuda/arraymath/array_abs.cuh> | 11 | #include <stim/cuda/arraymath/array_abs.cuh> |
7 | #include <stim/cuda/arraymath/array_cart2polar.cuh> | 12 | #include <stim/cuda/arraymath/array_cart2polar.cuh> |
8 | 13 |
stim/cuda/arraymath/array_add.cuh
@@ -3,6 +3,7 @@ | @@ -3,6 +3,7 @@ | ||
3 | 3 | ||
4 | #include <iostream> | 4 | #include <iostream> |
5 | #include <cuda.h> | 5 | #include <cuda.h> |
6 | +//#include <cmath> | ||
6 | #include <stim/cuda/cudatools.h> | 7 | #include <stim/cuda/cudatools.h> |
7 | 8 | ||
8 | namespace stim{ | 9 | namespace stim{ |
@@ -27,7 +28,7 @@ namespace stim{ | @@ -27,7 +28,7 @@ namespace stim{ | ||
27 | int threads = stim::maxThreadsPerBlock(); | 28 | int threads = stim::maxThreadsPerBlock(); |
28 | 29 | ||
29 | //calculate the number of blocks | 30 | //calculate the number of blocks |
30 | - int blocks = N / threads + (N%threads == 0 ? 0:1); | 31 | + int blocks = N / threads + 1; |
31 | 32 | ||
32 | //call the kernel to do the multiplication | 33 | //call the kernel to do the multiplication |
33 | cuda_add <<< blocks, threads >>>(ptr1, ptr2, sum, N); | 34 | cuda_add <<< blocks, threads >>>(ptr1, ptr2, sum, N); |
1 | +#ifndef STIM_CUDA_ARRAY_ATAN_H | ||
2 | +#define STIM_CUDA_ARRAY_ATAN_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <cmath> | ||
7 | +#include <stim/cuda/cudatools.h> | ||
8 | + | ||
namespace stim{
	namespace cuda{

		/// Kernel that computes the element-wise arctangent of an array.
		/// Each thread handles the single element at its global 1D index; threads past the end of the array do nothing.

		/// @param ptr1 is the input array (dereferenced on the device)
		/// @param out is the output array, out[i] = atan(ptr1[i])
		/// @param N is the number of elements in both arrays
		template<typename T>
		__global__ void cuda_atan(T* ptr1, T* out, unsigned int N){

			//calculate the 1D index for this thread (in 64 bits, so the product cannot wrap for very large N)
			size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

			if(idx < N){
				out[idx] = atan(ptr1[idx]);
			}

		}

		/// Launches cuda_atan over N elements that already reside on the GPU.

		/// @param ptr1 is the input array (GPU pointer)
		/// @param out is the output array (GPU pointer)
		/// @param N is the number of elements in both arrays
		template<typename T>
		void gpu_atan(T* ptr1, T* out, unsigned int N){

			//nothing to compute for an empty array (also avoids launching an empty grid)
			if(N == 0) return;

			//get the maximum number of threads per block for the CUDA device
			int threads = stim::maxThreadsPerBlock();

			//calculate the number of blocks (ceiling of N / threads)
			int blocks = N / threads + (N % threads == 0 ? 0 : 1);

			//call the kernel to compute the arctangent
			cuda_atan <<< blocks, threads >>>(ptr1, out, N);

			//a kernel launch returns no status, so fetch any launch error explicitly
			HANDLE_ERROR( cudaGetLastError() );

		}

		/// Computes the element-wise arctangent of a CPU array on the GPU.
		/// The input is copied to the GPU, processed by gpu_atan, and the result is copied back.

		/// @param ptr1 is the input array (CPU pointer)
		/// @param cpu_out is the output array (CPU pointer) that receives N results
		/// @param N is the number of elements in both arrays
		template<typename T>
		void cpu_atan(T* ptr1, T* cpu_out, unsigned int N){

			//allocate memory on the GPU for the input and output arrays
			T* gpu_ptr1;
			T* gpu_out;
			HANDLE_ERROR( cudaMalloc( &gpu_ptr1, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_out, N * sizeof(T) ) );

			//copy the input array to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_ptr1, ptr1, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_atan<T>(gpu_ptr1, gpu_out, N);

			//copy the result back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_out, gpu_out, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_ptr1) );
			HANDLE_ERROR( cudaFree(gpu_out) );

		}

	}
}
64 | + | ||
65 | + | ||
66 | + | ||
67 | +#endif | ||
0 | \ No newline at end of file | 68 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_ARRAY_COS_H | ||
2 | +#define STIM_CUDA_ARRAY_COS_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <cmath> | ||
7 | +#include <stim/cuda/cudatools.h> | ||
8 | + | ||
namespace stim{
	namespace cuda{

		/// Kernel that computes the element-wise cosine of an array.
		/// Each thread handles the single element at its global 1D index; threads past the end of the array do nothing.

		/// @param ptr1 is the input array (dereferenced on the device)
		/// @param out is the output array, out[i] = cos(ptr1[i])
		/// @param N is the number of elements in both arrays
		template<typename T>
		__global__ void cuda_cos(T* ptr1, T* out, unsigned int N){

			//calculate the 1D index for this thread (in 64 bits, so the product cannot wrap for very large N)
			size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

			if(idx < N){
				out[idx] = cos(ptr1[idx]);
			}

		}

		/// Launches cuda_cos over N elements that already reside on the GPU.

		/// @param ptr1 is the input array (GPU pointer)
		/// @param out is the output array (GPU pointer)
		/// @param N is the number of elements in both arrays
		template<typename T>
		void gpu_cos(T* ptr1, T* out, unsigned int N){

			//nothing to compute for an empty array (also avoids launching an empty grid)
			if(N == 0) return;

			//get the maximum number of threads per block for the CUDA device
			int threads = stim::maxThreadsPerBlock();

			//calculate the number of blocks (ceiling of N / threads)
			int blocks = N / threads + (N % threads == 0 ? 0 : 1);

			//call the kernel to compute the cosine
			cuda_cos <<< blocks, threads >>>(ptr1, out, N);

			//a kernel launch returns no status, so fetch any launch error explicitly
			HANDLE_ERROR( cudaGetLastError() );

		}

		/// Computes the element-wise cosine of a CPU array on the GPU.
		/// The input is copied to the GPU, processed by gpu_cos, and the result is copied back.

		/// @param ptr1 is the input array (CPU pointer)
		/// @param cpu_out is the output array (CPU pointer) that receives N results
		/// @param N is the number of elements in both arrays
		template<typename T>
		void cpu_cos(T* ptr1, T* cpu_out, unsigned int N){

			//allocate memory on the GPU for the input and output arrays
			T* gpu_ptr1;
			T* gpu_out;
			HANDLE_ERROR( cudaMalloc( &gpu_ptr1, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_out, N * sizeof(T) ) );

			//copy the input array to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_ptr1, ptr1, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_cos<T>(gpu_ptr1, gpu_out, N);

			//copy the result back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_out, gpu_out, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_ptr1) );
			HANDLE_ERROR( cudaFree(gpu_out) );

		}

	}
}
64 | + | ||
65 | + | ||
66 | + | ||
67 | +#endif | ||
0 | \ No newline at end of file | 68 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_ARRAY_DIVIDE_H | ||
2 | +#define STIM_CUDA_ARRAY_DIVIDE_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | + | ||
namespace stim{
	namespace cuda{

		/// Kernel that divides two arrays element by element.
		/// Each thread handles the single element at its global 1D index; threads past the end of the arrays do nothing.
		/// No check is made for zero divisors; the result is whatever T's operator/ produces.

		/// @param ptr1 is the array of dividends (dereferenced on the device)
		/// @param ptr2 is the array of divisors (dereferenced on the device)
		/// @param quotient is the output array, quotient[i] = ptr1[i] / ptr2[i]
		/// @param N is the number of elements in each array
		template<typename T>
		__global__ void cuda_divide(T* ptr1, T* ptr2, T* quotient, unsigned int N){

			//calculate the 1D index for this thread (in 64 bits, so the product cannot wrap for very large N)
			size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

			if(idx < N){
				quotient[idx] = ptr1[idx] / ptr2[idx];
			}

		}

		/// Launches cuda_divide over N elements that already reside on the GPU.

		/// @param ptr1 is the array of dividends (GPU pointer)
		/// @param ptr2 is the array of divisors (GPU pointer)
		/// @param quotient is the output array (GPU pointer)
		/// @param N is the number of elements in each array
		template<typename T>
		void gpu_divide(T* ptr1, T* ptr2, T* quotient, unsigned int N){

			//nothing to compute for empty arrays (also avoids launching an empty grid)
			if(N == 0) return;

			//get the maximum number of threads per block for the CUDA device
			int threads = stim::maxThreadsPerBlock();

			//calculate the number of blocks (ceiling of N / threads)
			int blocks = N / threads + (N % threads == 0 ? 0 : 1);

			//call the kernel to do the division
			cuda_divide <<< blocks, threads >>>(ptr1, ptr2, quotient, N);

			//a kernel launch returns no status, so fetch any launch error explicitly
			HANDLE_ERROR( cudaGetLastError() );

		}

		/// Divides two CPU arrays element by element on the GPU.
		/// Both inputs are copied to the GPU, processed by gpu_divide, and the result is copied back.

		/// @param ptr1 is the array of dividends (CPU pointer)
		/// @param ptr2 is the array of divisors (CPU pointer)
		/// @param cpu_quotient is the output array (CPU pointer) that receives N results
		/// @param N is the number of elements in each array
		template<typename T>
		void cpu_divide(T* ptr1, T* ptr2, T* cpu_quotient, unsigned int N){

			//allocate memory on the GPU for the two inputs and the output
			T* gpu_ptr1;
			T* gpu_ptr2;
			T* gpu_quotient;
			HANDLE_ERROR( cudaMalloc( &gpu_ptr1, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_ptr2, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_quotient, N * sizeof(T) ) );

			//copy the input arrays to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_ptr1, ptr1, N * sizeof(T), cudaMemcpyHostToDevice) );
			HANDLE_ERROR( cudaMemcpy( gpu_ptr2, ptr2, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_divide<T>(gpu_ptr1, gpu_ptr2, gpu_quotient, N);

			//copy the result back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_quotient, gpu_quotient, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_ptr1) );
			HANDLE_ERROR( cudaFree(gpu_ptr2) );
			HANDLE_ERROR( cudaFree(gpu_quotient) );

		}

	}
}
67 | + | ||
68 | + | ||
69 | + | ||
70 | +#endif | ||
0 | \ No newline at end of file | 71 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_ARRAY_MULTIPLY_H | ||
2 | +#define STIM_CUDA_ARRAY_MULTIPLY_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <stim/cuda/cudatools.h> | ||
7 | + | ||
// NOTE(review): this header is guarded by STIM_CUDA_ARRAY_MULTIPLY_H - confirm that no other
// header (e.g. array_multiply.cuh) uses the same guard, otherwise one of the two is silently skipped.
namespace stim{
	namespace cuda{

		/// Kernel that multiplies two arrays element by element.
		/// Each thread handles the single element at its global 1D index; threads past the end of the arrays do nothing.

		/// @param ptr1 is the first factor array (dereferenced on the device)
		/// @param ptr2 is the second factor array (dereferenced on the device)
		/// @param product is the output array, product[i] = ptr1[i] * ptr2[i]
		/// @param N is the number of elements in each array
		template<typename T>
		__global__ void cuda_multiply(T* ptr1, T* ptr2, T* product, unsigned int N){

			//calculate the 1D index for this thread (in 64 bits, so the product cannot wrap for very large N)
			size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

			if(idx < N){
				product[idx] = ptr1[idx] * ptr2[idx];
			}

		}

		/// Launches cuda_multiply over N elements that already reside on the GPU.

		/// @param ptr1 is the first factor array (GPU pointer)
		/// @param ptr2 is the second factor array (GPU pointer)
		/// @param product is the output array (GPU pointer)
		/// @param N is the number of elements in each array
		template<typename T>
		void gpu_multiply(T* ptr1, T* ptr2, T* product, unsigned int N){

			//nothing to compute for empty arrays (also avoids launching an empty grid)
			if(N == 0) return;

			//get the maximum number of threads per block for the CUDA device
			int threads = stim::maxThreadsPerBlock();

			//calculate the number of blocks (ceiling of N / threads)
			int blocks = N / threads + (N % threads == 0 ? 0 : 1);

			//call the kernel to do the multiplication
			cuda_multiply <<< blocks, threads >>>(ptr1, ptr2, product, N);

			//a kernel launch returns no status, so fetch any launch error explicitly
			HANDLE_ERROR( cudaGetLastError() );

		}

		/// Multiplies two CPU arrays element by element on the GPU.
		/// Both inputs are copied to the GPU, processed by gpu_multiply, and the result is copied back.

		/// @param ptr1 is the first factor array (CPU pointer)
		/// @param ptr2 is the second factor array (CPU pointer)
		/// @param cpu_product is the output array (CPU pointer) that receives N results
		/// @param N is the number of elements in each array
		template<typename T>
		void cpu_multiply(T* ptr1, T* ptr2, T* cpu_product, unsigned int N){

			//allocate memory on the GPU for the two inputs and the output
			T* gpu_ptr1;
			T* gpu_ptr2;
			T* gpu_product;
			HANDLE_ERROR( cudaMalloc( &gpu_ptr1, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_ptr2, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_product, N * sizeof(T) ) );

			//copy the input arrays to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_ptr1, ptr1, N * sizeof(T), cudaMemcpyHostToDevice) );
			HANDLE_ERROR( cudaMemcpy( gpu_ptr2, ptr2, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_multiply<T>(gpu_ptr1, gpu_ptr2, gpu_product, N);

			//copy the result back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_product, gpu_product, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_ptr1) );
			HANDLE_ERROR( cudaFree(gpu_ptr2) );
			HANDLE_ERROR( cudaFree(gpu_product) );

		}

	}
}
67 | + | ||
68 | + | ||
69 | + | ||
70 | +#endif | ||
0 | \ No newline at end of file | 71 | \ No newline at end of file |
1 | +#ifndef STIM_CUDA_ARRAY_SIN_H | ||
2 | +#define STIM_CUDA_ARRAY_SIN_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <cmath> | ||
7 | +#include <stim/cuda/cudatools.h> | ||
8 | + | ||
namespace stim{
	namespace cuda{

		/// Kernel that computes the element-wise sine of an array.
		/// Each thread handles the single element at its global 1D index; threads past the end of the array do nothing.

		/// @param ptr1 is the input array (dereferenced on the device)
		/// @param out is the output array, out[i] = sin(ptr1[i])
		/// @param N is the number of elements in both arrays
		template<typename T>
		__global__ void cuda_sin(T* ptr1, T* out, unsigned int N){

			//calculate the 1D index for this thread (in 64 bits, so the product cannot wrap for very large N)
			size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

			if(idx < N){
				out[idx] = sin(ptr1[idx]);
			}

		}

		/// Launches cuda_sin over N elements that already reside on the GPU.

		/// @param ptr1 is the input array (GPU pointer)
		/// @param out is the output array (GPU pointer)
		/// @param N is the number of elements in both arrays
		template<typename T>
		void gpu_sin(T* ptr1, T* out, unsigned int N){

			//nothing to compute for an empty array (also avoids launching an empty grid)
			if(N == 0) return;

			//get the maximum number of threads per block for the CUDA device
			int threads = stim::maxThreadsPerBlock();

			//calculate the number of blocks (ceiling of N / threads)
			int blocks = N / threads + (N % threads == 0 ? 0 : 1);

			//call the kernel to compute the sine
			cuda_sin <<< blocks, threads >>>(ptr1, out, N);

			//a kernel launch returns no status, so fetch any launch error explicitly
			HANDLE_ERROR( cudaGetLastError() );

		}

		/// Computes the element-wise sine of a CPU array on the GPU.
		/// The input is copied to the GPU, processed by gpu_sin, and the result is copied back.

		/// @param ptr1 is the input array (CPU pointer)
		/// @param cpu_out is the output array (CPU pointer) that receives N results
		/// @param N is the number of elements in both arrays
		template<typename T>
		void cpu_sin(T* ptr1, T* cpu_out, unsigned int N){

			//allocate memory on the GPU for the input and output arrays
			T* gpu_ptr1;
			T* gpu_out;
			HANDLE_ERROR( cudaMalloc( &gpu_ptr1, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_out, N * sizeof(T) ) );

			//copy the input array to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_ptr1, ptr1, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_sin<T>(gpu_ptr1, gpu_out, N);

			//copy the result back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_out, gpu_out, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_ptr1) );
			HANDLE_ERROR( cudaFree(gpu_out) );

		}

	}
}
64 | + | ||
65 | + | ||
66 | + | ||
67 | +#endif | ||
0 | \ No newline at end of file | 68 | \ No newline at end of file |
1 | +#include <stim/image/image.h> | ||
2 | +#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | +#include <sstream> | ||
6 | + | ||
7 | + | ||
8 | +void array_multiply(float* lhs, float rhs, unsigned int N); | ||
9 | +void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N); | ||
10 | + | ||
11 | +/// This function evaluates the cPb given an multi-channel image | ||
12 | + | ||
13 | +/// @param img is the multi-channel image | ||
14 | +/// @param r is an array of radii for different scaled discs(filters) | ||
15 | +/// @param alpha is is an array of weights for different scaled discs(filters) | ||
16 | +/// @param s is the number of scales | ||
17 | + | ||
18 | +stim::image<float> cPb(stim::image<float> img, int* r, float* alpha, int s){ | ||
19 | + | ||
20 | + unsigned int w = img.width(); // get the width of picture | ||
21 | + unsigned int h = img.height(); // get the height of picture | ||
22 | + unsigned int c = img.channels(); // get the channels of picture | ||
23 | + | ||
24 | + | ||
25 | + stim::image<float> cPb(w, h, 1); // allocate space for cPb | ||
26 | + unsigned size = cPb.size(); // get the size of cPb | ||
27 | + memset ( cPb.data(), 0, size * sizeof(float)); // initialize all the pixels of cPb to 0 | ||
28 | + | ||
29 | + | ||
30 | + unsigned int N = w * h; // get the number of pixels | ||
31 | + int sigma_n = 3; // set the number of standard deviations used to define the sigma | ||
32 | + | ||
33 | + std::ostringstream ss; // (optional) set the stream to designate the test result file | ||
34 | + | ||
35 | + stim::image<float> temp; // set the temporary image to store the addtion result | ||
36 | + | ||
37 | + for (int i = 0; i < c; i++){ | ||
38 | + for (int j = 0; j < s; j++){ | ||
39 | + | ||
40 | + ss << "data_output/cPb_slice"<< i*s + j << ".bmp"; // set the name for test result file (optional) | ||
41 | + std::string sss = ss.str(); | ||
42 | + | ||
43 | + // get the gaussian gradient by convolving each image slice with the mask | ||
44 | + temp = Pb(img.channel(i), r[i*s + j], sigma_n); | ||
45 | + | ||
46 | + // output the test result of each slice (optional) | ||
47 | + //stim::cpu2image(temp.data(), sss, w, h, stim::cmBrewer); | ||
48 | + | ||
49 | + // multiply each gaussian gradient with its weight | ||
50 | + array_multiply(temp.data(), alpha[i*s + j], N); | ||
51 | + | ||
52 | + // add up all the weighted gaussian gradients | ||
53 | + array_add(cPb.data(), temp.data(), cPb.data(), N); | ||
54 | + | ||
55 | + ss.str(""); //(optional) clear the space for stream | ||
56 | + | ||
57 | + } | ||
58 | + } | ||
59 | + | ||
60 | + float max = cPb.maxv(); // get the maximum of cPb used for normalization | ||
61 | + array_multiply(cPb.data(), 1/max, N); // normalize the cPb | ||
62 | + | ||
63 | + // output the test result of cPb (optional) | ||
64 | + //stim::cpu2image(cPb.data(), "data_output/cPb_0916.bmp", w, h, stim::cmBrewer); | ||
65 | + | ||
66 | + return cPb; | ||
67 | +} |
1 | +#include <stim/image/image.h> | ||
2 | +//#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | + | ||
6 | +/// This function generates the first-order gaussian derivative filter gx gy, | ||
7 | +/// convolves the image with gx gy, | ||
8 | +/// and returns an image class which channel(0) is Ix and channel(1) is Iy | ||
9 | + | ||
10 | +/// @param img is the one-channel image | ||
11 | +/// @param r is an array of radii for different scaled discs(filters) | ||
12 | +/// @param sigma_n is the number of standard deviations used to define the sigma | ||
13 | + | ||
14 | +void conv2_sep(float* img, unsigned int x, unsigned int y, float* kernel0, unsigned int k0, float* kernel1, unsigned int k1); | ||
15 | +//void array_abs(float* img, unsigned int N); | ||
16 | + | ||
17 | +stim::image<float> Gd1(stim::image<float> image, int r, unsigned int sigma_n){ | ||
18 | + | ||
19 | + unsigned int w = image.width(); // get the width of picture | ||
20 | + unsigned int h = image.height(); // get the height of picture | ||
21 | + unsigned N = w * h; // get the number of pixels of picture | ||
22 | + int winsize = 2 * r + 1; // set the winsdow size of disc(filter) | ||
23 | + float sigma = float(r)/float(sigma_n); // calculate the sigma used in gaussian function | ||
24 | + | ||
25 | + stim::image<float> I(w, h, 1, 2); // allocate space for return image class | ||
26 | + stim::image<float> Ix(w, h); // allocate space for Ix | ||
27 | + stim::image<float> Iy(w, h); // allocate space for Iy | ||
28 | + Ix = image; // initialize Ix | ||
29 | + Iy = image; // initialize Iy | ||
30 | + | ||
31 | + float* array_x1; | ||
32 | + array_x1 = new float[winsize]; //allocate space for the 1D x-oriented gaussian derivative filter array_x1 for gx | ||
33 | + float* array_y1; | ||
34 | + array_y1 = new float[winsize]; //allocate space for the 1D y-oriented gaussian derivative filter array_y1 for gx | ||
35 | + float* array_x2; | ||
36 | + array_x2 = new float[winsize]; //allocate space for the 1D x-oriented gaussian derivative filter array_x2 for gy | ||
37 | + float* array_y2; | ||
38 | + array_y2 = new float[winsize]; //allocate space for the 1D y-oriented gaussian derivative filter array_y2 for gy | ||
39 | + | ||
40 | + | ||
41 | + for (int i = 0; i < winsize; i++){ | ||
42 | + | ||
43 | + int x = i - r; //range of x | ||
44 | + int y = i - r; //range of y | ||
45 | + | ||
46 | + // create the 1D x-oriented gaussian derivative filter array_x1 for gx | ||
47 | + array_x1[i] = (-1) * x * exp((-1)*(pow(x, 2))/(2*pow(sigma, 2))); | ||
48 | + // create the 1D y-oriented gaussian derivative filter array_y1 for gx | ||
49 | + array_y1[i] = exp((-1)*(pow(y, 2))/(2*pow(sigma, 2))); | ||
50 | + // create the 1D x-oriented gaussian derivative filter array_x2 for gy | ||
51 | + array_x2[i] = exp((-1)*(pow(x, 2))/(2*pow(sigma, 2))); | ||
52 | + // create the 1D y-oriented gaussian derivative filter array_y2 for gy | ||
53 | + array_y2[i] = (-1) * y * exp((-1)*(pow(y, 2))/(2*pow(sigma, 2))); | ||
54 | + } | ||
55 | + | ||
56 | + //stim::cpu2image(array_x1, "data_output/array_x1_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
57 | + //stim::cpu2image(array_y1, "data_output/array_y1_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
58 | + //stim::cpu2image(array_x2, "data_output/array_x2_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
59 | + //stim::cpu2image(array_y2, "data_output/array_y2_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
60 | + | ||
61 | + // get Ix by convolving the image with gx | ||
62 | + conv2_sep(Ix.data(), w, h, array_x1, winsize, array_y1, winsize); | ||
63 | + | ||
64 | + //stim::cpu2image(Ix.data(), "data_output/Ix_0915.bmp", w, h, stim::cmBrewer); | ||
65 | + // get Iy by convolving the image with gy | ||
66 | + conv2_sep(Iy.data(), w, h, array_x2, winsize, array_y2, winsize); | ||
67 | + | ||
68 | + //stim::cpu2image(Iy.data(), "data_output/Iy_0915.bmp", w, h, stim::cmBrewer); | ||
69 | + | ||
70 | + delete [] array_x1; //free the memory of array_x1 | ||
71 | + delete [] array_y1; //free the memory of array_y1 | ||
72 | + delete [] array_x2; //free the memory of array_x2 | ||
73 | + delete [] array_y2; //free the memory of array_y2 | ||
74 | + | ||
75 | + I.set_channel(0, Ix.data()); | ||
76 | + I.set_channel(1, Iy.data()); | ||
77 | + | ||
78 | + return I; | ||
79 | + | ||
80 | +} | ||
0 | \ No newline at end of file | 81 | \ No newline at end of file |
1 | +#include <stim/image/image.h> | ||
2 | +#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | + | ||
6 | +#define PI 3.1415926 | ||
7 | + | ||
8 | +void array_multiply(float* lhs, float rhs, unsigned int N); | ||
9 | +void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N); | ||
10 | +void array_abs(float* img, unsigned int N); | ||
11 | + | ||
12 | +/// This function evaluates the theta-dependent odd symmetric gaussian derivative gradient of an one-channel image | ||
13 | + | ||
14 | +/// @param img is the one-channel image | ||
15 | +/// @param r is an array of radii for different scaled discs(filters) | ||
16 | +/// @param sigma_n is the number of standard deviations used to define the sigma | ||
17 | +/// @param theta is angle used for computing the gradient | ||
18 | + | ||
19 | +stim::image<float> Gd_odd(stim::image<float> image, int r, unsigned int sigma_n, float theta){ | ||
20 | + | ||
21 | + float theta_r = (theta * PI)/180; //change angle unit from degree to rad | ||
22 | + | ||
23 | + unsigned int w = image.width(); // get the width of picture | ||
24 | + unsigned int h = image.height(); // get the height of picture | ||
25 | + unsigned N = w * h; // get the number of pixels of picture | ||
26 | + int winsize = 2 * r + 1; // set the winsdow size of disc(filter) | ||
27 | + | ||
28 | + stim::image<float> I(w, h, 1, 2); // allocate space for return image of Gd1 | ||
29 | + stim::image<float> Ix(w, h); // allocate space for Ix | ||
30 | + stim::image<float> Iy(w, h); // allocate space for Iy | ||
31 | + stim::image<float> Gd_odd_theta(w, h); // allocate space for Pb | ||
32 | + | ||
33 | + I = Gd1(image, r, sigma_n); // calculate the Ix, Iy | ||
34 | + Ix = I.channel(0); | ||
35 | + Iy = I.channel(1); | ||
36 | + | ||
37 | + array_multiply(Ix.data(), cos(theta_r), N); //Ix = Ix*cos(theta_r) | ||
38 | + array_multiply(Iy.data(), sin(theta_r), N); //Iy = Iy*sin(theta_r) | ||
39 | + array_add(Ix.data(), Iy.data(), Gd_odd_theta.data(), N); //Gd_odd_theta = Ix + Iy; | ||
40 | + array_abs(Gd_odd_theta.data(), N); | ||
41 | + | ||
42 | + //stim::cpu2image(I.channel(0).data(), "data_output/Gd_odd_x_0919.bmp", w, h, stim::cmBrewer); | ||
43 | + //stim::cpu2image(I.channel(1).data(), "data_output/Gd_odd_y_0919.bmp", w, h, stim::cmBrewer); | ||
44 | + //stim::cpu2image(Gd_odd_theta.data(), "data_output/Gd_odd_theta_0919.bmp", w, h, stim::cmBrewer); | ||
45 | + | ||
46 | + return Gd_odd_theta; | ||
47 | + | ||
48 | +} |
1 | +#include <stim/image/image.h> | ||
2 | +//#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | + | ||
6 | +/// This function generates the second-order gaussian derivative filter gxx gyy, | ||
7 | +/// convolves the image with gxx gyy, | ||
8 | +/// and returns an image class which channel(0) is Ixx and channel(1) is Iyy | ||
9 | + | ||
10 | +/// @param img is the one-channel image | ||
11 | +/// @param r is an array of radii for different scaled discs(filters) | ||
12 | +/// @param sigma_n is the number of standard deviations used to define the sigma | ||
13 | + | ||
14 | +void conv2_sep(float* img, unsigned int x, unsigned int y, float* kernel0, unsigned int k0, float* kernel1, unsigned int k1); | ||
15 | +//void array_abs(float* img, unsigned int N); | ||
16 | + | ||
17 | +stim::image<float> Gd2(stim::image<float> image, int r, unsigned int sigma_n){ | ||
18 | + | ||
19 | + unsigned int w = image.width(); // get the width of picture | ||
20 | + unsigned int h = image.height(); // get the height of picture | ||
21 | + unsigned N = w * h; // get the number of pixels of picture | ||
22 | + int winsize = 2 * r + 1; // set the winsdow size of disc(filter) | ||
23 | + float sigma = float(r)/float(sigma_n); // calculate the sigma used in gaussian function | ||
24 | + | ||
25 | + stim::image<float> I(w, h, 1, 2); // allocate space for return image class | ||
26 | + stim::image<float> Ixx(w, h); // allocate space for Ixx | ||
27 | + stim::image<float> Iyy(w, h); // allocate space for Iyy | ||
28 | + Ixx = image; // initialize Ixx | ||
29 | + Iyy = image; // initialize Iyy | ||
30 | + | ||
31 | + float* array_x1; | ||
32 | + array_x1 = new float[winsize]; //allocate space for the 1D x-oriented gaussian derivative filter array_x1 for gxx | ||
33 | + float* array_y1; | ||
34 | + array_y1 = new float[winsize]; //allocate space for the 1D y-oriented gaussian derivative filter array_y1 for gxx | ||
35 | + float* array_x2; | ||
36 | + array_x2 = new float[winsize]; //allocate space for the 1D x-oriented gaussian derivative filter array_x2 for gyy | ||
37 | + float* array_y2; | ||
38 | + array_y2 = new float[winsize]; //allocate space for the 1D y-oriented gaussian derivative filter array_y2 for gyy | ||
39 | + | ||
40 | + | ||
41 | + for (int i = 0; i < winsize; i++){ | ||
42 | + | ||
43 | + int x = i - r; //range of x | ||
44 | + int y = i - r; //range of y | ||
45 | + | ||
46 | + // create the 1D x-oriented gaussian derivative filter array_x1 for gxx | ||
47 | + array_x1[i] = (-1) * (1 - pow(x, 2)) * exp((-1)*(pow(x, 2))/(2*pow(sigma, 2))); | ||
48 | + // create the 1D y-oriented gaussian derivative filter array_y1 for gxx | ||
49 | + array_y1[i] = exp((-1)*(pow(y, 2))/(2*pow(sigma, 2))); | ||
50 | + // create the 1D x-oriented gaussian derivative filter array_x2 for gyy | ||
51 | + array_x2[i] = exp((-1)*(pow(x, 2))/(2*pow(sigma, 2))); | ||
52 | + // create the 1D y-oriented gaussian derivative filter array_y2 for gyy | ||
53 | + array_y2[i] = (-1) * (1 - pow(y, 2)) * exp((-1)*(pow(y, 2))/(2*pow(sigma, 2))); | ||
54 | + } | ||
55 | + | ||
56 | + //stim::cpu2image(array_x1, "data_output/array_x1_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
57 | + //stim::cpu2image(array_y1, "data_output/array_y1_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
58 | + //stim::cpu2image(array_x2, "data_output/array_x2_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
59 | + //stim::cpu2image(array_y2, "data_output/array_y2_0915.bmp", winsize, 1, stim::cmBrewer); // (optional) show the mask result | ||
60 | + | ||
61 | + // get Ixx by convolving the image with gxx | ||
62 | + conv2_sep(Ixx.data(), w, h, array_x1, winsize, array_y1, winsize); | ||
63 | + | ||
64 | + //stim::cpu2image(Ixx.data(), "data_output/Ixx_0915.bmp", w, h, stim::cmBrewer); | ||
65 | + // get Iyy by convolving the image with gyy | ||
66 | + conv2_sep(Iyy.data(), w, h, array_x2, winsize, array_y2, winsize); | ||
67 | + | ||
68 | + //stim::cpu2image(Iyy.data(), "data_output/Iyy_0915.bmp", w, h, stim::cmBrewer); | ||
69 | + | ||
70 | + delete [] array_x1; //free the memory of array_x1 | ||
71 | + delete [] array_y1; //free the memory of array_y1 | ||
72 | + delete [] array_x2; //free the memory of array_x2 | ||
73 | + delete [] array_y2; //free the memory of array_y2 | ||
74 | + | ||
75 | + I.set_channel(0, Ixx.data()); | ||
76 | + I.set_channel(1, Iyy.data()); | ||
77 | + | ||
78 | + return I; | ||
79 | + | ||
80 | +} | ||
0 | \ No newline at end of file | 81 | \ No newline at end of file |
1 | +#include <stim/image/image.h> | ||
2 | +#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | + | ||
6 | +/// This function evaluates the theta-dependent even-symmetric gaussian derivative gradient of an one-channel image | ||
7 | + | ||
8 | +/// @param img is the one-channel image | ||
9 | +/// @param r is an array of radii for different scaled discs(filters) | ||
10 | +/// @param sigma_n is the number of standard deviations used to define the sigma | ||
11 | +/// @param theta is angle used for computing the gradient | ||
12 | + | ||
13 | +void conv2(float* img, float* mask, float* cpu_copy, unsigned int w, unsigned int h, unsigned int M); | ||
14 | +void array_abs(float* img, unsigned int N); | ||
15 | + | ||
16 | +stim::image<float> Gd_even(stim::image<float> image, int r, unsigned int sigma_n, float theta){ | ||
17 | + | ||
18 | + unsigned int w = image.width(); // get the width of picture | ||
19 | + unsigned int h = image.height(); // get the height of picture | ||
20 | + unsigned N = w * h; // get the number of pixels of picture | ||
21 | + int winsize = 2 * r + 1; // set the winsdow size of disc(filter) | ||
22 | + float sigma = float(r)/float(sigma_n); // calculate the sigma used in gaussian function | ||
23 | + | ||
24 | + stim::image<float> I(w, h, 1, 2); // allocate space for return image class | ||
25 | + stim::image<float> Gd_even_theta(w, h); // allocate space for Gd_even_theta | ||
26 | + stim::image<float> mask_x(winsize, winsize); // allocate space for x-axis-oriented filter | ||
27 | + stim::image<float> mask_r(winsize, winsize); // allocate space for theta-oriented filter | ||
28 | + | ||
29 | + for (int j = 0; j < winsize; j++){ | ||
30 | + for (int i = 0; i< winsize; i++){ | ||
31 | + | ||
32 | + int x = i - r; //range of x | ||
33 | + int y = j - r; //range of y | ||
34 | + | ||
35 | + // create the x-oriented gaussian derivative filter mask_x | ||
36 | + mask_x.data()[j*winsize + i] = (-1) * (1 - pow(x, 2)) * exp((-1)*(pow(x, 2))/(2*pow(sigma, 2))) * exp((-1)*(pow(y, 2))/(2*pow(sigma, 2))); | ||
37 | + | ||
38 | + } | ||
39 | + } | ||
40 | + | ||
41 | + mask_r = mask_x.rotate(theta, r, r); | ||
42 | + //mask_r = mask_x.rotate(45, r, r); | ||
43 | + //stim::cpu2image(mask_r.data(), "data_output/mask_r_0919.bmp", winsize, winsize, stim::cmBrewer); | ||
44 | + | ||
45 | + // do the 2D convolution with image and mask | ||
46 | + conv2(image.data(), mask_r.data(), Gd_even_theta.data(), w, h, winsize); | ||
47 | + array_abs(Gd_even_theta.data(), N); | ||
48 | + | ||
49 | + //stim::cpu2image(Gd_even_theta.data(), "data_output/Gd_even_theta_0919.bmp", w, h, stim::cmGrayscale); | ||
50 | + | ||
51 | + return Gd_even_theta; | ||
52 | +} | ||
0 | \ No newline at end of file | 53 | \ No newline at end of file |
1 | +#include <stim/image/image.h> | ||
2 | +//#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | +#include <opencv2/opencv.hpp> | ||
6 | +#include <iostream> | ||
7 | + | ||
8 | +/// This function use cvkmeans to cluster given textons | ||
9 | + | ||
10 | +/// @param testons is a multi-channel image | ||
11 | +/// @param k is the number of clusters | ||
12 | + | ||
13 | +stim::image<float> kmeans(stim::image<float> textons, unsigned int K){ | ||
14 | + | ||
15 | + unsigned int w = textons.width(); // get the width of picture | ||
16 | + unsigned int h = textons.height(); // get the height of picture | ||
17 | + unsigned int feature_n = textons.channels(); // get the spectrum of picture | ||
18 | + unsigned int N = w * h; // get the number of pixels | ||
19 | + | ||
20 | + float* sample1 = (float*) malloc(sizeof(float) * N * feature_n); //allocate the space for textons | ||
21 | + | ||
22 | + //reallocate a multi-channel texton image to a single-channel image | ||
23 | + for(unsigned int c = 0; c < feature_n; c++){ | ||
24 | + | ||
25 | + stim::image<float> temp; | ||
26 | + temp = textons.channel(c); | ||
27 | + | ||
28 | + for(unsigned int j = 0; j < N; j++){ | ||
29 | + | ||
30 | + sample1[c + j * feature_n] = temp.data()[j]; | ||
31 | + } | ||
32 | + } | ||
33 | + | ||
34 | + | ||
35 | + cv::Mat sample2(N, feature_n, CV_32F, sample1); //copy image to cv::mat | ||
36 | + | ||
37 | + //(optional) show the test result | ||
38 | + //imshow("sample2", sample2); | ||
39 | + | ||
40 | + | ||
41 | + cv::TermCriteria criteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 0.1); // set stop-criteria for kmeans iteration | ||
42 | + cv::Mat labels(N, 1, CV_8U, cvScalarAll(0)); // allocate space for kmeans output | ||
43 | + cv::Mat centers; // allocate space for kmeans output | ||
44 | + | ||
45 | + unsigned int test_times = 2; // set the number of times of trying kmeans, it will return the best result | ||
46 | + | ||
47 | + cv::kmeans(sample2, K, labels, criteria, test_times, cv::KMEANS_PP_CENTERS, centers); // kmeans clustering | ||
48 | + | ||
49 | + //(optional) show the test result | ||
50 | + //imwrite( "data_output/labels_1D.bmp", labels); | ||
51 | + | ||
52 | + stim::image<float> texture(w, h, 1, 1); // allocate space for texture | ||
53 | + | ||
54 | + for(unsigned int i = 0; i < N; i++){ // reshape the labels from iD array to image | ||
55 | + | ||
56 | + texture.data()[i] = labels.at<int>(i); | ||
57 | + | ||
58 | + } | ||
59 | + | ||
60 | + //texture.save("data_output/kmeans_test0924_2.bmp"); | ||
61 | + | ||
62 | + //(optional) show the test result | ||
63 | + //stim::cpu2image(texture.data(), "data_output/kmeans_test.bmp", w, h, stim::cmBrewer); | ||
64 | + | ||
65 | + return texture; | ||
66 | + | ||
67 | +} | ||
0 | \ No newline at end of file | 68 | \ No newline at end of file |
1 | +#include <stim/image/image.h> | ||
2 | +#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | + | ||
6 | +#define PI 3.1415926 | ||
7 | + | ||
8 | +void array_multiply(float* lhs, float rhs, unsigned int N); | ||
9 | +void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N); | ||
10 | +void array_abs(float* img, unsigned int N); | ||
11 | + | ||
12 | +/// This function evaluates the center-surround(Laplacian of Gaussian) gaussian derivative gradient of an one-channel image | ||
13 | + | ||
14 | +/// @param img is the one-channel image | ||
15 | +/// @param r is an array of radii for different scaled discs(filters) | ||
16 | +/// @param sigma_n is the number of standard deviations used to define the sigma | ||
17 | + | ||
18 | +stim::image<float> Gd_center(stim::image<float> image, int r, unsigned int sigma_n){ | ||
19 | + | ||
20 | + unsigned int w = image.width(); // get the width of picture | ||
21 | + unsigned int h = image.height(); // get the height of picture | ||
22 | + unsigned N = w * h; // get the number of pixels of picture | ||
23 | + int winsize = 2 * r + 1; // set the winsdow size of disc(filter) | ||
24 | + | ||
25 | + stim::image<float> I(w, h, 1, 2); // allocate space for return image of Gd2 | ||
26 | + stim::image<float> Ixx(w, h); // allocate space for Ixx | ||
27 | + stim::image<float> Iyy(w, h); // allocate space for Iyy | ||
28 | + stim::image<float> Gd_center(w, h); // allocate space for Pb | ||
29 | + | ||
30 | + I = Gd2(image, r, sigma_n); // calculate the Ixx, Iyy | ||
31 | + Ixx = I.channel(0); | ||
32 | + Iyy = I.channel(1); | ||
33 | + | ||
34 | + array_add(Ixx.data(), Iyy.data(), Gd_center.data(), N); //Gd_center = Ixx + Iyy; | ||
35 | + array_abs(Gd_center.data(), N); | ||
36 | + | ||
37 | + //stim::cpu2image(Gd_center.data(), "data_output/Gd_center_0919.bmp", w, h, stim::cmBrewer); | ||
38 | + | ||
39 | + return Gd_center; | ||
40 | + | ||
41 | +} |
1 | +#include <stim/image/image.h> | ||
2 | +#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | +#include <sstream> | ||
6 | + | ||
7 | + | ||
8 | +void array_multiply(float* lhs, float rhs, unsigned int N); | ||
9 | +void array_add(float* ptr1, float* ptr2, float* sum, unsigned int N); | ||
10 | +void chi_grad(float* img, float* cpu_copy, unsigned int w, unsigned int h, int r, unsigned int bin_n, unsigned int bin_size, float theta); | ||
11 | + | ||
12 | +/// This function evaluates the tPb given a grayscale image | ||
13 | + | ||
14 | +/// @param img is the multi-channel image | ||
15 | +/// @param theta_n is the number of angles used for computing oriented chi-gradient | ||
16 | +/// @param r is an array of radii for different scaled discs(filters) | ||
17 | +/// @param alpha is is an array of weights for different scaled discs(filters) | ||
18 | +/// @param s is the number of scales | ||
19 | +/// @param K is the number of clusters | ||
20 | + | ||
21 | +stim::image<float> tPb(stim::image<float> img, int* r, float* alpha, unsigned int theta_n, unsigned int bin_n, int s, unsigned K){ | ||
22 | + | ||
23 | + unsigned int w = img.width(); // get the width of picture | ||
24 | + unsigned int h = img.height(); // get the height of picture | ||
25 | + unsigned int N = w * h; // get the number of pixels | ||
26 | + | ||
27 | + stim::image<float> img_textons(w, h, 1, theta_n*2+1); // allocate space for img_textons | ||
28 | + stim::image<float> img_texture(w, h, 1, 1); // allocate space for img_texture | ||
29 | + stim::image<float> tPb_theta(w, h, 1, 1); // allocate space for tPb_theta | ||
30 | + stim::image<float> tPb(w, h, 1, 1); // allocate space for tPb | ||
31 | + unsigned size = tPb_theta.size(); // get the size of tPb_theta | ||
32 | + memset (tPb.data(), 0, size * sizeof(float)); // initialize all the pixels of tPb to 0 | ||
33 | + stim::image<float> temp(w, h, 1, 1); // set the temporary image to store the addtion result | ||
34 | + | ||
35 | + std::ostringstream ss; // (optional) set the stream to designate the test result file | ||
36 | + | ||
37 | + | ||
38 | + img_textons = textons(img, theta_n); | ||
39 | + | ||
40 | + img_texture = kmeans(img_textons, K); // changing kmeans result into float type is required | ||
41 | + | ||
42 | + stim::cpu2image(img_texture.data(), "data_output/texture_0925.bmp", w, h, stim::cmBrewer); | ||
43 | + | ||
44 | + | ||
45 | + unsigned int max1 = img_texture.maxv(); // get the maximum of Pb used for normalization | ||
46 | + unsigned int bin_size = (max1 + 1)/bin_n; // (whether"+1" or not depends on kmeans result) | ||
47 | + | ||
48 | + for (int i = 0; i < theta_n; i++){ | ||
49 | + | ||
50 | + float theta = 180 * ((float)i/theta_n); // calculate the even-splited angle for each tPb_theta | ||
51 | + | ||
52 | + memset (tPb_theta.data(), 0, size * sizeof(float)); // initialize all the pixels of tPb_theta to 0 | ||
53 | + | ||
54 | + //ss << "data_output/0922tPb_theta"<< theta << ".bmp"; // set the name for test result file (optional) | ||
55 | + //std::string sss = ss.str(); | ||
56 | + | ||
57 | + for (int j = 0; j < s; j++){ | ||
58 | + | ||
59 | + // get the chi-gradient by convolving each image slice with the mask | ||
60 | + chi_grad(img_texture.data(), temp.data(), w, h, r[j], bin_n, bin_size, theta); | ||
61 | + | ||
62 | + float max2 = temp.maxv(); // get the maximum of tPb_theta used for normalization | ||
63 | + array_multiply(temp.data(), 1/max2, N); // normalize the tPb_theta | ||
64 | + | ||
65 | + //output the test result of each slice (optional) | ||
66 | + //stim::cpu2image(temp.data(), "data_output/tPb_slice0924_2.bmp", w, h, stim::cmBrewer); | ||
67 | + | ||
68 | + // multiply each chi-gradient with its weight | ||
69 | + array_multiply(temp.data(), alpha[j], N); | ||
70 | + | ||
71 | + // add up all the weighted chi-gradients | ||
72 | + array_add(tPb_theta.data(), temp.data(), tPb_theta.data(), N); | ||
73 | + | ||
74 | + | ||
75 | + } | ||
76 | + | ||
77 | + //ss.str(""); //(optional) clear the space for stream | ||
78 | + | ||
79 | + for(unsigned long ti = 0; ti < N; ti++){ | ||
80 | + | ||
81 | + if(tPb_theta.data()[ti] > tPb.data()[ti]){ //get the maximum value among all tPb_theta for ith pixel | ||
82 | + tPb.data()[ti] = tPb_theta.data()[ti]; | ||
83 | + } | ||
84 | + | ||
85 | + else{ | ||
86 | + } | ||
87 | + } | ||
88 | + } | ||
89 | + | ||
90 | + float max3 = tPb.maxv(); // get the maximum of tPb used for normalization | ||
91 | + array_multiply(tPb.data(), 1/max3, N); // normalize the tPb | ||
92 | + | ||
93 | + //output the test result of tPb (optional) | ||
94 | + //stim::cpu2image(tPb.data(), "data_output/tPb_0922.bmp", w, h, stim::cmBrewer); | ||
95 | + | ||
96 | + return tPb; | ||
97 | +} |
1 | +#include <stim/image/image.h> | ||
2 | +//#include <cmath> | ||
3 | +#include <stim/visualization/colormap.h> | ||
4 | +#include <stim/image/image_contour_detection.h> | ||
5 | +#include <sstream> | ||
6 | + | ||
7 | +/// This function convolve the grayscale image with a set of oriented Gaussian | ||
8 | +/// derivative filters, and return a texton image with (theta_n*2+1) channels | ||
9 | + | ||
10 | +/// @param image is an one-channel grayscale image | ||
11 | +/// @param theta_n is the number of angles used for computing the gradient | ||
12 | + | ||
13 | +stim::image<float> textons(stim::image<float> image, unsigned int theta_n){ | ||
14 | + | ||
15 | + unsigned int w = image.width(); // get the width of picture | ||
16 | + unsigned int h = image.height(); // get the height of picture | ||
17 | + unsigned N = w * h; // get the number of pixels of picture | ||
18 | + | ||
19 | + stim::image<float> textons(w, h, 1, theta_n*2+1); // allocate space for textons | ||
20 | + stim::image<float> temp(w, h); // allocate space for temp | ||
21 | + | ||
22 | + unsigned int r_odd = 3; // set disc radii for odd-symmetric filter | ||
23 | + unsigned int sigma_n_odd = 3; // set sigma_n for odd-symmetric filter | ||
24 | + unsigned int r_even = 3; // set disc radii for even-symmetric filter | ||
25 | + unsigned int sigma_n_even = 3; // set sigma_n for even-symmetric filter | ||
26 | + unsigned int r_center = 3; // set disc radii for center-surround filter | ||
27 | + unsigned int sigma_n_center = 3; // set sigma_n for center-surround filter | ||
28 | + | ||
29 | + //std::ostringstream ss1, ss2; // (optional) set the stream to designate the test result file | ||
30 | + | ||
31 | + for (unsigned int i = 0; i < theta_n; i++){ | ||
32 | + | ||
33 | + //ss1 << "data_output/textons_channel_"<< i << ".bmp"; // set the name for test result file (optional) | ||
34 | + //std::string sss1 = ss1.str(); | ||
35 | + //ss2 << "data_output/textons_channel_"<< i+theta_n << ".bmp"; // set the name for test result file (optional) | ||
36 | + //std::string sss2 = ss2.str(); | ||
37 | + | ||
38 | + float theta = 180 * ((float)i/theta_n); // calculate the even-splited angle for each oriented filter | ||
39 | + | ||
40 | + temp = Gd_odd(image, r_odd, sigma_n_odd, theta); // return Gd_odd to temp | ||
41 | + //stim::cpu2image(temp.data(), sss1, w, h, stim::cmBrewer); | ||
42 | + textons.set_channel(i, temp.data()); // copy temp to ith channel of textons | ||
43 | + | ||
44 | + temp = Gd_even(image, r_even, sigma_n_even, theta); // return Gd_even to temp | ||
45 | + //stim::cpu2image(temp.data(), sss2, w, h, stim::cmBrewer); | ||
46 | + textons.set_channel(i + theta_n, temp.data()); // copy temp to (i+theta_n)th channel of textons | ||
47 | + | ||
48 | + //ss1.str(""); //(optional) clear the space for stream | ||
49 | + //ss2.str(""); //(optional) clear the space for stream | ||
50 | + | ||
51 | + } | ||
52 | + | ||
53 | + temp = Gd_center(image, r_center, sigma_n_center); // return Gd_center to temp | ||
54 | + //stim::cpu2image(temp.data(), "data_output/textons_channel_16.bmp", w, h, stim::cmBrewer); | ||
55 | + textons.set_channel(theta_n*2, temp.data()); // copy temp to (theta_n*2)th channel of textons | ||
56 | + | ||
57 | + return textons; | ||
58 | + | ||
59 | +} | ||
60 | + | ||
61 | + | ||
0 | \ No newline at end of file | 62 | \ No newline at end of file |
stim/cuda/cudatools/devices.h
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | #include <cuda.h> | 4 | #include <cuda.h> |
5 | 5 | ||
6 | namespace stim{ | 6 | namespace stim{ |
7 | - | 7 | +extern "C" |
8 | int maxThreadsPerBlock() | 8 | int maxThreadsPerBlock() |
9 | { | 9 | { |
10 | int device; | 10 | int device; |
@@ -13,6 +13,16 @@ int maxThreadsPerBlock() | @@ -13,6 +13,16 @@ int maxThreadsPerBlock() | ||
13 | cudaGetDeviceProperties(&props, device); | 13 | cudaGetDeviceProperties(&props, device); |
14 | return props.maxThreadsPerBlock; | 14 | return props.maxThreadsPerBlock; |
15 | } | 15 | } |
16 | + | ||
17 | +extern "C" | ||
18 | +int sharedMemPerBlock() | ||
19 | +{ | ||
20 | + int device; | ||
21 | + cudaGetDevice(&device); //get the id of the current device | ||
22 | + cudaDeviceProp props; //device property structure | ||
23 | + cudaGetDeviceProperties(&props, device); | ||
24 | + return props.sharedMemPerBlock; | ||
25 | +} | ||
16 | } //end namespace rts | 26 | } //end namespace rts |
17 | 27 | ||
18 | #endif | 28 | #endif |
stim/cuda/ivote/update_dir.cuh
1 | +#ifndef STIM_CUDA_CHI_GRAD_H | ||
2 | +#define STIM_CUDA_CHI_GRAD_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include <cuda_runtime.h> | ||
7 | +#include <stim/cuda/sharedmem.cuh> | ||
8 | +#include <cmath> | ||
9 | +#include <algorithm> | ||
10 | + | ||
11 | +#define PI 3.14159265358979 | ||
12 | + | ||
namespace stim{
	namespace cuda{

		/// Kernel computing, for every pixel, the chi-squared distance between the histograms of
		/// the two halves of a (2r+1) x (2r+1) window centred on the pixel. The window is split by
		/// the sign of (dy*cos(theta) + dx*sin(theta)); samples exactly on the dividing line are
		/// not counted.
		///
		/// Launch requirements:
		///   - 1D blocks along x; gridDim.y must equal h (blockIdx.y is used as the image row)
		///   - dynamic shared memory: blockDim.x * 2 * bin_n * sizeof(unsigned short) bytes
		///     (two private histograms per thread)
		/// Counts are stored as unsigned short, so a window must not hold more than 65535 samples.
		///
		/// template parameter @param T is the data type
		/// @param copy is the output image (w * h values) in device memory
		/// @param texObj is a texture of the input image; it is sampled with normalized coordinates
		/// @param w is the image width
		/// @param h is the image height
		/// @param r is the window radius
		/// @param bin_n is the number of histogram bins
		/// @param bin_size is the width of one bin (a value v falls into bin v / bin_size)
		/// @param theta is the orientation of the dividing line, in degrees
		template<typename T>
		__global__ void cuda_chi_grad(T* copy, cudaTextureObject_t texObj, unsigned int w, unsigned int h, int r, unsigned int bin_n, unsigned int bin_size, float theta){

			extern __shared__ unsigned short bin[];

			//change 1D index to 2D coordinates
			const int xi = blockIdx.x * blockDim.x + threadIdx.x;
			const int yj = blockIdx.y;

			//every thread only touches its own histograms, so no block-wide barrier is needed
			//and threads that fall outside the image can simply leave
			if((unsigned int)xi >= w || (unsigned int)yj >= h) return;

			//guard against a zero bin width (would be an integer division by zero)
			const unsigned int bsize = (bin_size == 0) ? 1u : bin_size;

			//private histograms of this thread: first half of the disc, then second half
			unsigned short* gbin = bin + (size_t)threadIdx.x * 2 * bin_n;
			unsigned short* hbin = gbin + bin_n;
			for(unsigned int k = 0; k < bin_n; k++){
				gbin[k] = 0;
				hbin[k] = 0;
			}

			//change angle unit from degree to rad; the trigonometric terms do not depend on the
			//sample position, so they are evaluated once instead of once per sample
			const double theta_r = ((theta) * PI)/180;
			const double cos_t = cos(theta_r);
			const double sin_t = sin(theta_r);

			if(bin_n > 0){

				//visit the window once and sort every sample into the histogram of its half
				for (int dy = -r; dy <= r; dy++){
					for (int dx = -r; dx <= r; dx++){

						const double side = dy * cos_t + dx * sin_t;
						if(side == 0) continue;

						//sample at the texel centre: with point filtering a coordinate that lies
						//exactly on a texel boundary may round into the neighbouring texel
						const float u = (xi + dx + 0.5f) / w;
						const float v = (yj + dy + 0.5f) / h;
						const int value = (int) tex2D<T>(texObj, u, v);

						//clamp the bin index so it can never leave this thread's histogram
						unsigned int k = (value > 0 ? (unsigned int)value : 0u) / bsize;
						if(k >= bin_n) k = bin_n - 1;

						if(side > 0) gbin[k]++;
						else hbin[k]++;
					}
				}
			}

			//compare gbin, hbin and calculate the chi distance
			float sum = 0;
			for (unsigned int k = 0; k < bin_n; k++){

				const int g = gbin[k];
				const int hh = hbin[k];

				//empty bins contribute zero; use 1 to avoid a zero denominator
				const float denom = (g + hh == 0) ? 1.0f : (float)(g + hh);

				sum += (g - hh) * (g - hh) / denom;
			}

			// return chi-distance for each pixel
			copy[(size_t)yj * w + xi] = sum;
		}


		/// Computes the oriented chi-gradient of an image that already resides in device memory.
		/// The function blocks until the kernel has finished.
		///
		/// @param img is the input image (w * h values, row-major) in device memory
		/// @param copy is the output image (w * h values) in device memory
		/// @param w is the image width
		/// @param h is the image height
		/// @param r is the window radius
		/// @param bin_n is the number of histogram bins
		/// @param bin_size is the width of one histogram bin
		/// @param theta is the orientation in degrees
		template<typename T>
		void gpu_chi_grad(T* img, T* copy, unsigned int w, unsigned int h, int r, unsigned int bin_n, unsigned int bin_size, float theta){

			//nothing to do for an empty image
			if(w == 0 || h == 0) return;

			//without bins every chi distance is zero
			if(bin_n == 0){
				HANDLE_ERROR( cudaMemset(copy, 0, (size_t)w * h * sizeof(T)) );
				return;
			}

			//pick the largest block that fits into shared memory: each thread needs two
			//histograms of bin_n unsigned shorts
			const size_t bytes_per_thread = 2 * (size_t)bin_n * sizeof(unsigned short);
			const int max_threads = stim::maxThreadsPerBlock();		//maximum number of threads per block for the CUDA device
			const int sharemax = stim::sharedMemPerBlock();			//shared memory available per block in bytes

			size_t threads = (sharemax > 0) ? (size_t)sharemax / bytes_per_thread : 0;
			if(max_threads > 0 && threads > (size_t)max_threads) threads = (size_t)max_threads;
			if(max_threads <= 0) threads = 0;

			if(threads == 0){

				std::cout <<"Error: shared_bytes exceeds the max value."<<'\n';
				exit(1);

			}

			if(threads > 32) threads -= threads % 32;				//keep whole warps
			const size_t shared_bytes = threads * bytes_per_thread;

			// Allocate CUDA array in device memory

			//define a channel descriptor matching the element type
			cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<T>();
			cudaArray* cuArray;											//declare the cuda array
			HANDLE_ERROR( cudaMallocArray(&cuArray, &channelDesc, w, h) );	//allocate the cuda array

			// Copy the image data from global memory to the array
			HANDLE_ERROR( cudaMemcpy2DToArray(cuArray, 0, 0, img, w * sizeof(T), w * sizeof(T), h, cudaMemcpyDeviceToDevice) );

			// Specify texture
			struct cudaResourceDesc resDesc;				//create a resource descriptor
			memset(&resDesc, 0, sizeof(resDesc));			//set all values to zero
			resDesc.resType = cudaResourceTypeArray;		//specify the resource descriptor type
			resDesc.res.array.array = cuArray;				//add a pointer to the cuda array

			// Specify texture object parameters
			struct cudaTextureDesc texDesc;					//create a texture descriptor
			memset(&texDesc, 0, sizeof(texDesc));			//set all values in the texture descriptor to zero
			texDesc.addressMode[0] = cudaAddressModeMirror;	//mirror the image at the edges
			texDesc.addressMode[1] = cudaAddressModeMirror;
			texDesc.filterMode = cudaFilterModePoint;		//nearest-point sampling (no interpolation)
			texDesc.readMode = cudaReadModeElementType;		//reads data based on the element type
			texDesc.normalizedCoords = 1;					//using normalized coordinates

			// Create texture object
			cudaTextureObject_t texObj = 0;
			HANDLE_ERROR( cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL) );

			//calculate the number of blocks: enough blocks along x to cover a row, one block row per image row
			dim3 blocks((unsigned int)((w + threads - 1) / threads), h);

			//call the kernel to compute the chi-gradient
			cuda_chi_grad <<< blocks, (unsigned int)threads, shared_bytes >>>(copy, texObj, w, h, r, bin_n, bin_size, theta);
			HANDLE_ERROR( cudaGetLastError() );				//catch launch configuration errors

			//wait for the kernel before releasing the texture it reads from
			HANDLE_ERROR( cudaDeviceSynchronize() );

			//release the texture object and its backing array (they were leaked on every call before)
			HANDLE_ERROR( cudaDestroyTextureObject(texObj) );
			HANDLE_ERROR( cudaFreeArray(cuArray) );

		}

		/// Computes the oriented chi-gradient of an image that resides in host memory.
		///
		/// @param img is the input image (w * h values, row-major) in host memory
		/// @param cpu_copy is the output image (w * h values) in host memory
		/// @param w is the image width
		/// @param h is the image height
		/// @param r is the window radius
		/// @param bin_n is the number of histogram bins
		/// @param bin_size is the width of one histogram bin
		/// @param theta is the orientation in degrees
		template<typename T>
		void cpu_chi_grad(T* img, T* cpu_copy, unsigned int w, unsigned int h, int r, unsigned int bin_n, unsigned int bin_size, float theta){

			const size_t N = (size_t)w * h;
			if(N == 0) return;								//nothing to do for an empty image

			//allocate memory on the GPU for the array
			T* gpu_img;
			T* gpu_copy;
			HANDLE_ERROR( cudaMalloc( &gpu_img, N * sizeof(T) ) );
			HANDLE_ERROR( cudaMalloc( &gpu_copy, N * sizeof(T) ) );

			//copy the array to the GPU
			HANDLE_ERROR( cudaMemcpy( gpu_img, img, N * sizeof(T), cudaMemcpyHostToDevice) );

			//call the GPU version of this function
			gpu_chi_grad<T>(gpu_img, gpu_copy, w, h, r, bin_n, bin_size, theta);

			//copy the array back to the CPU
			HANDLE_ERROR( cudaMemcpy( cpu_copy, gpu_copy, N * sizeof(T), cudaMemcpyDeviceToHost) );

			//free allocated memory
			HANDLE_ERROR( cudaFree(gpu_img) );
			HANDLE_ERROR( cudaFree(gpu_copy) );

		}

	}
}
219 | + | ||
220 | + | ||
221 | +#endif | ||
0 | \ No newline at end of file | 222 | \ No newline at end of file |
stim/cuda/templates/conv2.cuh
@@ -11,8 +11,7 @@ namespace stim{ | @@ -11,8 +11,7 @@ namespace stim{ | ||
11 | namespace cuda{ | 11 | namespace cuda{ |
12 | 12 | ||
13 | template<typename T> | 13 | template<typename T> |
14 | - //__global__ void cuda_conv2(T* img, T* mask, T* copy, cudaTextureObject_t texObj, unsigned int w, unsigned int h, unsigned M){ | ||
15 | - __global__ void cuda_conv2(T* img, T* mask, T* copy, cudaTextureObject_t texObj, unsigned int w, unsigned int h, unsigned M){ | 14 | + __global__ void cuda_conv2(T* mask, T* copy, cudaTextureObject_t texObj, unsigned int w, unsigned int h, unsigned int M){ |
16 | 15 | ||
17 | 16 | ||
18 | //the radius of mask | 17 | //the radius of mask |
@@ -34,7 +33,7 @@ namespace stim{ | @@ -34,7 +33,7 @@ namespace stim{ | ||
34 | //copy[idx] = tex2D<float>(texObj, i+100, j+100); | 33 | //copy[idx] = tex2D<float>(texObj, i+100, j+100); |
35 | //return; | 34 | //return; |
36 | 35 | ||
37 | - //tex2D<float>(texObj, i, j); | 36 | + tex2D<float>(texObj, (float)i/w, (float)j/h); |
38 | 37 | ||
39 | //allocate memory for result | 38 | //allocate memory for result |
40 | T sum = 0; | 39 | T sum = 0; |
@@ -51,9 +50,7 @@ namespace stim{ | @@ -51,9 +50,7 @@ namespace stim{ | ||
51 | int xx = x - (i - r); | 50 | int xx = x - (i - r); |
52 | int yy = y - (j - r); | 51 | int yy = y - (j - r); |
53 | 52 | ||
54 | - //T temp = img[y * w + x] * mask[yy * M + xx]; | ||
55 | - //sum += img[y * w + x] * mask[yy * M + xx]; | ||
56 | - sum += tex2D<T>(texObj, x, y) * 1.0;//mask[yy * M + xx]; | 53 | + sum += tex2D<T>(texObj, (float)x/w, (float)y/h) * mask[yy * M + xx]; |
57 | } | 54 | } |
58 | } | 55 | } |
59 | copy[idx] = sum; | 56 | copy[idx] = sum; |
@@ -88,11 +85,11 @@ namespace stim{ | @@ -88,11 +85,11 @@ namespace stim{ | ||
88 | // Specify texture object parameters | 85 | // Specify texture object parameters |
89 | struct cudaTextureDesc texDesc; //create a texture descriptor | 86 | struct cudaTextureDesc texDesc; //create a texture descriptor |
90 | memset(&texDesc, 0, sizeof(texDesc)); //set all values in the texture descriptor to zero | 87 | memset(&texDesc, 0, sizeof(texDesc)); //set all values in the texture descriptor to zero |
91 | - texDesc.addressMode[0] = cudaAddressModeMirror; //use wrapping (around the edges) | ||
92 | - texDesc.addressMode[1] = cudaAddressModeMirror; | 88 | + texDesc.addressMode[0] = cudaAddressModeClamp; //use wrapping (around the edges) |
89 | + texDesc.addressMode[1] = cudaAddressModeClamp; | ||
93 | texDesc.filterMode = cudaFilterModePoint; //use linear filtering | 90 | texDesc.filterMode = cudaFilterModePoint; //use linear filtering |
94 |